From 7cad8e4e0d77078991e62d2df33c21b59483ed32 Mon Sep 17 00:00:00 2001
From: jm12138 <2286040843@qq.com>
Date: Mon, 28 Dec 2020 19:53:18 +0800
Subject: [PATCH] Add a depth estimation module MiDaS (#1148)

Add a depth estimation module MiDaS
---
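Both modules expose the same `depth_estimation` API and differ only in backbone
size and input resolution (Large: 384x384, Small: 256x256). A minimal
end-to-end sketch of the intended usage, with a placeholder image path:

```python
import cv2
import paddlehub as hub

model = hub.Module(name='MiDaS_Small', use_gpu=False)
depth_maps = model.depth_estimation(
    images=[cv2.imread('/PATH/TO/IMAGE')],  # BGR uint8, as returned by cv2.imread
    batch_size=1,
    visualization=True,                     # also writes output/0.pfm and output/0.png
    output_dir='output')
print(depth_maps[0].shape)                  # (H, W) float depth map at the input resolution
```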
 .../depth_estimation/MiDaS_Large/README.md    |  73 +++++++
 .../depth_estimation/MiDaS_Large/inference.py | 141 ++++++++++++++
 .../depth_estimation/MiDaS_Large/module.py    | 134 +++++++++++++
 .../MiDaS_Large/transforms.py                 | 184 ++++++++++++++++++
 .../depth_estimation/MiDaS_Large/utils.py     |  87 +++++++++
 .../depth_estimation/MiDaS_Small/README.md    |  73 +++++++
 .../depth_estimation/MiDaS_Small/inference.py | 141 ++++++++++++++
 .../depth_estimation/MiDaS_Small/module.py    | 134 +++++++++++++
 .../MiDaS_Small/transforms.py                 | 184 ++++++++++++++++++
 .../depth_estimation/MiDaS_Small/utils.py     |  87 +++++++++
 10 files changed, 1238 insertions(+)
 create mode 100644 modules/image/depth_estimation/MiDaS_Large/README.md
 create mode 100644 modules/image/depth_estimation/MiDaS_Large/inference.py
 create mode 100644 modules/image/depth_estimation/MiDaS_Large/module.py
 create mode 100644 modules/image/depth_estimation/MiDaS_Large/transforms.py
 create mode 100644 modules/image/depth_estimation/MiDaS_Large/utils.py
 create mode 100644 modules/image/depth_estimation/MiDaS_Small/README.md
 create mode 100644 modules/image/depth_estimation/MiDaS_Small/inference.py
 create mode 100644 modules/image/depth_estimation/MiDaS_Small/module.py
 create mode 100644 modules/image/depth_estimation/MiDaS_Small/transforms.py
 create mode 100644 modules/image/depth_estimation/MiDaS_Small/utils.py

diff --git a/modules/image/depth_estimation/MiDaS_Large/README.md b/modules/image/depth_estimation/MiDaS_Large/README.md
new file mode 100644
index 00000000..319c8a53
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Large/README.md
@@ -0,0 +1,73 @@
+## Model Overview
+MiDaS v2.1 Large monocular depth estimation model.
+
+The model estimates the depth information of a scene from a single input image.
+
+Model weights were converted from the official [MiDaS](https://github.com/intel-isl/MiDaS) open-source project.
+
+
+## Installation
+
+```shell
+$ hub install MiDaS_Large
+```
+
+## Demo
+![demo](https://img-blog.csdnimg.cn/20201227112600975.jpg)
+
+## API
+
+```python
+def depth_estimation(
+    images=None,
+    paths=None,
+    batch_size=1,
+    output_dir='output',
+    visualization=False
+)
+```
+
+Depth estimation API.
+
+**Parameters**
+
+* images (list\[numpy.ndarray\]): image data, ndarray.shape is \[H, W, C\]; defaults to None;
+* paths (list\[str\]): image file paths; defaults to None;
+* batch\_size (int): batch size; defaults to 1;
+* visualization (bool): whether to save the results as image files; defaults to False;
+* output\_dir (str): directory in which to save the images; defaults to 'output'.
+
+
+**Returns**
+
+* res (list\[numpy.ndarray\]): depth data, ndarray.shape is \[H, W\].
+
+
+## Prediction Example
+
+```python
+import cv2
+import paddlehub as hub

+# Load the model
+# use_gpu: whether to predict on GPU
+model = hub.Module(name='MiDaS_Large', use_gpu=False)
+
+# Predict
+result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')])
+
+# or
+# result = model.depth_estimation(paths=['/PATH/TO/IMAGE'])
+```
+
+## Related Information
+
+### Model Code
+
+https://github.com/intel-isl/MiDaS
+
+### Dependencies
+
+paddlepaddle >= 2.0.0rc0
+
+paddlehub >= 2.0.0b1
diff --git a/modules/image/depth_estimation/MiDaS_Large/inference.py b/modules/image/depth_estimation/MiDaS_Large/inference.py
new file mode 100644
index 00000000..6ebe297a
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Large/inference.py
@@ -0,0 +1,141 @@
+import os
+import numpy as np
+
+from paddle.inference import create_predictor, Config
+
+__all__ = ['InferenceModel']
+
+class InferenceModel:
+    # Initialization
+    def __init__(self, modelpath, use_gpu=False, use_mkldnn=False, combined=True):
+        '''
+        init the inference model
+
+        modelpath: inference model path
+
+        use_gpu: use gpu or not
+
+        use_mkldnn: use mkldnn or not
+
+        combined: inference model format is combined or not
+        '''
+        # Load the model configuration
+        self.config = self.load_config(modelpath, use_gpu, use_mkldnn, combined)
+
+    # Representation
+    def __repr__(self):
+        '''
+        get the numbers and names of inputs and outputs
+        '''
+        return 'inputs_num: %d\ninputs_names: %s\noutputs_num: %d\noutputs_names: %s' % (
+            len(self.input_handles),
+            str(self.input_names),
+            len(self.output_handles),
+            str(self.output_names)
+        )
+
+    # Call operator
+    def __call__(self, *input_datas, batch_size=1):
+        '''
+        call function
+        '''
+        return self.forward(*input_datas, batch_size=batch_size)
+
+    # Model configuration loader
+    def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
+        '''
+        load the model config
+
+        modelpath: inference model path
+
+        use_gpu: use gpu or not
+
+        use_mkldnn: use mkldnn or not
+
+        combined: inference model format is combined or not
+        '''
+        # Configure the execution device; fall back to CPU if no GPU id is set
+        if use_gpu:
+            try:
+                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
+            except Exception:
+                print('Error! Unable to use GPU. Please set the environment variable "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.')
+                use_gpu = False
+
+        # Load the model parameters
+        if combined:
+            model = os.path.join(modelpath, "__model__")
+            params = os.path.join(modelpath, "__params__")
+            config = Config(model, params)
+        else:
+            config = Config(modelpath)
+
+        # Apply the device settings
+        if use_gpu:
+            config.enable_use_gpu(100, 0)
+        else:
+            config.disable_gpu()
+            if use_mkldnn:
+                config.enable_mkldnn()
+
+        # Return the configuration
+        return config
+
+    # Predictor creation
+    def eval(self):
+        '''
+        create the model predictor by model config
+        '''
+        # Create the predictor
+        self.predictor = create_predictor(self.config)
+
+        # Get the input and output names of the model
+        self.input_names = self.predictor.get_input_names()
+        self.output_names = self.predictor.get_output_names()
+
+        # Get the input handles
+        self.input_handles = []
+        for input_name in self.input_names:
+            self.input_handles.append(self.predictor.get_input_handle(input_name))
+
+        # Get the output handles
+        self.output_handles = []
+        for output_name in self.output_names:
+            self.output_handles.append(self.predictor.get_output_handle(output_name))
+
+    # Forward computation
+    def forward(self, *input_datas, batch_size=1):
+        """
+        model inference
+
+        batch_size: batch size
+
+        *input_datas: x1, x2, ..., xn
+        """
+        # Split the inputs into batches (ceiling division)
+        datas_num = input_datas[0].shape[0]
+        split_num = datas_num // batch_size + 1 if datas_num % batch_size != 0 else datas_num // batch_size
+        input_datas = [np.array_split(input_data, split_num) for input_data in input_datas]
+
+        # Run prediction batch by batch
+        outputs = {}
+        for step in range(split_num):
+            for i in range(len(self.input_handles)):
+                input_data = input_datas[i][step].copy()
+                self.input_handles[i].copy_from_cpu(input_data)
+
+            self.predictor.run()
+
+            for i in range(len(self.output_handles)):
+                output = self.output_handles[i].copy_to_cpu()
+                if i in outputs:
+                    outputs[i].append(output)
+                else:
+                    outputs[i] = [output]
+
+        # Concatenate the per-batch results
+        for key in outputs.keys():
+            outputs[key] = np.concatenate(outputs[key], 0)
+
+        # Return the results
+        return outputs
\ No newline at end of file
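A note on the batching in `forward` above: `split_num` is a ceiling division, so
the inputs are cut into ceil(n / batch_size) chunks and `np.array_split` never
produces a chunk larger than `batch_size`. A self-contained sketch of the same
arithmetic (the helper name is illustrative):

```python
import numpy as np

def split_batches(x, batch_size):
    # ceil(n / batch_size) chunks, mirroring InferenceModel.forward
    n = x.shape[0]
    split_num = n // batch_size + 1 if n % batch_size != 0 else n // batch_size
    return np.array_split(x, split_num)

chunks = split_batches(np.zeros((5, 3, 384, 384), dtype='float32'), batch_size=2)
print([c.shape[0] for c in chunks])  # [2, 2, 1]
```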
diff --git a/modules/image/depth_estimation/MiDaS_Large/module.py b/modules/image/depth_estimation/MiDaS_Large/module.py
new file mode 100644
index 00000000..3250c419
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Large/module.py
@@ -0,0 +1,134 @@
+import os
+import cv2
+import numpy as np
+
+from paddlehub import Module
+from paddlehub.module.module import moduleinfo
+
+from paddle.vision.transforms import Compose
+from MiDaS_Large.utils import write_depth
+from MiDaS_Large.inference import InferenceModel
+from MiDaS_Large.transforms import Resize, NormalizeImage, PrepareForNet
+
+
+@moduleinfo(
+    name="MiDaS_Large",                # module name
+    type="CV/depth_estimation",        # module type
+    author="jm12138",                  # author
+    author_email="jm12138@qq.com",     # author email
+    summary="MiDaS_Large is a monocular depth estimation model.",  # summary
+    version="1.0.0"                    # version
+)
+class MiDaS_Large(Module):
+    # Initialization
+    def __init__(self, name=None, directory=None, use_gpu=False):
+        # Model path
+        model_path = os.path.join(self.directory, "model-f6b98070")
+
+        # Load the model
+        self.model = InferenceModel(
+            modelpath=model_path,
+            use_gpu=use_gpu,
+            use_mkldnn=False,
+            combined=True
+        )
+        self.model.eval()
+
+        # Preprocessing configuration
+        self.net_h, self.net_w = 384, 384
+        self.transform = Compose([
+            Resize(
+                self.net_w,
+                self.net_h,
+                resize_target=None,
+                keep_aspect_ratio=False,
+                ensure_multiple_of=32,
+                resize_method="upper_bound",
+                image_interpolation_method=cv2.INTER_CUBIC,
+            ),
+            NormalizeImage(mean=[0.485, 0.456, 0.406],
+                           std=[0.229, 0.224, 0.225]),
+            PrepareForNet()
+        ])
+
+    # Data loading
+    @staticmethod
+    def load_datas(paths, images):
+        datas = []
+
+        # Read images from file paths
+        if paths is not None:
+            for im_path in paths:
+                assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+                im = cv2.imread(im_path)
+                datas.append(im)
+
+        # Use in-memory images directly
+        if images is not None:
+            datas = images
+
+        # Return the data list
+        return datas
+
+    # Preprocessing
+    def preprocess(self, datas):
+        input_datas = []
+
+        for img in datas:
+            # Convert BGR to RGB and scale to [0, 1]
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
+
+            # Apply the image transforms
+            img = self.transform({"image": img})["image"]
+
+            # Add the batch dimension
+            input_data = img[np.newaxis, ...]
+
+            input_datas.append(input_data)
+
+        # Concatenate into a single batch
+        input_datas = np.concatenate(input_datas, 0)
+
+        return input_datas
+
+    # Postprocessing
+    @staticmethod
+    def postprocess(datas, results, output_dir='output', visualization=False):
+        # Make sure the output directory exists
+        if visualization:
+            if not os.path.exists(output_dir):
+                os.mkdir(output_dir)
+
+        outputs = []
+
+        for count, (img, result) in enumerate(zip(datas, results)):
+            # Resize back to the original resolution
+            output = cv2.resize(result, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            # Save the visualization
+            if visualization:
+                pfm_f, png_f = write_depth(os.path.join(output_dir, str(count)), output, bits=2)
+
+            outputs.append(output)
+
+        return outputs
+
+    # Depth estimation API
+    def depth_estimation(self,
+                         images=None,
+                         paths=None,
+                         batch_size=1,
+                         output_dir='output',
+                         visualization=False):
+        # Load the input data
+        datas = self.load_datas(paths, images)
+
+        # Preprocess
+        input_datas = self.preprocess(datas)
+
+        # Predict
+        results = self.model(input_datas, batch_size=batch_size)[0]
+
+        # Postprocess
+        outputs = self.postprocess(datas, results, output_dir, visualization)
+
+        return outputs
\ No newline at end of file
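The maps returned by `depth_estimation` are raw floats, so they need min-max
normalization before they can be viewed. A minimal sketch, assuming the module
is installed and mirroring the scaling that `write_depth` applies:

```python
import cv2
import paddlehub as hub

model = hub.Module(name='MiDaS_Large', use_gpu=False)
depth = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')])[0]

# Min-max normalize to 8-bit grayscale; the epsilon guards a constant map
span = max(float(depth.max() - depth.min()), 1e-8)
vis = (255 * (depth - depth.min()) / span).astype('uint8')
cv2.imwrite('depth_vis.png', vis)
```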
diff --git a/modules/image/depth_estimation/MiDaS_Large/transforms.py b/modules/image/depth_estimation/MiDaS_Large/transforms.py
new file mode 100644
index 00000000..189dfce1
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Large/transforms.py
@@ -0,0 +1,184 @@
+# Refer https://github.com/intel-isl/MiDaS
+
+import numpy as np
+import cv2
+
+
+class Resize(object):
+    """Resize sample to given size (width, height).
+    """
+    def __init__(self,
+                 width,
+                 height,
+                 resize_target=True,
+                 keep_aspect_ratio=False,
+                 ensure_multiple_of=1,
+                 resize_method="lower_bound",
+                 image_interpolation_method=cv2.INTER_AREA):
+        """Init.
+
+        Args:
+            width (int): desired output width
+            height (int): desired output height
+            resize_target (bool, optional):
+                True: Resize the full sample (image, mask, target).
+                False: Resize image only.
+                Defaults to True.
+            keep_aspect_ratio (bool, optional):
+                True: Keep the aspect ratio of the input sample.
+                Output sample might not have the given width and height, and
+                resize behaviour depends on the parameter 'resize_method'.
+                Defaults to False.
+            ensure_multiple_of (int, optional):
+                Output width and height are constrained to be a multiple of this parameter.
+                Defaults to 1.
+            resize_method (str, optional):
+                "lower_bound": Output will be at least as large as the given size.
+                "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than given size.)
+                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
+                Defaults to "lower_bound".
+        """
+        self.__width = width
+        self.__height = height
+
+        self.__resize_target = resize_target
+        self.__keep_aspect_ratio = keep_aspect_ratio
+        self.__multiple_of = ensure_multiple_of
+        self.__resize_method = resize_method
+        self.__image_interpolation_method = image_interpolation_method
+
+    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
+        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+        if max_val is not None and y > max_val:
+            y = (np.floor(x / self.__multiple_of) *
+                 self.__multiple_of).astype(int)
+
+        if y < min_val:
+            y = (np.ceil(x / self.__multiple_of) *
+                 self.__multiple_of).astype(int)
+
+        return y
+
+    def get_size(self, width, height):
+        # determine new height and width
+        scale_height = self.__height / height
+        scale_width = self.__width / width
+
+        if self.__keep_aspect_ratio:
+            if self.__resize_method == "lower_bound":
+                # scale such that output size is lower bound
+                if scale_width > scale_height:
+                    # fit width
+                    scale_height = scale_width
+                else:
+                    # fit height
+                    scale_width = scale_height
+            elif self.__resize_method == "upper_bound":
+                # scale such that output size is upper bound
+                if scale_width < scale_height:
+                    # fit width
+                    scale_height = scale_width
+                else:
+                    # fit height
+                    scale_width = scale_height
+            elif self.__resize_method == "minimal":
+                # scale as little as possible
+                if abs(1 - scale_width) < abs(1 - scale_height):
+                    # fit width
+                    scale_height = scale_width
+                else:
+                    # fit height
+                    scale_width = scale_height
+            else:
+                raise ValueError(
+                    f"resize_method {self.__resize_method} not implemented")
+
+        if self.__resize_method == "lower_bound":
+            new_height = self.constrain_to_multiple_of(scale_height * height,
+                                                       min_val=self.__height)
+            new_width = self.constrain_to_multiple_of(scale_width * width,
+                                                      min_val=self.__width)
+        elif self.__resize_method == "upper_bound":
+            new_height = self.constrain_to_multiple_of(scale_height * height,
+                                                       max_val=self.__height)
+            new_width = self.constrain_to_multiple_of(scale_width * width,
+                                                      max_val=self.__width)
+        elif self.__resize_method == "minimal":
+            new_height = self.constrain_to_multiple_of(scale_height * height)
+            new_width = self.constrain_to_multiple_of(scale_width * width)
+        else:
+            raise ValueError(
+                f"resize_method {self.__resize_method} not implemented")
+
+        return (new_width, new_height)
+
+    def __call__(self, sample):
+        width, height = self.get_size(sample["image"].shape[1],
+                                      sample["image"].shape[0])
+
+        # resize sample
+        sample["image"] = cv2.resize(
+            sample["image"],
+            (width, height),
+            interpolation=self.__image_interpolation_method,
+        )
+
+        if self.__resize_target:
+            if "disparity" in sample:
+                sample["disparity"] = cv2.resize(
+                    sample["disparity"],
+                    (width, height),
+                    interpolation=cv2.INTER_NEAREST,
+                )
+
+            if "depth" in sample:
+                sample["depth"] = cv2.resize(sample["depth"], (width, height),
+                                             interpolation=cv2.INTER_NEAREST)
+
+            sample["mask"] = cv2.resize(
+                sample["mask"].astype(np.float32),
+                (width, height),
+                interpolation=cv2.INTER_NEAREST,
+            )
+            sample["mask"] = sample["mask"].astype(bool)
+
+        return sample
+
+
+class NormalizeImage(object):
+    """Normalize image by given mean and std.
+    """
+    def __init__(self, mean, std):
+        self.__mean = mean
+        self.__std = std
+
+    def __call__(self, sample):
+        sample["image"] = (sample["image"] - self.__mean) / self.__std
+
+        return sample
+
+
+class PrepareForNet(object):
+    """Prepare sample for usage as network input.
+    """
+    def __init__(self):
+        pass
+
+    def __call__(self, sample):
+        image = np.transpose(sample["image"], (2, 0, 1))
+        sample["image"] = np.ascontiguousarray(image).astype(np.float32)
+
+        if "mask" in sample:
+            sample["mask"] = sample["mask"].astype(np.float32)
+            sample["mask"] = np.ascontiguousarray(sample["mask"])
+
+        if "disparity" in sample:
+            disparity = sample["disparity"].astype(np.float32)
+            sample["disparity"] = np.ascontiguousarray(disparity)
+
+        if "depth" in sample:
+            depth = sample["depth"].astype(np.float32)
+            sample["depth"] = np.ascontiguousarray(depth)
+
+        return sample
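A worked example of the sizing logic: `module.py` builds `Resize` with
`keep_aspect_ratio=False`, `ensure_multiple_of=32` and
`resize_method="upper_bound"`, which maps every input to exactly 384x384 (384
is already a multiple of 32); the aspect-preserving path only differs when
`keep_aspect_ratio=True`. A small check, assuming the module directory is on
the import path:

```python
import cv2
from MiDaS_Large.transforms import Resize

# Configuration used by the module: any input becomes exactly 384x384
r = Resize(384, 384, keep_aspect_ratio=False, ensure_multiple_of=32,
           resize_method="upper_bound", image_interpolation_method=cv2.INTER_CUBIC)
print(r.get_size(1920, 1080))   # (384, 384)

# With keep_aspect_ratio=True the width is fitted (scale 0.2), and the scaled
# height 216 is rounded to the nearest multiple of 32:
r2 = Resize(384, 384, keep_aspect_ratio=True, ensure_multiple_of=32,
            resize_method="upper_bound")
print(r2.get_size(1920, 1080))  # (384, 224)
```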
diff --git a/modules/image/depth_estimation/MiDaS_Large/utils.py b/modules/image/depth_estimation/MiDaS_Large/utils.py
new file mode 100644
index 00000000..2a45c7b6
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Large/utils.py
@@ -0,0 +1,87 @@
+# Refer https://github.com/intel-isl/MiDaS
+"""Utils for monoDepth.
+"""
+import sys
+import numpy as np
+import cv2
+
+
+def write_pfm(path, image, scale=1):
+    """Write pfm file.
+
+    Args:
+        path (str): path to file
+        image (array): data
+        scale (int, optional): Scale. Defaults to 1.
+    """
+
+    with open(path, "wb") as file:
+        color = None
+
+        if image.dtype.name != "float32":
+            raise Exception("Image dtype must be float32.")
+
+        image = np.flipud(image)
+
+        if len(image.shape) == 3 and image.shape[2] == 3:  # color image
+            color = True
+        elif (len(image.shape) == 2
+              or len(image.shape) == 3 and image.shape[2] == 1):  # greyscale
+            color = False
+        else:
+            raise Exception(
+                "Image must have H x W x 3, H x W x 1 or H x W dimensions.")
+
+        file.write(("PF\n" if color else "Pf\n").encode())
+        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
+
+        endian = image.dtype.byteorder
+
+        if endian == "<" or (endian == "=" and sys.byteorder == "little"):
+            scale = -scale
+
+        file.write("%f\n".encode() % scale)
+
+        image.tofile(file)
+
+
+def read_image(path):
+    """Read image and output RGB image (0-1).
+
+    Args:
+        path (str): path to file
+
+    Returns:
+        array: RGB image (0-1)
+    """
+    img = cv2.imread(path)
+    if img.ndim == 2:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
+    return img
+
+
+def write_depth(path, depth, bits=1):
+    """Write depth map to pfm and png file.
+
+    Args:
+        path (str): filepath without extension
+        depth (array): depth
+        bits (int, optional): bytes per png sample (1: uint8, 2: uint16). Defaults to 1.
+    """
+    write_pfm(path + ".pfm", depth.astype(np.float32))
+
+    depth_min = depth.min()
+    depth_max = depth.max()
+
+    max_val = (2**(8 * bits)) - 1
+
+    # Min-max normalize to the full integer range; a constant map becomes zeros
+    if depth_max - depth_min > np.finfo("float").eps:
+        out = max_val * (depth - depth_min) / (depth_max - depth_min)
+    else:
+        out = np.zeros(depth.shape, dtype=depth.dtype)
+
+    if bits == 1:
+        cv2.imwrite(path + ".png", out.astype("uint8"))
+    elif bits == 2:
+        cv2.imwrite(path + ".png", out.astype("uint16"))
+
+    return path + '.pfm', path + ".png"
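Since `write_depth` min-max normalizes before saving, the 16-bit png encodes
only relative depth in [0, 65535]; the absolute values survive only in the .pfm
file. A short sketch of reading a saved map back, assuming the
`<output_dir>/<index>.png` naming used by `postprocess`:

```python
import cv2

raw = cv2.imread('output/0.png', cv2.IMREAD_UNCHANGED)  # uint16 when bits=2
relative_depth = raw.astype('float32') / 65535.0        # back to [0, 1]
```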
diff --git a/modules/image/depth_estimation/MiDaS_Small/README.md b/modules/image/depth_estimation/MiDaS_Small/README.md
new file mode 100644
index 00000000..886f430d
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Small/README.md
@@ -0,0 +1,73 @@
+## Model Overview
+MiDaS v2.1 Small monocular depth estimation model.
+
+The model estimates the depth information of a scene from a single input image.
+
+Model weights were converted from the official [MiDaS](https://github.com/intel-isl/MiDaS) open-source project.
+
+
+## Installation
+
+```shell
+$ hub install MiDaS_Small
+```
+
+## Demo
+![demo](https://img-blog.csdnimg.cn/20201227112553903.jpg)
+
+## API
+
+```python
+def depth_estimation(
+    images=None,
+    paths=None,
+    batch_size=1,
+    output_dir='output',
+    visualization=False
+)
+```
+
+Depth estimation API.
+
+**Parameters**
+
+* images (list\[numpy.ndarray\]): image data, ndarray.shape is \[H, W, C\]; defaults to None;
+* paths (list\[str\]): image file paths; defaults to None;
+* batch\_size (int): batch size; defaults to 1;
+* visualization (bool): whether to save the results as image files; defaults to False;
+* output\_dir (str): directory in which to save the images; defaults to 'output'.
+
+
+**Returns**
+
+* res (list\[numpy.ndarray\]): depth data, ndarray.shape is \[H, W\].
+
+
+## Prediction Example
+
+```python
+import cv2
+import paddlehub as hub
+
+# Load the model
+# use_gpu: whether to predict on GPU
+model = hub.Module(name='MiDaS_Small', use_gpu=False)
+
+# Predict
+result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')])
+
+# or
+# result = model.depth_estimation(paths=['/PATH/TO/IMAGE'])
+```
+
+## Related Information
+
+### Model Code
+
+https://github.com/intel-isl/MiDaS
+
+### Dependencies
+
+paddlepaddle >= 2.0.0rc0
+
+paddlehub >= 2.0.0b1
diff --git a/modules/image/depth_estimation/MiDaS_Small/inference.py b/modules/image/depth_estimation/MiDaS_Small/inference.py
new file mode 100644
index 00000000..6ebe297a
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Small/inference.py
@@ -0,0 +1,141 @@
+import os
+import numpy as np
+
+from paddle.inference import create_predictor, Config
+
+__all__ = ['InferenceModel']
+
+class InferenceModel:
+    # Initialization
+    def __init__(self, modelpath, use_gpu=False, use_mkldnn=False, combined=True):
+        '''
+        init the inference model
+
+        modelpath: inference model path
+
+        use_gpu: use gpu or not
+
+        use_mkldnn: use mkldnn or not
+
+        combined: inference model format is combined or not
+        '''
+        # Load the model configuration
+        self.config = self.load_config(modelpath, use_gpu, use_mkldnn, combined)
+
+    # Representation
+    def __repr__(self):
+        '''
+        get the numbers and names of inputs and outputs
+        '''
+        return 'inputs_num: %d\ninputs_names: %s\noutputs_num: %d\noutputs_names: %s' % (
+            len(self.input_handles),
+            str(self.input_names),
+            len(self.output_handles),
+            str(self.output_names)
+        )
+
+    # Call operator
+    def __call__(self, *input_datas, batch_size=1):
+        '''
+        call function
+        '''
+        return self.forward(*input_datas, batch_size=batch_size)
+
+    # Model configuration loader
+    def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
+        '''
+        load the model config
+
+        modelpath: inference model path
+
+        use_gpu: use gpu or not
+
+        use_mkldnn: use mkldnn or not
+
+        combined: inference model format is combined or not
+        '''
+        # Configure the execution device; fall back to CPU if no GPU id is set
+        if use_gpu:
+            try:
+                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
+            except Exception:
+                print('Error! Unable to use GPU. Please set the environment variable "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.')
+                use_gpu = False
+
+        # Load the model parameters
+        if combined:
+            model = os.path.join(modelpath, "__model__")
+            params = os.path.join(modelpath, "__params__")
+            config = Config(model, params)
+        else:
+            config = Config(modelpath)
+
+        # Apply the device settings
+        if use_gpu:
+            config.enable_use_gpu(100, 0)
+        else:
+            config.disable_gpu()
+            if use_mkldnn:
+                config.enable_mkldnn()
+
+        # Return the configuration
+        return config
+
+    # Predictor creation
+    def eval(self):
+        '''
+        create the model predictor by model config
+        '''
+        # Create the predictor
+        self.predictor = create_predictor(self.config)
+
+        # Get the input and output names of the model
+        self.input_names = self.predictor.get_input_names()
+        self.output_names = self.predictor.get_output_names()
+
+        # Get the input handles
+        self.input_handles = []
+        for input_name in self.input_names:
+            self.input_handles.append(self.predictor.get_input_handle(input_name))
+
+        # Get the output handles
+        self.output_handles = []
+        for output_name in self.output_names:
+            self.output_handles.append(self.predictor.get_output_handle(output_name))
+
+    # Forward computation
+    def forward(self, *input_datas, batch_size=1):
+        """
+        model inference
+
+        batch_size: batch size
+
+        *input_datas: x1, x2, ..., xn
+        """
+        # Split the inputs into batches (ceiling division)
+        datas_num = input_datas[0].shape[0]
+        split_num = datas_num // batch_size + 1 if datas_num % batch_size != 0 else datas_num // batch_size
+        input_datas = [np.array_split(input_data, split_num) for input_data in input_datas]
+
+        # Run prediction batch by batch
+        outputs = {}
+        for step in range(split_num):
+            for i in range(len(self.input_handles)):
+                input_data = input_datas[i][step].copy()
+                self.input_handles[i].copy_from_cpu(input_data)
+
+            self.predictor.run()
+
+            for i in range(len(self.output_handles)):
+                output = self.output_handles[i].copy_to_cpu()
+                if i in outputs:
+                    outputs[i].append(output)
+                else:
+                    outputs[i] = [output]
+
+        # Concatenate the per-batch results
+        for key in outputs.keys():
+            outputs[key] = np.concatenate(outputs[key], 0)
+
+        # Return the results
+        return outputs
\ No newline at end of file
diff --git a/modules/image/depth_estimation/MiDaS_Small/module.py b/modules/image/depth_estimation/MiDaS_Small/module.py
new file mode 100644
index 00000000..1abd6fe5
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Small/module.py
@@ -0,0 +1,134 @@
+import os
+import cv2
+import numpy as np
+
+from paddlehub import Module
+from paddlehub.module.module import moduleinfo
+
+from paddle.vision.transforms import Compose
+from MiDaS_Small.utils import write_depth
+from MiDaS_Small.inference import InferenceModel
+from MiDaS_Small.transforms import Resize, NormalizeImage, PrepareForNet
+
+
+@moduleinfo(
+    name="MiDaS_Small",                # module name
+    type="CV/depth_estimation",        # module type
+    author="jm12138",                  # author
+    author_email="jm12138@qq.com",     # author email
+    summary="MiDaS_Small is a monocular depth estimation model.",  # summary
+    version="1.0.0"                    # version
+)
+class MiDaS_Small(Module):
+    # Initialization
+    def __init__(self, name=None, directory=None, use_gpu=False):
+        # Model path
+        model_path = os.path.join(self.directory, "model-small")
+
+        # Load the model
+        self.model = InferenceModel(
+            modelpath=model_path,
+            use_gpu=use_gpu,
+            use_mkldnn=False,
+            combined=True
+        )
+        self.model.eval()
+
+        # Preprocessing configuration
+        self.net_h, self.net_w = 256, 256
+        self.transform = Compose([
+            Resize(
+                self.net_w,
+                self.net_h,
+                resize_target=None,
+                keep_aspect_ratio=False,
+                ensure_multiple_of=32,
+                resize_method="upper_bound",
+                image_interpolation_method=cv2.INTER_CUBIC,
+            ),
+            NormalizeImage(mean=[0.485, 0.456, 0.406],
+                           std=[0.229, 0.224, 0.225]),
+            PrepareForNet()
+        ])
+
+    # Data loading
+    @staticmethod
+    def load_datas(paths, images):
+        datas = []
+
+        # Read images from file paths
+        if paths is not None:
+            for im_path in paths:
+                assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
+                im = cv2.imread(im_path)
+                datas.append(im)
+
+        # Use in-memory images directly
+        if images is not None:
+            datas = images
+
+        # Return the data list
+        return datas
+
+    # Preprocessing
+    def preprocess(self, datas):
+        input_datas = []
+
+        for img in datas:
+            # Convert BGR to RGB and scale to [0, 1]
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
+
+            # Apply the image transforms
+            img = self.transform({"image": img})["image"]
+
+            # Add the batch dimension
+            input_data = img[np.newaxis, ...]
+
+            input_datas.append(input_data)
+
+        # Concatenate into a single batch
+        input_datas = np.concatenate(input_datas, 0)
+
+        return input_datas
+
+    # Postprocessing
+    @staticmethod
+    def postprocess(datas, results, output_dir='output', visualization=False):
+        # Make sure the output directory exists
+        if visualization:
+            if not os.path.exists(output_dir):
+                os.mkdir(output_dir)
+
+        outputs = []
+
+        for count, (img, result) in enumerate(zip(datas, results)):
+            # Resize back to the original resolution
+            output = cv2.resize(result, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+            # Save the visualization
+            if visualization:
+                pfm_f, png_f = write_depth(os.path.join(output_dir, str(count)), output, bits=2)
+
+            outputs.append(output)
+
+        return outputs
+
+    # Depth estimation API
+    def depth_estimation(self,
+                         images=None,
+                         paths=None,
+                         batch_size=1,
+                         output_dir='output',
+                         visualization=False):
+        # Load the input data
+        datas = self.load_datas(paths, images)
+
+        # Preprocess
+        input_datas = self.preprocess(datas)
+
+        # Predict
+        results = self.model(input_datas, batch_size=batch_size)[0]
+
+        # Postprocess
+        outputs = self.postprocess(datas, results, output_dir, visualization)
+
+        return outputs
\ No newline at end of file
diff --git a/modules/image/depth_estimation/MiDaS_Small/transforms.py b/modules/image/depth_estimation/MiDaS_Small/transforms.py
new file mode 100644
index 00000000..189dfce1
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Small/transforms.py
@@ -0,0 +1,184 @@
+# Refer https://github.com/intel-isl/MiDaS
+
+import numpy as np
+import cv2
+
+
+class Resize(object):
+    """Resize sample to given size (width, height).
+    """
+    def __init__(self,
+                 width,
+                 height,
+                 resize_target=True,
+                 keep_aspect_ratio=False,
+                 ensure_multiple_of=1,
+                 resize_method="lower_bound",
+                 image_interpolation_method=cv2.INTER_AREA):
+        """Init.
+
+        Args:
+            width (int): desired output width
+            height (int): desired output height
+            resize_target (bool, optional):
+                True: Resize the full sample (image, mask, target).
+                False: Resize image only.
+                Defaults to True.
+            keep_aspect_ratio (bool, optional):
+                True: Keep the aspect ratio of the input sample.
+                Output sample might not have the given width and height, and
+                resize behaviour depends on the parameter 'resize_method'.
+                Defaults to False.
+            ensure_multiple_of (int, optional):
+                Output width and height are constrained to be a multiple of this parameter.
+                Defaults to 1.
+            resize_method (str, optional):
+                "lower_bound": Output will be at least as large as the given size.
+                "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than given size.)
+                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
+                Defaults to "lower_bound".
+        """
+        self.__width = width
+        self.__height = height
+
+        self.__resize_target = resize_target
+        self.__keep_aspect_ratio = keep_aspect_ratio
+        self.__multiple_of = ensure_multiple_of
+        self.__resize_method = resize_method
+        self.__image_interpolation_method = image_interpolation_method
+
+    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
+        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
+
+        if max_val is not None and y > max_val:
+            y = (np.floor(x / self.__multiple_of) *
+                 self.__multiple_of).astype(int)
+
+        if y < min_val:
+            y = (np.ceil(x / self.__multiple_of) *
+                 self.__multiple_of).astype(int)
+
+        return y
+
+    def get_size(self, width, height):
+        # determine new height and width
+        scale_height = self.__height / height
+        scale_width = self.__width / width
+
+        if self.__keep_aspect_ratio:
+            if self.__resize_method == "lower_bound":
+                # scale such that output size is lower bound
+                if scale_width > scale_height:
+                    # fit width
+                    scale_height = scale_width
+                else:
+                    # fit height
+                    scale_width = scale_height
+            elif self.__resize_method == "upper_bound":
+                # scale such that output size is upper bound
+                if scale_width < scale_height:
+                    # fit width
+                    scale_height = scale_width
+                else:
+                    # fit height
+                    scale_width = scale_height
+            elif self.__resize_method == "minimal":
+                # scale as little as possible
+                if abs(1 - scale_width) < abs(1 - scale_height):
+                    # fit width
+                    scale_height = scale_width
+                else:
+                    # fit height
+                    scale_width = scale_height
+            else:
+                raise ValueError(
+                    f"resize_method {self.__resize_method} not implemented")
+
+        if self.__resize_method == "lower_bound":
+            new_height = self.constrain_to_multiple_of(scale_height * height,
+                                                       min_val=self.__height)
+            new_width = self.constrain_to_multiple_of(scale_width * width,
+                                                      min_val=self.__width)
+        elif self.__resize_method == "upper_bound":
+            new_height = self.constrain_to_multiple_of(scale_height * height,
+                                                       max_val=self.__height)
+            new_width = self.constrain_to_multiple_of(scale_width * width,
+                                                      max_val=self.__width)
+        elif self.__resize_method == "minimal":
+            new_height = self.constrain_to_multiple_of(scale_height * height)
+            new_width = self.constrain_to_multiple_of(scale_width * width)
+        else:
+            raise ValueError(
+                f"resize_method {self.__resize_method} not implemented")
+
+        return (new_width, new_height)
+
+    def __call__(self, sample):
+        width, height = self.get_size(sample["image"].shape[1],
+                                      sample["image"].shape[0])
+
+        # resize sample
+        sample["image"] = cv2.resize(
+            sample["image"],
+            (width, height),
+            interpolation=self.__image_interpolation_method,
+        )
+
+        if self.__resize_target:
+            if "disparity" in sample:
+                sample["disparity"] = cv2.resize(
+                    sample["disparity"],
+                    (width, height),
+                    interpolation=cv2.INTER_NEAREST,
+                )
+
+            if "depth" in sample:
+                sample["depth"] = cv2.resize(sample["depth"], (width, height),
+                                             interpolation=cv2.INTER_NEAREST)
+
+            sample["mask"] = cv2.resize(
+                sample["mask"].astype(np.float32),
+                (width, height),
+                interpolation=cv2.INTER_NEAREST,
+            )
+            sample["mask"] = sample["mask"].astype(bool)
+
+        return sample
+
+
+class NormalizeImage(object):
+    """Normalize image by given mean and std.
+    """
+    def __init__(self, mean, std):
+        self.__mean = mean
+        self.__std = std
+
+    def __call__(self, sample):
+        sample["image"] = (sample["image"] - self.__mean) / self.__std
+
+        return sample
+
+
+class PrepareForNet(object):
+    """Prepare sample for usage as network input.
+    """
+    def __init__(self):
+        pass
+
+    def __call__(self, sample):
+        image = np.transpose(sample["image"], (2, 0, 1))
+        sample["image"] = np.ascontiguousarray(image).astype(np.float32)
+
+        if "mask" in sample:
+            sample["mask"] = sample["mask"].astype(np.float32)
+            sample["mask"] = np.ascontiguousarray(sample["mask"])
+
+        if "disparity" in sample:
+            disparity = sample["disparity"].astype(np.float32)
+            sample["disparity"] = np.ascontiguousarray(disparity)
+
+        if "depth" in sample:
+            depth = sample["depth"].astype(np.float32)
+            sample["depth"] = np.ascontiguousarray(depth)
+
+        return sample
diff --git a/modules/image/depth_estimation/MiDaS_Small/utils.py b/modules/image/depth_estimation/MiDaS_Small/utils.py
new file mode 100644
index 00000000..2a45c7b6
--- /dev/null
+++ b/modules/image/depth_estimation/MiDaS_Small/utils.py
@@ -0,0 +1,87 @@
+# Refer https://github.com/intel-isl/MiDaS
+"""Utils for monoDepth.
+"""
+import sys
+import numpy as np
+import cv2
+
+
+def write_pfm(path, image, scale=1):
+    """Write pfm file.
+
+    Args:
+        path (str): path to file
+        image (array): data
+        scale (int, optional): Scale. Defaults to 1.
+    """
+
+    with open(path, "wb") as file:
+        color = None
+
+        if image.dtype.name != "float32":
+            raise Exception("Image dtype must be float32.")
+
+        image = np.flipud(image)
+
+        if len(image.shape) == 3 and image.shape[2] == 3:  # color image
+            color = True
+        elif (len(image.shape) == 2
+              or len(image.shape) == 3 and image.shape[2] == 1):  # greyscale
+            color = False
+        else:
+            raise Exception(
+                "Image must have H x W x 3, H x W x 1 or H x W dimensions.")
+
+        file.write(("PF\n" if color else "Pf\n").encode())
+        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
+
+        endian = image.dtype.byteorder
+
+        if endian == "<" or (endian == "=" and sys.byteorder == "little"):
+            scale = -scale
+
+        file.write("%f\n".encode() % scale)
+
+        image.tofile(file)
+
+
+def read_image(path):
+    """Read image and output RGB image (0-1).
+
+    Args:
+        path (str): path to file
+
+    Returns:
+        array: RGB image (0-1)
+    """
+    img = cv2.imread(path)
+    if img.ndim == 2:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
+    return img
+
+
+def write_depth(path, depth, bits=1):
+    """Write depth map to pfm and png file.
+
+    Args:
+        path (str): filepath without extension
+        depth (array): depth
+        bits (int, optional): bytes per png sample (1: uint8, 2: uint16). Defaults to 1.
+    """
+    write_pfm(path + ".pfm", depth.astype(np.float32))
+
+    depth_min = depth.min()
+    depth_max = depth.max()
+
+    max_val = (2**(8 * bits)) - 1
+
+    # Min-max normalize to the full integer range; a constant map becomes zeros
+    if depth_max - depth_min > np.finfo("float").eps:
+        out = max_val * (depth - depth_min) / (depth_max - depth_min)
+    else:
+        out = np.zeros(depth.shape, dtype=depth.dtype)
+
+    if bits == 1:
+        cv2.imwrite(path + ".png", out.astype("uint8"))
+    elif bits == 2:
+        cv2.imwrite(path + ".png", out.astype("uint16"))
+
+    return path + '.pfm', path + ".png"
-- 
GitLab