PaddlePaddle / PaddleHub

Commit 7cad8e4e (unverified)
Authored on Dec 28, 2020 by jm_12138; committed via GitHub on Dec 28, 2020
Add a depth estimation module MiDaS (#1148)
Parent: 32df7f6e

Showing 10 changed files with 1238 additions and 0 deletions (+1238 -0)
modules/image/depth_estimation/MiDaS_Large/README.md       +73  -0
modules/image/depth_estimation/MiDaS_Large/inference.py    +141 -0
modules/image/depth_estimation/MiDaS_Large/module.py       +134 -0
modules/image/depth_estimation/MiDaS_Large/transforms.py   +184 -0
modules/image/depth_estimation/MiDaS_Large/utils.py        +87  -0
modules/image/depth_estimation/MiDaS_Small/README.md       +73  -0
modules/image/depth_estimation/MiDaS_Small/inference.py    +141 -0
modules/image/depth_estimation/MiDaS_Small/module.py       +134 -0
modules/image/depth_estimation/MiDaS_Small/transforms.py   +184 -0
modules/image/depth_estimation/MiDaS_Small/utils.py        +87  -0
modules/image/depth_estimation/MiDaS_Large/README.md (new file, 0 → 100644)
## Model Overview

MiDaS v2.1 Large monocular depth estimation model.

The model estimates the depth information of an input image.

The model weights were converted from the official open-source [MiDaS](https://github.com/intel-isl/MiDaS) project.

## Installation

```shell
$ hub install MiDaS_Large
```

## Example Output

![Example output](https://img-blog.csdnimg.cn/20201227112600975.jpg)

## API

```python
def depth_estimation(
    images=None,
    paths=None,
    batch_size=1,
    output_dir='output',
    visualization=False
)
```

Depth estimation API.

**Parameters**

* images (list\[numpy.ndarray\]): image data, with ndarray.shape \[H, W, C\]; defaults to None;
* paths (list\[str\]): paths to the input images; defaults to None;
* batch\_size (int): batch size; defaults to 1;
* visualization (bool): whether to save the results as image files; defaults to False;
* output\_dir (str): directory in which to save the output images; defaults to 'output'.

**Return**

* res (list\[numpy.ndarray\]): image depth data, with ndarray.shape \[H, W\].

## Prediction Example

```python
import cv2
import paddlehub as hub

# Load the model
# use_gpu: whether to use the GPU for prediction
model = hub.Module(name='MiDaS_Large', use_gpu=False)

# Run prediction
result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = model.depth_estimation(paths=['/PATH/TO/IMAGE'])
```
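As a reference for consuming the return value, here is a minimal, illustrative sketch (not part of the module) that normalizes one returned \[H, W\] depth array from the example above to 8 bits and saves it with OpenCV; the output file name `depth_vis.png` is arbitrary:

```python
import cv2
import numpy as np

# `result` comes from the prediction example above; each entry is an [H, W] float array.
depth = result[0]

# Scale to the 0-255 range, guarding against a constant-valued map
# (the same eps guard the module uses when it writes visualizations).
d_min, d_max = depth.min(), depth.max()
if d_max - d_min > np.finfo("float").eps:
    vis = (depth - d_min) / (d_max - d_min) * 255.0
else:
    vis = np.zeros_like(depth)

cv2.imwrite('depth_vis.png', vis.astype(np.uint8))
```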
## Model Information

### Model Code

https://github.com/intel-isl/MiDaS

### Dependencies

paddlepaddle >= 2.0.0rc0

paddlehub >= 2.0.0b1
modules/image/depth_estimation/MiDaS_Large/inference.py (new file, 0 → 100644)
```python
import os
import numpy as np
from paddle.inference import create_predictor, Config

__all__ = ['InferenceModel']


class InferenceModel():
    # Initializer
    def __init__(self, modelpath, use_gpu=False, use_mkldnn=False, combined=True):
        '''
        init the inference model
        modelpath: inference model path
        use_gpu: use gpu or not
        use_mkldnn: use mkldnn or not
        combined: inference model format is combined or not
        '''
        # Load the model config
        self.config = self.load_config(modelpath, use_gpu, use_mkldnn, combined)

    # String representation
    def __repr__(self):
        '''
        get the numbers and names of inputs and outputs
        '''
        return 'inputs_num: %d\ninputs_names: %s\noutputs_num: %d\noutputs_names: %s' % (
            len(self.input_handles), str(self.input_names), len(self.output_handles), str(self.output_names))

    # Make the model callable
    def __call__(self, *input_datas, batch_size=1):
        '''
        call function
        '''
        return self.forward(*input_datas, batch_size=batch_size)

    # Build the inference config
    def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
        '''
        load the model config
        modelpath: inference model path
        use_gpu: use gpu or not
        use_mkldnn: use mkldnn or not
        combined: inference model format is combined or not
        '''
        # Configure the execution device
        if use_gpu:
            try:
                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
            except Exception:
                print(
                    'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
                )
                use_gpu = False

        # Load the model and parameters
        if combined:
            model = os.path.join(modelpath, "__model__")
            params = os.path.join(modelpath, "__params__")
            config = Config(model, params)
        else:
            config = Config(modelpath)

        # Set the device options
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            if use_mkldnn:
                config.enable_mkldnn()

        # Return the config
        return config

    # Create the predictor
    def eval(self):
        '''
        create the model predictor by model config
        '''
        # Create the predictor
        self.predictor = create_predictor(self.config)

        # Get the input and output names of the model
        self.input_names = self.predictor.get_input_names()
        self.output_names = self.predictor.get_output_names()

        # Get the input handles
        self.input_handles = []
        for input_name in self.input_names:
            self.input_handles.append(self.predictor.get_input_handle(input_name))

        # Get the output handles
        self.output_handles = []
        for output_name in self.output_names:
            self.output_handles.append(self.predictor.get_output_handle(output_name))

    # Forward computation
    def forward(self, *input_datas, batch_size=1):
        """
        model inference
        batch_size: batch size
        *input_datas: x1, x2, ..., xn
        """
        # Split the input data into batches
        datas_num = input_datas[0].shape[0]
        split_num = datas_num // batch_size + 1 if datas_num % batch_size != 0 else datas_num // batch_size
        input_datas = [np.array_split(input_data, split_num) for input_data in input_datas]

        # Iterate over the batches and run the predictor
        outputs = {}
        for step in range(split_num):
            for i in range(len(self.input_handles)):
                input_data = input_datas[i][step].copy()
                self.input_handles[i].copy_from_cpu(input_data)

            self.predictor.run()

            for i in range(len(self.output_handles)):
                output = self.output_handles[i].copy_to_cpu()
                if i in outputs:
                    outputs[i].append(output)
                else:
                    outputs[i] = [output]

        # Concatenate the per-batch results
        for key in outputs.keys():
            outputs[key] = np.concatenate(outputs[key], 0)

        # Return the prediction results
        return outputs
```
modules/image/depth_estimation/MiDaS_Large/module.py (new file, 0 → 100644)
```python
import os
import cv2
import numpy as np
from paddlehub import Module
from paddlehub.module.module import moduleinfo
from paddle.vision.transforms import Compose
from MiDaS_Large.utils import write_depth
from MiDaS_Large.inference import InferenceModel
from MiDaS_Large.transforms import Resize, NormalizeImage, PrepareForNet


@moduleinfo(
    name="MiDaS_Large",  # module name
    type="CV/style_transfer",  # module type
    author="jm12138",  # author name
    author_email="jm12138@qq.com",  # author email
    summary="MiDaS_Large",  # module summary
    version="1.0.0"  # version number
)
class MiDaS_Large(Module):
    # Initializer
    def __init__(self, name=None, directory=None, use_gpu=False):
        # Set the model path
        model_path = os.path.join(self.directory, "model-f6b98070")

        # Load the model
        self.model = InferenceModel(modelpath=model_path, use_gpu=use_gpu, use_mkldnn=False, combined=True)
        self.model.eval()

        # Preprocessing configuration
        self.net_h, self.net_w = 384, 384
        self.transform = Compose([
            Resize(
                self.net_w,
                self.net_h,
                resize_target=None,
                keep_aspect_ratio=False,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet()
        ])

    # Data loading
    @staticmethod
    def load_datas(paths, images):
        datas = []

        # Read the images from the given paths
        if paths is not None:
            for im_path in paths:
                assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
                im = cv2.imread(im_path)
                datas.append(im)

        if images is not None:
            datas = images

        # Return the data list
        return datas

    # Data preprocessing
    def preprocess(self, datas):
        input_datas = []
        for img in datas:
            # Convert BGR to RGB and scale to [0, 1]
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

            # Apply the image transforms
            img = self.transform({"image": img})["image"]

            # Add the batch dimension
            input_data = img[np.newaxis, ...]
            input_datas.append(input_data)

        # Concatenate into a single batch array
        input_datas = np.concatenate(input_datas, 0)

        return input_datas

    # Data postprocessing
    @staticmethod
    def postprocess(datas, results, output_dir='output', visualization=False):
        # Check the output directory
        if visualization:
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)

        outputs = []
        for img, result, count in zip(datas, results, range(len(datas))):
            # Resize back to the original image size
            output = cv2.resize(result, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)

            # Save the visualization
            if visualization:
                pfm_f, png_f = write_depth(os.path.join(output_dir, str(count)), output, bits=2)

            outputs.append(output)

        return outputs

    # Depth estimation
    def depth_estimation(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False):
        # Load the data
        datas = self.load_datas(paths, images)

        # Preprocess the data
        input_datas = self.preprocess(datas)

        # Run the model
        results = self.model(input_datas, batch_size=batch_size)[0]

        # Postprocess the results
        outputs = self.postprocess(datas, results, output_dir, visualization)

        return outputs
```
modules/image/depth_estimation/MiDaS_Large/transforms.py (new file, 0 → 100644)
```python
# Refer https://github.com/intel-isl/MiDaS
import numpy as np
import cv2


class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
            self,
            width,
            height,
            resize_target=True,
            keep_aspect_ratio=False,
            ensure_multiple_of=1,
            resize_method="lower_bound",
            image_interpolation_method=cv2.INTER_AREA):
        """Init.
        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(f"resize_method {self.__resize_method} not implemented")

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width)
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width)
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0])

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST)

            sample["mask"] = cv2.resize(
                sample["mask"].astype(np.float32),
                (width, height),
                interpolation=cv2.INTER_NEAREST,
            )
            sample["mask"] = sample["mask"].astype(bool)

        return sample


class NormalizeImage(object):
    """Normalize image by given mean and std.
    """

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "disparity" in sample:
            disparity = sample["disparity"].astype(np.float32)
            sample["disparity"] = np.ascontiguousarray(disparity)

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        return sample
```
modules/image/depth_estimation/MiDaS_Large/utils.py (new file, 0 → 100644)
```python
# Refer https://github.com/intel-isl/MiDaS
"""Utils for monoDepth.
"""
import sys

import numpy as np
import cv2


def write_pfm(path, image, scale=1):
    """Write pfm file.
    Args:
        path (str): path to file
        image (array): data
        scale (int, optional): Scale. Defaults to 1.
    """
    with open(path, "wb") as file:
        color = None

        if image.dtype.name != "float32":
            raise Exception("Image dtype must be float32.")

        image = np.flipud(image)

        if len(image.shape) == 3 and image.shape[2] == 3:
            # color image
            color = True
        elif (len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1):
            # greyscale
            color = False
        else:
            raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))

        endian = image.dtype.byteorder

        if endian == "<" or endian == "=" and sys.byteorder == "little":
            scale = -scale

        file.write("%f\n".encode() % scale)

        image.tofile(file)


def read_image(path):
    """Read image and output RGB image (0-1).
    Args:
        path (str): path to file
    Returns:
        array: RGB image (0-1)
    """
    img = cv2.imread(path)

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

    return img


def write_depth(path, depth, bits=1):
    """Write depth map to pfm and png file.
    Args:
        path (str): filepath without extension
        depth (array): depth
    """
    write_pfm(path + ".pfm", depth.astype(np.float32))

    depth_min = depth.min()
    depth_max = depth.max()

    max_val = (2 ** (8 * bits)) - 1

    if depth_max - depth_min > np.finfo("float").eps:
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return path + '.pfm', path + ".png"
```
modules/image/depth_estimation/MiDaS_Small/README.md (new file, 0 → 100644)
## Model Overview

MiDaS v2.1 Small monocular depth estimation model.

The model estimates the depth information of an input image.

The model weights were converted from the official open-source [MiDaS](https://github.com/intel-isl/MiDaS) project.

## Installation

```shell
$ hub install MiDaS_Small
```

## Example Output

![Example output](https://img-blog.csdnimg.cn/20201227112553903.jpg)

## API

```python
def depth_estimation(
    images=None,
    paths=None,
    batch_size=1,
    output_dir='output',
    visualization=False
)
```

Depth estimation API.

**Parameters**

* images (list\[numpy.ndarray\]): image data, with ndarray.shape \[H, W, C\]; defaults to None;
* paths (list\[str\]): paths to the input images; defaults to None;
* batch\_size (int): batch size; defaults to 1;
* visualization (bool): whether to save the results as image files; defaults to False;
* output\_dir (str): directory in which to save the output images; defaults to 'output'.

**Return**

* res (list\[numpy.ndarray\]): image depth data, with ndarray.shape \[H, W\].

## Prediction Example

```python
import cv2
import paddlehub as hub

# Load the model
# use_gpu: whether to use the GPU for prediction
model = hub.Module(name='MiDaS_Small', use_gpu=False)

# Run prediction
result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = model.depth_estimation(paths=['/PATH/TO/IMAGE'])
```
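As an additional illustrative sketch (assumptions: the placeholder paths are replaced with real image files, and the directory name `midas_small_output` is arbitrary), the built-in visualization option can be used to write the depth maps to disk directly:

```python
import paddlehub as hub

# Load the small model on CPU
model = hub.Module(name='MiDaS_Small', use_gpu=False)

# With visualization=True the module also writes <output_dir>/<index>.pfm and
# <output_dir>/<index>.png (16-bit) for every input, in addition to returning
# the [H, W] depth arrays.
results = model.depth_estimation(
    paths=['/PATH/TO/IMAGE_1', '/PATH/TO/IMAGE_2'],
    batch_size=2,
    output_dir='midas_small_output',
    visualization=True)

print(len(results), results[0].shape)
```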
## Model Information

### Model Code

https://github.com/intel-isl/MiDaS

### Dependencies

paddlepaddle >= 2.0.0rc0

paddlehub >= 2.0.0b1
modules/image/depth_estimation/MiDaS_Small/inference.py (new file, 0 → 100644)
```python
import os
import numpy as np
from paddle.inference import create_predictor, Config

__all__ = ['InferenceModel']


class InferenceModel():
    # Initializer
    def __init__(self, modelpath, use_gpu=False, use_mkldnn=False, combined=True):
        '''
        init the inference model
        modelpath: inference model path
        use_gpu: use gpu or not
        use_mkldnn: use mkldnn or not
        combined: inference model format is combined or not
        '''
        # Load the model config
        self.config = self.load_config(modelpath, use_gpu, use_mkldnn, combined)

    # String representation
    def __repr__(self):
        '''
        get the numbers and names of inputs and outputs
        '''
        return 'inputs_num: %d\ninputs_names: %s\noutputs_num: %d\noutputs_names: %s' % (
            len(self.input_handles), str(self.input_names), len(self.output_handles), str(self.output_names))

    # Make the model callable
    def __call__(self, *input_datas, batch_size=1):
        '''
        call function
        '''
        return self.forward(*input_datas, batch_size=batch_size)

    # Build the inference config
    def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
        '''
        load the model config
        modelpath: inference model path
        use_gpu: use gpu or not
        use_mkldnn: use mkldnn or not
        combined: inference model format is combined or not
        '''
        # Configure the execution device
        if use_gpu:
            try:
                int(os.environ.get('CUDA_VISIBLE_DEVICES'))
            except Exception:
                print(
                    'Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.'
                )
                use_gpu = False

        # Load the model and parameters
        if combined:
            model = os.path.join(modelpath, "__model__")
            params = os.path.join(modelpath, "__params__")
            config = Config(model, params)
        else:
            config = Config(modelpath)

        # Set the device options
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            if use_mkldnn:
                config.enable_mkldnn()

        # Return the config
        return config

    # Create the predictor
    def eval(self):
        '''
        create the model predictor by model config
        '''
        # Create the predictor
        self.predictor = create_predictor(self.config)

        # Get the input and output names of the model
        self.input_names = self.predictor.get_input_names()
        self.output_names = self.predictor.get_output_names()

        # Get the input handles
        self.input_handles = []
        for input_name in self.input_names:
            self.input_handles.append(self.predictor.get_input_handle(input_name))

        # Get the output handles
        self.output_handles = []
        for output_name in self.output_names:
            self.output_handles.append(self.predictor.get_output_handle(output_name))

    # Forward computation
    def forward(self, *input_datas, batch_size=1):
        """
        model inference
        batch_size: batch size
        *input_datas: x1, x2, ..., xn
        """
        # Split the input data into batches
        datas_num = input_datas[0].shape[0]
        split_num = datas_num // batch_size + 1 if datas_num % batch_size != 0 else datas_num // batch_size
        input_datas = [np.array_split(input_data, split_num) for input_data in input_datas]

        # Iterate over the batches and run the predictor
        outputs = {}
        for step in range(split_num):
            for i in range(len(self.input_handles)):
                input_data = input_datas[i][step].copy()
                self.input_handles[i].copy_from_cpu(input_data)

            self.predictor.run()

            for i in range(len(self.output_handles)):
                output = self.output_handles[i].copy_to_cpu()
                if i in outputs:
                    outputs[i].append(output)
                else:
                    outputs[i] = [output]

        # Concatenate the per-batch results
        for key in outputs.keys():
            outputs[key] = np.concatenate(outputs[key], 0)

        # Return the prediction results
        return outputs
```
modules/image/depth_estimation/MiDaS_Small/module.py (new file, 0 → 100644)
```python
import os
import cv2
import numpy as np
from paddlehub import Module
from paddlehub.module.module import moduleinfo
from paddle.vision.transforms import Compose
from MiDaS_Small.utils import write_depth
from MiDaS_Small.inference import InferenceModel
from MiDaS_Small.transforms import Resize, NormalizeImage, PrepareForNet


@moduleinfo(
    name="MiDaS_Small",  # module name
    type="CV/style_transfer",  # module type
    author="jm12138",  # author name
    author_email="jm12138@qq.com",  # author email
    summary="MiDaS_Small",  # module summary
    version="1.0.0"  # version number
)
class MiDaS_Small(Module):
    # Initializer
    def __init__(self, name=None, directory=None, use_gpu=False):
        # Set the model path
        model_path = os.path.join(self.directory, "model-small")

        # Load the model
        self.model = InferenceModel(modelpath=model_path, use_gpu=use_gpu, use_mkldnn=False, combined=True)
        self.model.eval()

        # Preprocessing configuration
        self.net_h, self.net_w = 256, 256
        self.transform = Compose([
            Resize(
                self.net_w,
                self.net_h,
                resize_target=None,
                keep_aspect_ratio=False,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet()
        ])

    # Data loading
    @staticmethod
    def load_datas(paths, images):
        datas = []

        # Read the images from the given paths
        if paths is not None:
            for im_path in paths:
                assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
                im = cv2.imread(im_path)
                datas.append(im)

        if images is not None:
            datas = images

        # Return the data list
        return datas

    # Data preprocessing
    def preprocess(self, datas):
        input_datas = []
        for img in datas:
            # Convert BGR to RGB and scale to [0, 1]
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

            # Apply the image transforms
            img = self.transform({"image": img})["image"]

            # Add the batch dimension
            input_data = img[np.newaxis, ...]
            input_datas.append(input_data)

        # Concatenate into a single batch array
        input_datas = np.concatenate(input_datas, 0)

        return input_datas

    # Data postprocessing
    @staticmethod
    def postprocess(datas, results, output_dir='output', visualization=False):
        # Check the output directory
        if visualization:
            if not os.path.exists(output_dir):
                os.mkdir(output_dir)

        outputs = []
        for img, result, count in zip(datas, results, range(len(datas))):
            # Resize back to the original image size
            output = cv2.resize(result, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)

            # Save the visualization
            if visualization:
                pfm_f, png_f = write_depth(os.path.join(output_dir, str(count)), output, bits=2)

            outputs.append(output)

        return outputs

    # Depth estimation
    def depth_estimation(self, images=None, paths=None, batch_size=1, output_dir='output', visualization=False):
        # Load the data
        datas = self.load_datas(paths, images)

        # Preprocess the data
        input_datas = self.preprocess(datas)

        # Run the model
        results = self.model(input_datas, batch_size=batch_size)[0]

        # Postprocess the results
        outputs = self.postprocess(datas, results, output_dir, visualization)

        return outputs
```
modules/image/depth_estimation/MiDaS_Small/transforms.py (new file, 0 → 100644)
```python
# Refer https://github.com/intel-isl/MiDaS
import numpy as np
import cv2


class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(
            self,
            width,
            height,
            resize_target=True,
            keep_aspect_ratio=False,
            ensure_multiple_of=1,
            resize_method="lower_bound",
            image_interpolation_method=cv2.INTER_AREA):
        """Init.
        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at most as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as little as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(f"resize_method {self.__resize_method} not implemented")

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, min_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, min_val=self.__width)
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height, max_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width, max_val=self.__width)
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        width, height = self.get_size(sample["image"].shape[1], sample["image"].shape[0])

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST)

            sample["mask"] = cv2.resize(
                sample["mask"].astype(np.float32),
                (width, height),
                interpolation=cv2.INTER_NEAREST,
            )
            sample["mask"] = sample["mask"].astype(bool)

        return sample


class NormalizeImage(object):
    """Normalize image by given mean and std.
    """

    def __init__(self, mean, std):
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        sample["image"] = (sample["image"] - self.__mean) / self.__std

        return sample


class PrepareForNet(object):
    """Prepare sample for usage as network input.
    """

    def __init__(self):
        pass

    def __call__(self, sample):
        image = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(image).astype(np.float32)

        if "mask" in sample:
            sample["mask"] = sample["mask"].astype(np.float32)
            sample["mask"] = np.ascontiguousarray(sample["mask"])

        if "disparity" in sample:
            disparity = sample["disparity"].astype(np.float32)
            sample["disparity"] = np.ascontiguousarray(disparity)

        if "depth" in sample:
            depth = sample["depth"].astype(np.float32)
            sample["depth"] = np.ascontiguousarray(depth)

        return sample
```
modules/image/depth_estimation/MiDaS_Small/utils.py (new file, 0 → 100644)
```python
# Refer https://github.com/intel-isl/MiDaS
"""Utils for monoDepth.
"""
import sys

import numpy as np
import cv2


def write_pfm(path, image, scale=1):
    """Write pfm file.
    Args:
        path (str): path to file
        image (array): data
        scale (int, optional): Scale. Defaults to 1.
    """
    with open(path, "wb") as file:
        color = None

        if image.dtype.name != "float32":
            raise Exception("Image dtype must be float32.")

        image = np.flipud(image)

        if len(image.shape) == 3 and image.shape[2] == 3:
            # color image
            color = True
        elif (len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1):
            # greyscale
            color = False
        else:
            raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))

        endian = image.dtype.byteorder

        if endian == "<" or endian == "=" and sys.byteorder == "little":
            scale = -scale

        file.write("%f\n".encode() % scale)

        image.tofile(file)


def read_image(path):
    """Read image and output RGB image (0-1).
    Args:
        path (str): path to file
    Returns:
        array: RGB image (0-1)
    """
    img = cv2.imread(path)

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

    return img


def write_depth(path, depth, bits=1):
    """Write depth map to pfm and png file.
    Args:
        path (str): filepath without extension
        depth (array): depth
    """
    write_pfm(path + ".pfm", depth.astype(np.float32))

    depth_min = depth.min()
    depth_max = depth.max()

    max_val = (2 ** (8 * bits)) - 1

    if depth_max - depth_min > np.finfo("float").eps:
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return path + '.pfm', path + ".png"
```