未验证 提交 7cad8e4e 编写于 作者: jm_12138's avatar jm_12138 提交者: GitHub

Add a depth estimation module MiDaS (#1148)

Add a depth estimation module MiDaS
上级 32df7f6e
## 模型概述
MiDaS v2.1 large 单目深度估计模型
模型可通过输入图像估计其中的深度信息
模型权重转换自 [MiDaS](https://github.com/intel-isl/MiDaS) 官方开源项目
## 模型安装
```shell
$ hub install MiDaS_Large
```
## 效果展示
![效果展示](https://img-blog.csdnimg.cn/20201227112600975.jpg)
## API 说明
```python
def depth_estimation(
images=None,
paths=None,
batch_size=1,
output_dir='output',
visualization=False
)
```
深度估计API
**参数**
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None;
* paths (list\[str\]): 图片的路径,默认为 None;
* batch\_size (int): batch 的大小,默认设为 1;
* visualization (bool): 是否将深度估计结果保存为文件(.pfm 和 .png),默认设为 False;
* output\_dir (str): 图片的保存路径,默认设为 output。
**返回**
* res (list\[numpy.ndarray\]): 图像深度数据,ndarray.shape 为 \[H, W\]
## 预测代码示例
```python
import cv2
import paddlehub as hub
# 模型加载
# use_gpu:是否使用GPU进行预测
model = hub.Module(name='MiDaS_Large', use_gpu=False)
# 模型预测
result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = model.depth_estimation(paths=['/PATH/TO/IMAGE'])
```
## 模型相关信息
### 模型代码
https://github.com/intel-isl/MiDaS
### 依赖
paddlepaddle >= 2.0.0rc0
paddlehub >= 2.0.0b1
import os
import numpy as np
from paddle.inference import create_predictor, Config
__all__ = ['InferenceModel']
class InferenceModel():
    """Small convenience wrapper around a Paddle Inference predictor.

    Typical usage is ``model = InferenceModel(path); model.eval();
    outputs = model(x, batch_size=n)``.
    """

    def __init__(self, modelpath, use_gpu=False, use_mkldnn=False, combined=True):
        """Build the inference configuration (the predictor is created by eval()).

        Args:
            modelpath: inference model path
            use_gpu: use gpu or not
            use_mkldnn: use mkldnn or not
            combined: inference model format is combined or not
        """
        # These are filled in by eval(); they are initialised here so that
        # __repr__ works on an instance whose predictor is not created yet.
        self.predictor = None
        self.input_names = []
        self.output_names = []
        self.input_handles = []
        self.output_handles = []
        # Load the model configuration.
        self.config = self.load_config(modelpath, use_gpu, use_mkldnn, combined)

    def __repr__(self):
        """Return the number and names of the model inputs and outputs."""
        return 'inputs_num: %d\ninputs_names: %s\noutputs_num: %d\noutputs_names: %s' % (
            len(self.input_handles),
            str(self.input_names),
            len(self.output_handles),
            str(self.output_names)
        )

    def __call__(self, *input_datas, batch_size=1):
        """Run inference; shorthand for forward()."""
        return self.forward(*input_datas, batch_size=batch_size)

    @staticmethod
    def _cuda_devices_configured():
        """Tell whether CUDA_VISIBLE_DEVICES holds one or more integer ids.

        A comma separated list such as ``"0,1"`` is accepted; an unset
        variable or a non-integer entry is rejected.
        """
        raw_value = os.environ.get('CUDA_VISIBLE_DEVICES')
        if raw_value is None:
            return False
        try:
            for device_id in raw_value.split(','):
                int(device_id)
        except ValueError:
            return False
        return True

    def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
        """Create the inference Config for the model.

        Args:
            modelpath: inference model path
            use_gpu: use gpu or not
            use_mkldnn: use mkldnn or not
            combined: inference model format is combined or not

        Returns:
            The configured ``Config`` object.
        """
        # Fall back to CPU when the GPU was requested but no device is exposed
        # through the environment.
        if use_gpu and not self._cuda_devices_configured():
            print('Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.')
            use_gpu = False

        # Load the model files.
        if combined:
            model = os.path.join(modelpath, "__model__")
            params = os.path.join(modelpath, "__params__")
            config = Config(model, params)
        else:
            config = Config(modelpath)

        # Select the execution device.
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            if use_mkldnn:
                config.enable_mkldnn()

        return config

    def eval(self):
        """Create the predictor from the config and cache its I/O handles."""
        self.predictor = create_predictor(self.config)

        # Names of the model inputs and outputs.
        self.input_names = self.predictor.get_input_names()
        self.output_names = self.predictor.get_output_names()

        # One handle per input / output, in the same order as the names.
        self.input_handles = [
            self.predictor.get_input_handle(input_name)
            for input_name in self.input_names
        ]
        self.output_handles = [
            self.predictor.get_output_handle(output_name)
            for output_name in self.output_names
        ]

    def forward(self, *input_datas, batch_size=1):
        """Run the predictor over the inputs in mini-batches.

        Args:
            *input_datas: x1, x2, ..., xn -- one array per model input, all
                split along their first axis.
            batch_size: upper bound on the number of samples per run.

        Returns:
            dict mapping the output index to the per-batch results
            concatenated along axis 0.

        Raises:
            RuntimeError: if eval() has not been called yet.
        """
        if self.predictor is None:
            raise RuntimeError('The predictor is not created, call eval() first.')

        # Number of chunks = ceil(datas_num / batch_size).
        datas_num = input_datas[0].shape[0]
        split_num = -(-datas_num // batch_size)
        input_datas = [np.array_split(input_data, split_num) for input_data in input_datas]

        # Feed every chunk through the predictor and collect the outputs.
        outputs = {}
        for step in range(split_num):
            for i, input_handle in enumerate(self.input_handles):
                input_handle.copy_from_cpu(input_datas[i][step].copy())

            self.predictor.run()

            for i, output_handle in enumerate(self.output_handles):
                outputs.setdefault(i, []).append(output_handle.copy_to_cpu())

        # Merge the per-chunk results.
        for key in outputs:
            outputs[key] = np.concatenate(outputs[key], 0)

        return outputs
\ No newline at end of file
import os
import cv2
import numpy as np
from paddlehub import Module
from paddlehub.module.module import moduleinfo
from paddle.vision.transforms import Compose
from MiDaS_Large.utils import write_depth
from MiDaS_Large.inference import InferenceModel
from MiDaS_Large.transforms import Resize, NormalizeImage, PrepareForNet
@moduleinfo(
    name="MiDaS_Large",  # module name
    type="CV/style_transfer",  # module type
    author="jm12138",  # author name
    author_email="jm12138@qq.com",  # author e-mail
    summary="MiDaS_Large",  # module summary
    version="1.0.0"  # version
)
class MiDaS_Large(Module):
    """PaddleHub module exposing the MiDaS large model for depth estimation."""

    def __init__(self, name=None, directory=None, use_gpu=False):
        """Load the inference model and build the preprocessing pipeline.

        Args:
            name: unused here.
            directory: unused here; the model location is read from
                ``self.directory`` (presumably set by the PaddleHub Module
                machinery -- confirm).
            use_gpu: run the predictor on GPU when True.
        """
        # Location of the converted model files.
        model_path = os.path.join(self.directory, "model-f6b98070")

        # Load the model and create its predictor.
        self.model = InferenceModel(
            modelpath=model_path,
            use_gpu=use_gpu,
            use_mkldnn=False,
            combined=True
        )
        self.model.eval()

        # Preprocessing: resize to the network input size, normalise, and
        # convert to a contiguous float32 CHW array.
        self.net_h, self.net_w = 384, 384
        self.transform = Compose([
            Resize(
                self.net_w,
                self.net_h,
                resize_target=None,
                keep_aspect_ratio=False,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet()
        ])

    @staticmethod
    def load_datas(paths, images):
        """Collect the input images.

        Args:
            paths: list of image file paths, or None.
            images: list of already decoded images, or None.

        Returns:
            The list of images. When ``images`` is given it is returned as is
            and takes precedence over anything read from ``paths``.
        """
        datas = []

        # Read the images from disk.
        if paths is not None:
            for im_path in paths:
                assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
                datas.append(cv2.imread(im_path))

        # In-memory images override the ones loaded from paths.
        if images is not None:
            datas = images

        return datas

    def preprocess(self, datas):
        """Turn the images into one batched network input array."""
        input_datas = []
        for img in datas:
            # BGR -> RGB and scale to [0, 1].
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

            # Resize / normalise / reorder through the transform pipeline.
            img = self.transform({"image": img})["image"]

            # Add the leading batch axis.
            input_datas.append(img[np.newaxis, ...])

        # Stack all samples along the batch axis.
        return np.concatenate(input_datas, 0)

    @staticmethod
    def postprocess(datas, results, output_dir='output', visualization=False):
        """Resize each prediction back to its source image size.

        Args:
            datas: the original images (used only for their height/width).
            results: one prediction per image, in the same order.
            output_dir: directory receiving the files when visualization is on.
            visualization: when True, write ``<index>.pfm`` and ``<index>.png``
                into ``output_dir`` for every image.

        Returns:
            list with one resized prediction per image.
        """
        # makedirs also handles nested output directories.
        if visualization:
            os.makedirs(output_dir, exist_ok=True)

        outputs = []
        for count, (img, result) in enumerate(zip(datas, results)):
            # Scale back to the original resolution.
            output = cv2.resize(result, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)

            # Save the result to disk.
            if visualization:
                write_depth(os.path.join(output_dir, str(count)), output, bits=2)

            outputs.append(output)

        return outputs

    def depth_estimation(self,
                         images=None,
                         paths=None,
                         batch_size=1,
                         output_dir='output',
                         visualization=False):
        """Estimate depth for the given images.

        Args:
            images: list of decoded images, or None.
            paths: list of image file paths, or None.
            batch_size: number of images per predictor run.
            output_dir: directory receiving the visualisation files.
            visualization: whether to save the results to ``output_dir``.

        Returns:
            list with one depth array per input image.
        """
        # Load the data.
        datas = self.load_datas(paths, images)

        # Preprocess.
        input_datas = self.preprocess(datas)

        # Predict; only the first model output is used.
        results = self.model(input_datas, batch_size=batch_size)[0]

        # Postprocess.
        outputs = self.postprocess(datas, results, output_dir, visualization)

        return outputs
\ No newline at end of file
# Refer https://github.com/intel-isl/MiDaS
import numpy as np
import cv2
class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(self,
                 width,
                 height,
                 resize_target=True,
                 keep_aspect_ratio=False,
                 ensure_multiple_of=1,
                 resize_method="lower_bound",
                 image_interpolation_method=cv2.INTER_AREA):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as least as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method (int, optional):
                OpenCV interpolation flag used for the image.
                Defaults to cv2.INTER_AREA.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap ``x`` to a multiple of ``ensure_multiple_of``.

        The value is rounded to the nearest multiple, rounded down instead
        when that exceeds ``max_val``, and rounded up instead when the result
        is below ``min_val``.
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) *
                 self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) *
                 self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Compute the output ``(width, height)`` for an input of the given size.

        Raises:
            ValueError: if ``resize_method`` is not one of the supported names.
        """
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented")

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height,
                                                       min_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width,
                                                      min_val=self.__width)
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height,
                                                       max_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width,
                                                      max_val=self.__width)
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        """Resize ``sample["image"]`` and, if enabled, the optional targets.

        The sample dict is modified in place and returned.
        """
        width, height = self.get_size(sample["image"].shape[1],
                                      sample["image"].shape[0])

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(sample["depth"], (width, height),
                                             interpolation=cv2.INTER_NEAREST)

            # The mask is optional like the other targets; without this guard
            # an image-only sample raised KeyError.
            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                sample["mask"] = sample["mask"].astype(bool)

        return sample
class NormalizeImage(object):
    """Normalize the sample image with a fixed mean and std.
    """

    def __init__(self, mean, std):
        """Store the mean to subtract and the std to divide by."""
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        """Replace ``sample["image"]`` by ``(image - mean) / std`` and return the sample."""
        centered = sample["image"] - self.__mean
        sample["image"] = centered / self.__std
        return sample
class PrepareForNet(object):
    """Convert a sample into the array layout fed to the network.
    """

    def __init__(self):
        """Nothing to configure."""

    def __call__(self, sample):
        """Make the image channel-first float32 and cast the optional targets.

        The image axes are reordered with ``(2, 0, 1)``; ``mask``,
        ``disparity`` and ``depth`` are converted to contiguous float32
        arrays when present. The sample is modified in place and returned.
        """
        channel_first = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(channel_first).astype(np.float32)

        # Optional entries all receive the same cast + contiguity treatment.
        for key in ("mask", "disparity", "depth"):
            if key not in sample:
                continue
            as_float = sample[key].astype(np.float32)
            sample[key] = np.ascontiguousarray(as_float)

        return sample
# Refer https://github.com/intel-isl/MiDaS
"""Utils for monoDepth.
"""
import sys
import numpy as np
import cv2
def write_pfm(path, image, scale=1):
    """Write pfm file.

    Args:
        path (str): path to file
        image (array): float32 data of shape H x W x 3, H x W x 1 or H x W
        scale (int, optional): Scale. Defaults to 1.

    Raises:
        Exception: if the dtype is not float32 or the shape is unsupported.
    """
    # Validate before opening so invalid input does not leave a truncated file.
    if image.dtype.name != "float32":
        raise Exception("Image dtype must be float32.")

    # Rows are written bottom-to-top.
    image = np.flipud(image)

    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
        color = True
    elif (len(image.shape) == 2
          or (len(image.shape) == 3 and image.shape[2] == 1)):  # greyscale
        color = False
    else:
        raise Exception(
            "Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # A negative scale marks little-endian data in the header.
    endian = image.dtype.byteorder
    if endian == "<" or (endian == "=" and sys.byteorder == "little"):
        scale = -scale

    with open(path, "wb") as file:
        # Encode the whole conditional: the file is binary, so both the
        # "PF" and the "Pf" header must be written as bytes.
        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
        file.write("%f\n".encode() % scale)

        image.tofile(file)
def read_image(path):
    """Read image and output RGB image (0-1).

    Args:
        path (str): path to file

    Returns:
        array: RGB image (0-1)

    Raises:
        OSError: if the file cannot be read as an image.
    """
    img = cv2.imread(path)

    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError("Unable to read image file: {}".format(path))

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

    return img
def write_depth(path, depth, bits=1):
    """Write depth map to pfm and png file.

    Args:
        path (str): filepath without extension
        depth (array): depth
        bits (int, optional): bytes per png sample; 1 writes uint8 and
            2 writes uint16. For any other value no png is written.
            Defaults to 1.

    Returns:
        tuple: the pfm path and the png path.
    """
    write_pfm(path + ".pfm", depth.astype(np.float32))

    depth_min = depth.min()
    depth_max = depth.max()

    # Largest value representable with the requested number of bytes.
    max_val = (2**(8 * bits)) - 1

    if depth_max - depth_min > np.finfo("float").eps:
        # Stretch the depth range to [0, max_val].
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        # Constant depth map: emit zeros (ndarray exposes `dtype`, not `type`).
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return path + '.pfm', path + ".png"
## 模型概述
MiDaS v2.1 small 单目深度估计模型
模型可通过输入图像估计其中的深度信息
模型权重转换自 [MiDaS](https://github.com/intel-isl/MiDaS) 官方开源项目
## 模型安装
```shell
$ hub install MiDaS_Small
```
## 效果展示
![效果展示](https://img-blog.csdnimg.cn/20201227112553903.jpg)
## API 说明
```python
def depth_estimation(
images=None,
paths=None,
batch_size=1,
output_dir='output',
visualization=False
)
```
深度估计API
**参数**
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None;
* paths (list\[str\]): 图片的路径,默认为 None;
* batch\_size (int): batch 的大小,默认设为 1;
* visualization (bool): 是否将深度估计结果保存为文件(.pfm 和 .png),默认设为 False;
* output\_dir (str): 图片的保存路径,默认设为 output。
**返回**
* res (list\[numpy.ndarray\]): 图像深度数据,ndarray.shape 为 \[H, W\]
## 预测代码示例
```python
import cv2
import paddlehub as hub
# 模型加载
# use_gpu:是否使用GPU进行预测
model = hub.Module(name='MiDaS_Small', use_gpu=False)
# 模型预测
result = model.depth_estimation(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = model.depth_estimation(paths=['/PATH/TO/IMAGE'])
```
## 模型相关信息
### 模型代码
https://github.com/intel-isl/MiDaS
### 依赖
paddlepaddle >= 2.0.0rc0
paddlehub >= 2.0.0b1
import os
import numpy as np
from paddle.inference import create_predictor, Config
__all__ = ['InferenceModel']
class InferenceModel():
    """Small convenience wrapper around a Paddle Inference predictor.

    Typical usage is ``model = InferenceModel(path); model.eval();
    outputs = model(x, batch_size=n)``.
    """

    def __init__(self, modelpath, use_gpu=False, use_mkldnn=False, combined=True):
        """Build the inference configuration (the predictor is created by eval()).

        Args:
            modelpath: inference model path
            use_gpu: use gpu or not
            use_mkldnn: use mkldnn or not
            combined: inference model format is combined or not
        """
        # These are filled in by eval(); they are initialised here so that
        # __repr__ works on an instance whose predictor is not created yet.
        self.predictor = None
        self.input_names = []
        self.output_names = []
        self.input_handles = []
        self.output_handles = []
        # Load the model configuration.
        self.config = self.load_config(modelpath, use_gpu, use_mkldnn, combined)

    def __repr__(self):
        """Return the number and names of the model inputs and outputs."""
        return 'inputs_num: %d\ninputs_names: %s\noutputs_num: %d\noutputs_names: %s' % (
            len(self.input_handles),
            str(self.input_names),
            len(self.output_handles),
            str(self.output_names)
        )

    def __call__(self, *input_datas, batch_size=1):
        """Run inference; shorthand for forward()."""
        return self.forward(*input_datas, batch_size=batch_size)

    @staticmethod
    def _cuda_devices_configured():
        """Tell whether CUDA_VISIBLE_DEVICES holds one or more integer ids.

        A comma separated list such as ``"0,1"`` is accepted; an unset
        variable or a non-integer entry is rejected.
        """
        raw_value = os.environ.get('CUDA_VISIBLE_DEVICES')
        if raw_value is None:
            return False
        try:
            for device_id in raw_value.split(','):
                int(device_id)
        except ValueError:
            return False
        return True

    def load_config(self, modelpath, use_gpu, use_mkldnn, combined):
        """Create the inference Config for the model.

        Args:
            modelpath: inference model path
            use_gpu: use gpu or not
            use_mkldnn: use mkldnn or not
            combined: inference model format is combined or not

        Returns:
            The configured ``Config`` object.
        """
        # Fall back to CPU when the GPU was requested but no device is exposed
        # through the environment.
        if use_gpu and not self._cuda_devices_configured():
            print('Error! Unable to use GPU. Please set the environment variables "CUDA_VISIBLE_DEVICES=GPU_id" to use GPU.')
            use_gpu = False

        # Load the model files.
        if combined:
            model = os.path.join(modelpath, "__model__")
            params = os.path.join(modelpath, "__params__")
            config = Config(model, params)
        else:
            config = Config(modelpath)

        # Select the execution device.
        if use_gpu:
            config.enable_use_gpu(100, 0)
        else:
            config.disable_gpu()
            if use_mkldnn:
                config.enable_mkldnn()

        return config

    def eval(self):
        """Create the predictor from the config and cache its I/O handles."""
        self.predictor = create_predictor(self.config)

        # Names of the model inputs and outputs.
        self.input_names = self.predictor.get_input_names()
        self.output_names = self.predictor.get_output_names()

        # One handle per input / output, in the same order as the names.
        self.input_handles = [
            self.predictor.get_input_handle(input_name)
            for input_name in self.input_names
        ]
        self.output_handles = [
            self.predictor.get_output_handle(output_name)
            for output_name in self.output_names
        ]

    def forward(self, *input_datas, batch_size=1):
        """Run the predictor over the inputs in mini-batches.

        Args:
            *input_datas: x1, x2, ..., xn -- one array per model input, all
                split along their first axis.
            batch_size: upper bound on the number of samples per run.

        Returns:
            dict mapping the output index to the per-batch results
            concatenated along axis 0.

        Raises:
            RuntimeError: if eval() has not been called yet.
        """
        if self.predictor is None:
            raise RuntimeError('The predictor is not created, call eval() first.')

        # Number of chunks = ceil(datas_num / batch_size).
        datas_num = input_datas[0].shape[0]
        split_num = -(-datas_num // batch_size)
        input_datas = [np.array_split(input_data, split_num) for input_data in input_datas]

        # Feed every chunk through the predictor and collect the outputs.
        outputs = {}
        for step in range(split_num):
            for i, input_handle in enumerate(self.input_handles):
                input_handle.copy_from_cpu(input_datas[i][step].copy())

            self.predictor.run()

            for i, output_handle in enumerate(self.output_handles):
                outputs.setdefault(i, []).append(output_handle.copy_to_cpu())

        # Merge the per-chunk results.
        for key in outputs:
            outputs[key] = np.concatenate(outputs[key], 0)

        return outputs
\ No newline at end of file
import os
import cv2
import numpy as np
from paddlehub import Module
from paddlehub.module.module import moduleinfo
from paddle.vision.transforms import Compose
from MiDaS_Small.utils import write_depth
from MiDaS_Small.inference import InferenceModel
from MiDaS_Small.transforms import Resize, NormalizeImage, PrepareForNet
@moduleinfo(
    name="MiDaS_Small",  # module name
    type="CV/style_transfer",  # module type
    author="jm12138",  # author name
    author_email="jm12138@qq.com",  # author e-mail
    summary="MiDaS_Small",  # module summary
    version="1.0.0"  # version
)
class MiDaS_Small(Module):
    """PaddleHub module exposing the MiDaS small model for depth estimation."""

    def __init__(self, name=None, directory=None, use_gpu=False):
        """Load the inference model and build the preprocessing pipeline.

        Args:
            name: unused here.
            directory: unused here; the model location is read from
                ``self.directory`` (presumably set by the PaddleHub Module
                machinery -- confirm).
            use_gpu: run the predictor on GPU when True.
        """
        # Location of the converted model files.
        model_path = os.path.join(self.directory, "model-small")

        # Load the model and create its predictor.
        self.model = InferenceModel(
            modelpath=model_path,
            use_gpu=use_gpu,
            use_mkldnn=False,
            combined=True
        )
        self.model.eval()

        # Preprocessing: resize to the network input size, normalise, and
        # convert to a contiguous float32 CHW array.
        self.net_h, self.net_w = 256, 256
        self.transform = Compose([
            Resize(
                self.net_w,
                self.net_h,
                resize_target=None,
                keep_aspect_ratio=False,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet()
        ])

    @staticmethod
    def load_datas(paths, images):
        """Collect the input images.

        Args:
            paths: list of image file paths, or None.
            images: list of already decoded images, or None.

        Returns:
            The list of images. When ``images`` is given it is returned as is
            and takes precedence over anything read from ``paths``.
        """
        datas = []

        # Read the images from disk.
        if paths is not None:
            for im_path in paths:
                assert os.path.isfile(im_path), "The {} isn't a valid file path.".format(im_path)
                datas.append(cv2.imread(im_path))

        # In-memory images override the ones loaded from paths.
        if images is not None:
            datas = images

        return datas

    def preprocess(self, datas):
        """Turn the images into one batched network input array."""
        input_datas = []
        for img in datas:
            # BGR -> RGB and scale to [0, 1].
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

            # Resize / normalise / reorder through the transform pipeline.
            img = self.transform({"image": img})["image"]

            # Add the leading batch axis.
            input_datas.append(img[np.newaxis, ...])

        # Stack all samples along the batch axis.
        return np.concatenate(input_datas, 0)

    @staticmethod
    def postprocess(datas, results, output_dir='output', visualization=False):
        """Resize each prediction back to its source image size.

        Args:
            datas: the original images (used only for their height/width).
            results: one prediction per image, in the same order.
            output_dir: directory receiving the files when visualization is on.
            visualization: when True, write ``<index>.pfm`` and ``<index>.png``
                into ``output_dir`` for every image.

        Returns:
            list with one resized prediction per image.
        """
        # makedirs also handles nested output directories.
        if visualization:
            os.makedirs(output_dir, exist_ok=True)

        outputs = []
        for count, (img, result) in enumerate(zip(datas, results)):
            # Scale back to the original resolution.
            output = cv2.resize(result, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)

            # Save the result to disk.
            if visualization:
                write_depth(os.path.join(output_dir, str(count)), output, bits=2)

            outputs.append(output)

        return outputs

    def depth_estimation(self,
                         images=None,
                         paths=None,
                         batch_size=1,
                         output_dir='output',
                         visualization=False):
        """Estimate depth for the given images.

        Args:
            images: list of decoded images, or None.
            paths: list of image file paths, or None.
            batch_size: number of images per predictor run.
            output_dir: directory receiving the visualisation files.
            visualization: whether to save the results to ``output_dir``.

        Returns:
            list with one depth array per input image.
        """
        # Load the data.
        datas = self.load_datas(paths, images)

        # Preprocess.
        input_datas = self.preprocess(datas)

        # Predict; only the first model output is used.
        results = self.model(input_datas, batch_size=batch_size)[0]

        # Postprocess.
        outputs = self.postprocess(datas, results, output_dir, visualization)

        return outputs
\ No newline at end of file
# Refer https://github.com/intel-isl/MiDaS
import numpy as np
import cv2
class Resize(object):
    """Resize sample to given size (width, height).
    """

    def __init__(self,
                 width,
                 height,
                 resize_target=True,
                 keep_aspect_ratio=False,
                 ensure_multiple_of=1,
                 resize_method="lower_bound",
                 image_interpolation_method=cv2.INTER_AREA):
        """Init.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: Resize the full sample (image, mask, target).
                False: Resize image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: Keep the aspect ratio of the input sample.
                Output sample might not have the given width and height, and
                resize behaviour depends on the parameter 'resize_method'.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height is constrained to be multiple of this parameter.
                Defaults to 1.
            resize_method (str, optional):
                "lower_bound": Output will be at least as large as the given size.
                "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
                "minimal": Scale as least as possible. (Output size might be smaller than given size.)
                Defaults to "lower_bound".
            image_interpolation_method (int, optional):
                OpenCV interpolation flag used for the image.
                Defaults to cv2.INTER_AREA.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap ``x`` to a multiple of ``ensure_multiple_of``.

        The value is rounded to the nearest multiple, rounded down instead
        when that exceeds ``max_val``, and rounded up instead when the result
        is below ``min_val``.
        """
        y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / self.__multiple_of) *
                 self.__multiple_of).astype(int)

        if y < min_val:
            y = (np.ceil(x / self.__multiple_of) *
                 self.__multiple_of).astype(int)

        return y

    def get_size(self, width, height):
        """Compute the output ``(width, height)`` for an input of the given size.

        Raises:
            ValueError: if ``resize_method`` is not one of the supported names.
        """
        # determine new height and width
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            if self.__resize_method == "lower_bound":
                # scale such that output size is lower bound
                if scale_width > scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "upper_bound":
                # scale such that output size is upper bound
                if scale_width < scale_height:
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            elif self.__resize_method == "minimal":
                # scale as little as possible
                if abs(1 - scale_width) < abs(1 - scale_height):
                    # fit width
                    scale_height = scale_width
                else:
                    # fit height
                    scale_width = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented")

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height,
                                                       min_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width,
                                                      min_val=self.__width)
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(scale_height * height,
                                                       max_val=self.__height)
            new_width = self.constrain_to_multiple_of(scale_width * width,
                                                      max_val=self.__width)
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(
                f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        """Resize ``sample["image"]`` and, if enabled, the optional targets.

        The sample dict is modified in place and returned.
        """
        width, height = self.get_size(sample["image"].shape[1],
                                      sample["image"].shape[0])

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(sample["depth"], (width, height),
                                             interpolation=cv2.INTER_NEAREST)

            # The mask is optional like the other targets; without this guard
            # an image-only sample raised KeyError.
            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                sample["mask"] = sample["mask"].astype(bool)

        return sample
class NormalizeImage(object):
    """Normalize the sample image with a fixed mean and std.
    """

    def __init__(self, mean, std):
        """Store the mean to subtract and the std to divide by."""
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        """Replace ``sample["image"]`` by ``(image - mean) / std`` and return the sample."""
        centered = sample["image"] - self.__mean
        sample["image"] = centered / self.__std
        return sample
class PrepareForNet(object):
    """Convert a sample into the array layout fed to the network.
    """

    def __init__(self):
        """Nothing to configure."""

    def __call__(self, sample):
        """Make the image channel-first float32 and cast the optional targets.

        The image axes are reordered with ``(2, 0, 1)``; ``mask``,
        ``disparity`` and ``depth`` are converted to contiguous float32
        arrays when present. The sample is modified in place and returned.
        """
        channel_first = np.transpose(sample["image"], (2, 0, 1))
        sample["image"] = np.ascontiguousarray(channel_first).astype(np.float32)

        # Optional entries all receive the same cast + contiguity treatment.
        for key in ("mask", "disparity", "depth"):
            if key not in sample:
                continue
            as_float = sample[key].astype(np.float32)
            sample[key] = np.ascontiguousarray(as_float)

        return sample
# Refer https://github.com/intel-isl/MiDaS
"""Utils for monoDepth.
"""
import sys
import numpy as np
import cv2
def write_pfm(path, image, scale=1):
    """Write pfm file.

    Args:
        path (str): path to file
        image (array): float32 data of shape H x W x 3, H x W x 1 or H x W
        scale (int, optional): Scale. Defaults to 1.

    Raises:
        Exception: if the dtype is not float32 or the shape is unsupported.
    """
    # Validate before opening so invalid input does not leave a truncated file.
    if image.dtype.name != "float32":
        raise Exception("Image dtype must be float32.")

    # Rows are written bottom-to-top.
    image = np.flipud(image)

    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
        color = True
    elif (len(image.shape) == 2
          or (len(image.shape) == 3 and image.shape[2] == 1)):  # greyscale
        color = False
    else:
        raise Exception(
            "Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # A negative scale marks little-endian data in the header.
    endian = image.dtype.byteorder
    if endian == "<" or (endian == "=" and sys.byteorder == "little"):
        scale = -scale

    with open(path, "wb") as file:
        # Encode the whole conditional: the file is binary, so both the
        # "PF" and the "Pf" header must be written as bytes.
        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
        file.write("%f\n".encode() % scale)

        image.tofile(file)
def read_image(path):
    """Read image and output RGB image (0-1).

    Args:
        path (str): path to file

    Returns:
        array: RGB image (0-1)

    Raises:
        OSError: if the file cannot be read as an image.
    """
    img = cv2.imread(path)

    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError("Unable to read image file: {}".format(path))

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

    return img
def write_depth(path, depth, bits=1):
    """Write depth map to pfm and png file.

    Args:
        path (str): filepath without extension
        depth (array): depth
        bits (int, optional): bytes per png sample; 1 writes uint8 and
            2 writes uint16. For any other value no png is written.
            Defaults to 1.

    Returns:
        tuple: the pfm path and the png path.
    """
    write_pfm(path + ".pfm", depth.astype(np.float32))

    depth_min = depth.min()
    depth_max = depth.max()

    # Largest value representable with the requested number of bytes.
    max_val = (2**(8 * bits)) - 1

    if depth_max - depth_min > np.finfo("float").eps:
        # Stretch the depth range to [0, max_val].
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        # Constant depth map: emit zeros (ndarray exposes `dtype`, not `type`).
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return path + '.pfm', path + ".png"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册