
MiDaS

The MiDaS model, for computing relative depth from a single image.

MiDaS computes relative inverse depth from a single image. The repository provides multiple models covering different use cases, ranging from a small, high-speed model to a very large model offering the highest accuracy. The models were trained on 10 distinct datasets using multi-objective optimization to ensure high quality across a wide range of inputs.
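
For orientation, here is a minimal standalone sketch of that workflow outside dora, following the standard torch.hub usage for MiDaS; the image path is a placeholder and the small model is chosen arbitrarily:

```python
import cv2
import torch

# Load the small model and its matching input transform via torch.hub
# (downloads the repository and weights on first use).
midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
midas.eval()
transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

img = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)  # placeholder path
with torch.no_grad():
    prediction = midas(transforms.small_transform(img))
depth = prediction.squeeze().cpu().numpy()  # relative inverse depth: larger = closer
```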

Installation:

Offline installation of MiDaS:

```bash
cd $DORA_DEP_HOME/dependencies/
git clone git@github.com:isl-org/MiDaS.git
cd MiDaS/weights
# If you skip this manual download, the program will fetch the model file automatically.
wget https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt
cp midas_v21_small_256.pt $HOME/.cache/torch/hub/checkpoints/
```
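
A quick offline smoke test, assuming the clone and weights from the steps above (the path below is a stand-in for your $DORA_DEP_HOME; it mirrors the local-loading call in `__init__` further down):

```python
import torch

MIDAS_PATH = "/path/to/dependencies/MiDaS/"  # stand-in for $DORA_DEP_HOME/dependencies/MiDaS/

# source="local" loads hubconf.py from the clone instead of fetching it from GitHub;
# the pretrained weights resolve from $HOME/.cache/torch/hub/checkpoints/ (the cp above).
model = torch.hub.load(repo_or_dir=MIDAS_PATH, model="MiDaS_small", source="local")
model.eval()
print("loaded:", type(model).__name__)
```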

Inputs

- image: HEIGHT x WIDTH x BGRA array.

Outputs

- depth_frame: HEIGHT x WIDTH x BGRA array of colorized relative depth.

Example output

[Example output image (hosted on Imgur)]

Add the following dataflow configuration:

```yaml
- id: midas_op
  operator:
    outputs:
      - depth_frame
    inputs:
      image: webcam/image
    python: ../../operators/midas_op.py
    env:
      PYTORCH_DEVICE: "cuda"
      MIDAS_PATH: $DORA_DEP_HOME/dependencies/MiDaS/
      MIDAS_WEIGHT_PATH: $DORA_DEP_HOME/dependencies/MiDaS/weights/midas_v21_small_256.pt
      MODEL_TYPE: "MiDaS_small"
      MODEL_NAME: "MiDaS_small"
```
- model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest inference speed)
- model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
- model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
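
The env entries above surface inside the operator as module-level constants. A sketch of how they are presumably read, with names matching the YAML keys (the defaults shown are assumptions; the authoritative values live in midas_op.py):

```python
import os

MIDAS_PATH = os.environ.get("MIDAS_PATH")                 # unset -> fetch from torch.hub online
MIDAS_WEIGHT_PATH = os.environ.get("MIDAS_WEIGHT_PATH")
MODEL_TYPE = os.environ.get("MODEL_TYPE", "MiDaS_small")  # assumed default
MODEL_NAME = os.environ.get("MODEL_NAME", MODEL_TYPE)     # assumed default
DEVICE = os.environ.get("PYTORCH_DEVICE", "cuda")         # set to "cpu" on machines without CUDA
```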

Methods

__init__()

Source
```python
def __init__(self):
    if MIDAS_PATH is None:
        # may require internet access
        self.model = torch.hub.load(
            "intel-isl/MiDaS",
            MODEL_TYPE,
        )
        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
    else:
        # should not require internet access
        self.model = torch.hub.load(
            repo_or_dir=MIDAS_PATH,
            model=MODEL_NAME,
            weights=MIDAS_WEIGHT_PATH,
            source="local",
        )
        midas_transforms = torch.hub.load(
            repo_or_dir=MIDAS_PATH, model="transforms", source="local"
        )
    if MODEL_TYPE == "DPT_Large" or MODEL_TYPE == "DPT_Hybrid":
        self.transform = midas_transforms.dpt_transform
    else:
        self.transform = midas_transforms.small_transform
    self.model.to(torch.device(DEVICE))
    self.model.eval()
```
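
The branch on MIDAS_PATH is the online/offline switch: when MIDAS_PATH is unset, torch.hub fetches the intel-isl/MiDaS repository (and, unless cached, the weights) from the network, whereas the source="local" branch resolves everything from the cloned directory and the pinned checkpoint, so it also works air-gapped. The input transform is then matched to the model family: the DPT models use dpt_transform, the small model uses small_transform.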


.on_event(...)

Source

```python
def on_event(
    self,
    dora_event: dict,
    send_output: Callable[[str, bytes], None],
) -> DoraStatus:
    if dora_event["type"] == "INPUT":
        return self.on_input(dora_event, send_output)
    return DoraStatus.CONTINUE
```


.on_input(...)

Handle image. Args: dora_input["id"] (str): id of the input as declared in the yaml configuration; dora_input["data"] (bytes): input message as bytes; send_output (Callable[[str, bytes]]): function for sending outputs back to dora.

Source

```python
def on_input(
    self,
    dora_input: dict,
    send_output: Callable[[str, bytes], None],
) -> DoraStatus:
    """Handle image.
    Args:
        dora_input["id"] (str): id of the input as declared in the yaml configuration
        dora_input["data"] (bytes): input message as bytes
        send_output (Callable[[str, bytes]]): function for sending outputs back to dora
    """
    if dora_input["id"] == "image":
        # convert the raw bytes into a numpy array
        frame = np.frombuffer(
            dora_input["data"],
            np.uint8,
        ).reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 4))

        with torch.no_grad():
            image = frame[:, :, :3]
            img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            input_batch = self.transform(img).to(DEVICE)
            prediction = self.model(input_batch)
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
            depth_output = prediction.cpu().numpy()
            depth_min = depth_output.min()
            depth_max = depth_output.max()
            normalized_depth = (
                255 * (depth_output - depth_min) / (depth_max - depth_min)
            )
            normalized_depth *= 3
            depth_frame = (
                np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2) / 3
            )
            depth_frame = cv2.applyColorMap(
                np.uint8(depth_frame), cv2.COLORMAP_INFERNO
            )
            height, width = depth_frame.shape[:2]
            depth_frame_4 = np.dstack(
                [depth_frame, np.ones((height, width), dtype="uint8") * 255]
            )

            send_output(
                "depth_frame",
                depth_frame_4.tobytes(),
                dora_input["metadata"],
            )
    return DoraStatus.CONTINUE
```
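
The post-processing inside on_input can be exercised on its own. A self-contained sketch, using a random array as a stand-in for the model prediction: min-max normalize to 0-255, apply the INFERNO colormap, and pack an opaque alpha channel to obtain the BGRA depth_frame bytes:

```python
import cv2
import numpy as np

prediction = np.random.rand(480, 640).astype(np.float32)  # stand-in for the model output

# Min-max normalization into the 0-255 range expected by the colormap.
depth_min, depth_max = prediction.min(), prediction.max()
normalized = 255 * (prediction - depth_min) / (depth_max - depth_min)

colored = cv2.applyColorMap(np.uint8(normalized), cv2.COLORMAP_INFERNO)     # H x W x 3, BGR
height, width = colored.shape[:2]
bgra = np.dstack([colored, np.full((height, width), 255, dtype=np.uint8)])  # H x W x 4, BGRA
assert bgra.shape == (height, width, 4) and bgra.dtype == np.uint8
```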