
MiDaS

MiDaS models for computing relative depth from a single image.

MiDaS computes relative inverse depth from a single image. The repository provides multiple models that cover different use cases, ranging from a small, high-speed model to a very large model that provides the highest accuracy. The models were trained on 10 distinct datasets using multi-objective optimization to ensure high quality on a wide range of inputs.
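
For reference, and independent of the dora operator described below, a single image can be pushed through MiDaS via torch.hub roughly as follows. This is a minimal sketch assuming internet access and a local example.jpg; the file name and the choice of MiDaS_small are placeholders.

import cv2
import torch

model_type = "MiDaS_small"  # or "DPT_Large" / "DPT_Hybrid", see the list further below
midas = torch.hub.load("intel-isl/MiDaS", model_type)
midas.eval()

midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = midas_transforms.small_transform  # dpt_transform for the DPT models

img = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
with torch.no_grad():
    prediction = midas(transform(img))
    depth = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=img.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze().numpy()  # HEIGHT x WIDTH relative inverse depth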

Installation:

To install MiDaS offline:

cd $DORA_DEP_HOME/dependencies/
git clone git@github.com:isl-org/MiDaS.git
cd MiDaS/weights
# If you skip this manual download, the operator will download the model file automatically
wget https://github.com/isl-org/MiDaS/releases/download/v2_1/midas_v21_small_256.pt
cp midas_v21_small_256.pt $HOME/.cache/torch/hub/checkpoints/
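
Before wiring the operator into a dataflow, the offline layout can be double-checked with a short script such as the one below (a sketch, not part of the repository; the paths simply mirror the commands above).

import os

dep_home = os.environ["DORA_DEP_HOME"]
midas_repo = os.path.join(dep_home, "dependencies", "MiDaS")
weight_file = os.path.join(midas_repo, "weights", "midas_v21_small_256.pt")
cached_copy = os.path.expanduser(
    "~/.cache/torch/hub/checkpoints/midas_v21_small_256.pt"
)

# Report whether the repository, the downloaded weights and the cached copy exist
for path in (midas_repo, weight_file, cached_copy):
    status = "OK     " if os.path.exists(path) else "MISSING"
    print(status, path)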

Inputs

  • image: HEIGHT x WIDTH x BGRA uint8 array (the alpha channel is dropped before inference); see the sketch below.
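
The operator reshapes the raw bytes to HEIGHT x WIDTH x 4, so the upstream node is expected to send a BGRA buffer. A hedged sketch of producing such a payload with OpenCV (the capture device index and the conversion shown here are assumptions about the webcam node, not part of this operator):

import cv2
import numpy as np

capture = cv2.VideoCapture(0)
ok, bgr = capture.read()                      # HEIGHT x WIDTH x 3, BGR
bgra = cv2.cvtColor(bgr, cv2.COLOR_BGR2BGRA)  # add an opaque alpha channel
image_bytes = np.ascontiguousarray(bgra, dtype=np.uint8).tobytes()
# image_bytes is the payload the operator receives as dora_input["data"]
capture.release()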

Outputs

  • depth_frame: HEIGHT x WIDTH x BGRA array containing the colorized relative depth (see the sketch below).
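
On the consuming side, the payload can be turned back into an image with a single reshape. A minimal sketch (the helper name is hypothetical; height and width must match the webcam resolution configured in the dataflow):

import numpy as np

def decode_depth_frame(data: bytes, height: int, width: int) -> np.ndarray:
    """Rebuild the BGRA depth image (colorized with cv2.COLORMAP_INFERNO) from raw bytes."""
    return np.frombuffer(data, dtype=np.uint8).reshape((height, width, 4))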

Example output

(screenshot: colorized depth map produced from the webcam stream)

Add the following dataflow configuration:

  - id: midas_op
    operator:
      outputs:
        - depth_frame
      inputs:
        image: webcam/image
      python: ../../operators/midas_op.py
    env:
      PYTORCH_DEVICE: "cuda"
      MIDAS_PATH: $DORA_DEP_HOME/dependencies/MiDaS/
      MIDAS_WEIGHT_PATH: $DORA_DEP_HOME/dependencies/MiDaS/weights/midas_v21_small_256.pt
      MODEL_TYPE: "MiDaS_small"
      MODEL_NAME: "MiDaS_small"

Supported MODEL_TYPE values:

  • "DPT_Large": MiDaS v3 - Large (highest accuracy, slowest inference speed)
  • "DPT_Hybrid": MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
  • "MiDaS_small": MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
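
The constants referenced in the methods below (DEVICE, MIDAS_PATH, MODEL_TYPE, ...) are taken from these environment variables. A sketch of that mapping, with assumed defaults; the IMAGE_WIDTH/IMAGE_HEIGHT variables are assumptions that must match the webcam resolution:

import os

import torch

DEVICE = os.environ.get(
    "PYTORCH_DEVICE", "cuda" if torch.cuda.is_available() else "cpu"
)
MIDAS_PATH = os.environ.get("MIDAS_PATH")  # unset => online mode via torch.hub
MIDAS_WEIGHT_PATH = os.environ.get("MIDAS_WEIGHT_PATH")
MODEL_TYPE = os.environ.get("MODEL_TYPE", "MiDaS_small")
MODEL_NAME = os.environ.get("MODEL_NAME", MODEL_TYPE)
IMAGE_WIDTH = int(os.environ.get("IMAGE_WIDTH", 640))
IMAGE_HEIGHT = int(os.environ.get("IMAGE_HEIGHT", 480))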

Methods

__init__()

Source Code
def __init__(self):
    if MIDAS_PATH is None:
        # With internet
        self.model = torch.hub.load(
            "intel-isl/MiDaS",
            MODEL_TYPE,
        )
        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
    else:
        # Without internet
        self.model = torch.hub.load(
            repo_or_dir=MIDAS_PATH,
            model=MODEL_NAME,
            weights=MIDAS_WEIGHT_PATH,
            source="local",
        )
        midas_transforms = torch.hub.load(
            repo_or_dir=MIDAS_PATH, model="transforms", source="local"
        )
    if MODEL_TYPE == "DPT_Large" or MODEL_TYPE == "DPT_Hybrid":
        self.transform = midas_transforms.dpt_transform
    else:
        self.transform = midas_transforms.small_transform
    self.model.to(torch.device(DEVICE))
    self.model.eval()


.on_event(...)

Source Code

def on_event(
    self,
    dora_event: dict,
    send_output: Callable[[str, bytes], None],
) -> DoraStatus:
    if dora_event["type"] == "INPUT":
        return self.on_input(dora_event, send_output)
    return DoraStatus.CONTINUE


.on_input(...)

Handle an image input.

Args:
  • dora_input["id"] (str): id of the input declared in the YAML configuration
  • dora_input["data"] (bytes): bytes message of the input
  • send_output (Callable[[str, bytes], None]): function enabling sending output back to dora

Source Code

def on_input(
    self,
    dora_input: dict,
    send_output: Callable[[str, bytes], None],
) -> DoraStatus:
    """Handle image
    Args:
        dora_input["id"] (str): Id of the input declared in the yaml configuration
        dora_input["data"] (bytes): Bytes message of the input
        send_output (Callable[[str, bytes]]): Function enabling sending output back to dora.
    """
    if dora_input["id"] == "image":
        # Convert bytes to numpy array
        frame = np.frombuffer(
            dora_input["data"],
            np.uint8,
        ).reshape((IMAGE_HEIGHT, IMAGE_WIDTH, 4))

        with torch.no_grad():
            image = frame[:, :, :3]
            img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            input_batch = self.transform(img).to(DEVICE)
            prediction = self.model(input_batch)
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()
            depth_output = prediction.cpu().numpy()
            depth_min = depth_output.min()
            depth_max = depth_output.max()
            normalized_depth = (
                255 * (depth_output - depth_min) / (depth_max - depth_min)
            )
            normalized_depth *= 3
            depth_frame = (
                np.repeat(np.expand_dims(normalized_depth, 2), 3, axis=2) / 3
            )
            depth_frame = cv2.applyColorMap(
                np.uint8(depth_frame), cv2.COLORMAP_INFERNO
            )
            height, width = depth_frame.shape[:2]
            depth_frame_4 = np.dstack(
                [depth_frame, np.ones((height, width), dtype="uint8") * 255]
            )

        send_output(
            "depth_frame",
            depth_frame_4.tobytes(),
            dora_input["metadata"],
        )
    return DoraStatus.CONTINUE