"""Utility functions for Semantic-Object-Context modality."""
from typing import Optional
import cv2
import numpy as np
import seaborn as sns
# from loguru import logger
[docs]
def semantic_mask_to_instances(
    mask: np.ndarray,
    area_threshold: Optional[int] = 10,
    labels_whitelist: Optional[list] = None,
) -> dict:
    """Split a semantic mask into per-class instance masks.

    An instance is a connected component (8-connectivity) of pixels that share
    the same class label. Components are extracted class by class with
    ``cv2.connectedComponentsWithStats``.

    Args:
        mask (ndarray): semantic mask in opencv image format (ndarray).
        area_threshold (int, optional): a component is kept only when its area
            is strictly greater than this value. ``None`` disables the filter.
            Defaults to 10.
        labels_whitelist (list, optional): class labels to process. When ``None``,
            every distinct value found in ``mask`` is processed. Defaults to None.

    Returns:
        instances (dict): maps each processed label to a list of boolean masks
            (same shape as ``mask``), one per retained connected component.
            A label without any retained component maps to an empty list.
    """
    if labels_whitelist is None:
        # The declared default used to crash when iterated; fall back to all
        # labels that actually occur in the mask.
        labels_whitelist = np.unique(mask).tolist()
    # Every real component has area >= 1, so a threshold of 0 keeps them all.
    min_area = 0 if area_threshold is None else area_threshold

    instances = {}
    for label in labels_whitelist:
        binary_mask = (mask == label).astype(np.uint8)
        total_labels, label_ids, stats, _ = cv2.connectedComponentsWithStats(
            binary_mask, connectivity=8
        )
        # Component id 0 is the background of the binary mask, so start at 1.
        instances[label] = [
            label_ids == component_id
            for component_id in range(1, total_labels)
            if stats[component_id, cv2.CC_STAT_AREA] > min_area
        ]
    return instances
[docs]
def instance_masks_to_objects(
    instance_masks: dict,
    points_2d: np.ndarray,
    point_labels: np.ndarray,
    points_3d: np.ndarray,
) -> dict:
    """Build per-instance objects by assigning labelled points to instance masks.

    Args:
        instance_masks (dict): maps a class label to a list of instance masks.
        points_2d (np.ndarray): pixel coordinates of the projected points, iterated
            column by column; row 0 is used as the mask column (x) and row 1 as
            the mask row (y).
        point_labels (np.ndarray): class label of every point.
        points_3d (np.ndarray): 3d points of the pointcloud, iterated row by row
            in step with ``point_labels``.

    Returns:
        objects (dict): maps ``(label, mask_index)`` to a dict holding the mask
            bounding box (``"x"``, ``"y"``, ``"width"``, ``"height"``) and
            ``"points"`` (the collected 3d points, transposed). Objects that
            received at least one point also carry ``"centroid"`` and
            ``"num_points"``.
    """
    objects = {}
    for class_label, class_masks in instance_masks.items():
        for instance_idx, instance_mask in enumerate(class_masks):
            left, top, box_w, box_h = cv2.boundingRect(instance_mask.astype(np.uint8))
            objects[(class_label, instance_idx)] = {
                "points": [],
                "x": left,
                "y": top,
                "width": box_w,
                "height": box_h,
            }

    for pixel, point_label, xyz in zip(points_2d.T, point_labels, points_3d):
        # Points whose class has no instance masks cannot belong to any object.
        if point_label not in instance_masks:
            continue
        for instance_idx, instance_mask in enumerate(instance_masks[point_label]):
            if instance_mask[pixel[1], pixel[0]]:
                objects[(point_label, instance_idx)]["points"].append(xyz)

    for properties in objects.values():
        stacked = np.array(properties["points"]).T
        properties["points"] = stacked
        # Objects without points keep only their bounding box and empty "points".
        if len(stacked) == 0:
            continue
        properties["centroid"] = np.mean(stacked, axis=1)
        properties["num_points"] = stacked.shape[1]
    return objects
[docs]
def generate_color_sequence(num_colors: int, palette: Optional[str] = "husl") -> list:
    """Generate a sequence of colors from a seaborn palette.

    Args:
        num_colors (int): number of colors to generate.
        palette (str, optional): name of the seaborn palette. Defaults to "husl".

    Returns:
        colors (list): the colors exactly as returned by ``seaborn.color_palette``
            (no conversion to 0-255 integers is applied).
    """
    return sns.color_palette(palette=palette, n_colors=num_colors)
[docs]
def get_points_labels_by_mask(points: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """Look up the semantic label under every projected point.

    Args:
        points (np.ndarray): integer pixel coordinates of the projected points with
            shape (2, n): row 0 holds x (mask column), row 1 holds y (mask row).
            Coordinates must address valid pixels of ``mask``.
        mask (np.ndarray): semantic mask in opencv image format (ndarray), indexed
            as ``mask[y, x]``.

    Returns:
        labels (np.ndarray): labels taken from the mask, one per point, with the
            dtype of ``mask`` (also when there are no points).
    """
    points = np.asarray(points)
    mask = np.asarray(mask)
    xs, ys = points[0], points[1]
    # One vectorised lookup replaces the per-point Python loop.
    return mask[ys, xs]
[docs]
def pack_objects(objects: dict, top_k: int, max_distance: float, special_classes: list) -> np.ndarray:
    """Pack object centroids into a single fixed-size array.

    Objects without a ``"centroid"`` entry, objects farther than ``max_distance``
    from the origin, and objects whose class is not listed in ``special_classes``
    are skipped.

    Args:
        objects (dict): maps ``(label, index)`` keys to object property dicts.
        top_k (int): maximum number of objects to pack per class.
        max_distance (float): objects whose centroid norm exceeds this value are
            dropped.
        special_classes (list): class labels to pack; the position of a label in
            this list is its index along the first output axis.

    Returns:
        packed_objects (np.ndarray): array of shape (N, K, 3), where N is the number
            of special classes, K is ``top_k`` and 3 are the centroid coordinates.
            Within a class, centroids are ordered by descending distance from the
            origin, and unused slots are filled with zeros.
    """
    # NOTE(review): descending order means the farthest objects survive the top_k
    # cut; confirm that nearest-first was not the intent.
    classes_num = len(special_classes)

    # First occurrence wins, matching ``list.index`` semantics for duplicate labels.
    class_index = {}
    for position, class_label in enumerate(special_classes):
        class_index.setdefault(class_label, position)

    per_class = [[] for _ in range(classes_num)]
    for key, obj in objects.items():
        if "centroid" not in obj:
            continue
        position = class_index.get(key[0])
        if position is None:
            # Not a special class: skip it rather than fail on the lookup.
            continue
        if np.linalg.norm(obj["centroid"]) > max_distance:
            continue
        per_class[position].append(obj["centroid"])

    # Preallocating gives the documented shape for every input, zero padded.
    packed_objects = np.zeros((classes_num, top_k, 3))
    for position, centroids in enumerate(per_class):
        centroids.sort(key=np.linalg.norm, reverse=True)
        kept = centroids[:top_k]
        if kept:
            packed_objects[position, : len(kept)] = kept
    return packed_objects
[docs]
def euclidean_to_cylindrical(points: np.ndarray, to_2d: bool = False) -> np.ndarray:
    """Convert euclidean coordinates to cylindrical.

    Args:
        points (np.ndarray): array of 3D coordinates with shape (n, 3); a single
            point of shape (3,) is accepted as well. When ``to_2d`` is True the
            z column is not read, so planar input of shape (n, 2) also works.
        to_2d (bool, optional): whether to return 2D cylindrical coordinates.
            Defaults to False.

    Returns:
        points (np.ndarray): array of cylindrical coordinates ``(r, theta, z)`` with
            shape (n, 3), or ``(r, theta)`` with shape (n, 2) if to_2d is True.
    """
    points = np.atleast_2d(points)  # Ensure points are in a 2D array
    x, y = points[:, 0], points[:, 1]
    r = np.sqrt(x**2 + y**2)
    theta = np.arctan2(y, x)
    if to_2d:
        return np.column_stack((r, theta))
    # z is only needed for the 3D result, so it is read last.
    return np.column_stack((r, theta, points[:, 2]))
[docs]
def cylindrical_to_euclidean(points: np.ndarray) -> np.ndarray:
    """Convert cylindrical coordinates ``(r, theta, z)`` to euclidean ``(x, y, z)``.

    Args:
        points (np.ndarray): array of cylindrical coordinates with shape (n, 3);
            a single point of shape (3,) is promoted to (1, 3).

    Returns:
        points (np.ndarray): array of euclidean coordinates with shape (n, 3).
    """
    coords = np.atleast_2d(points)
    radius = coords[:, 0]
    angle = coords[:, 1]
    height = coords[:, 2]
    return np.column_stack((radius * np.cos(angle), radius * np.sin(angle), height))
[docs]
def euclidean_to_spherical(points: np.ndarray) -> np.ndarray:
    """Convert euclidean coordinates to spherical.

    Args:
        points (np.ndarray): array of 3D coordinates with shape (n, 3); a single
            point of shape (3,) is promoted to (1, 3).

    Returns:
        points (np.ndarray): array of spherical coordinates ``(rho, theta, phi)``
            with shape (n, 3), where ``theta`` is the polar angle measured from the
            z axis and ``phi`` is the azimuthal angle. For a point at the origin,
            where the angles are undefined, ``theta`` and ``phi`` are 0.
    """
    points = np.atleast_2d(points)  # Ensure points are in a 2D array
    x, y, z = points[:, 0], points[:, 1], points[:, 2]
    rho = np.sqrt(x**2 + y**2 + z**2)
    # Skip the division at the origin (rho == 0) to avoid 0/0 -> NaN; the preset
    # ratio of 1 gives theta = 0 there. NaN inputs still propagate (NaN != 0).
    cos_theta = np.divide(z, rho, out=np.ones_like(rho), where=rho != 0)
    theta = np.arccos(cos_theta)  # polar angle
    phi = np.arctan2(y, x)  # azimuthal angle
    return np.column_stack((rho, theta, phi))
[docs]
def spherical_to_euclidean(points: np.ndarray) -> np.ndarray:
    """Convert spherical coordinates ``(rho, theta, phi)`` to euclidean ``(x, y, z)``.

    ``theta`` is used as the polar angle (from the z axis) and ``phi`` as the
    azimuthal angle.

    Args:
        points (np.ndarray): array of spherical coordinates with shape (n, 3);
            a single point of shape (3,) is promoted to (1, 3).

    Returns:
        points (np.ndarray): array of euclidean coordinates with shape (n, 3).
    """
    coords = np.atleast_2d(points)
    radius = coords[:, 0]
    polar = coords[:, 1]
    azimuth = coords[:, 2]
    # Length of the projection onto the xy plane, shared by x and y.
    planar = radius * np.sin(polar)
    return np.column_stack(
        (
            planar * np.cos(azimuth),
            planar * np.sin(azimuth),
            radius * np.cos(polar),
        )
    )