# Source code for opr.models.place_recognition.minkloc

"""Implementations of MinkLoc models."""
from typing import Tuple

from opr.modules import Concat, MinkGeM
from opr.modules.feature_extractors import MinkResNetFPNFeatureExtractor

from .base import CloudModel, LateFusionModel
from .resnet import ResNet18



# [docs]
class MinkLoc3D(CloudModel):
    """MinkLoc3D: Point Cloud Based Large-Scale Place Recognition.

    The model is a ``MinkResNetFPNFeatureExtractor`` backbone followed by a
    ``MinkGeM`` pooling head.

    Paper: https://arxiv.org/abs/2011.04530
    Code is adopted from the original repository: https://github.com/jac99/MinkLoc3Dv2, MIT License
    """

    def __init__(
        self,
        in_channels: int = 1,
        out_channels: int = 256,
        num_top_down: int = 1,
        conv0_kernel_size: int = 5,
        block: str = "BasicBlock",
        layers: Tuple[int, ...] = (1, 1, 1),
        planes: Tuple[int, ...] = (32, 64, 64),
        pooling: str = "gem",
    ) -> None:
        """Assemble the MinkLoc3D backbone and pooling head.

        Paper: https://arxiv.org/abs/2011.04530
        Code is adopted from the original repository: https://github.com/jac99/MinkLoc3Dv2, MIT License

        Args:
            in_channels (int): Number of input channels. Defaults to 1.
            out_channels (int): Number of output channels. Defaults to 256.
            num_top_down (int): Number of top-down blocks. Defaults to 1.
            conv0_kernel_size (int): Kernel size of the first convolution. Defaults to 5.
            block (str): Type of the network block. Defaults to "BasicBlock".
            layers (Tuple[int, ...]): Number of blocks in each layer. Defaults to (1, 1, 1).
            planes (Tuple[int, ...]): Number of channels in each layer. Defaults to (32, 64, 64).
            pooling (str): Type of pooling. Only "gem" is accepted. Defaults to "gem".

        Raises:
            NotImplementedError: If given pooling method is unknown.
        """
        backbone = MinkResNetFPNFeatureExtractor(
            in_channels,
            out_channels,
            num_top_down,
            conv0_kernel_size,
            block,
            layers,
            planes,
        )

        # "gem" is the only pooling name handled here; everything else is rejected.
        if pooling != "gem":
            raise NotImplementedError("Unknown pooling method: {}".format(pooling))
        head = MinkGeM()

        super().__init__(backbone=backbone, head=head)




# [docs]
class MinkLoc3Dv2(MinkLoc3D):
    """Improving Point Cloud Based Place Recognition with Ranking-based Loss and Large Batch Training.

    Same construction as ``MinkLoc3D``; only the default hyper-parameters differ.

    Paper: https://arxiv.org/abs/2203.00972
    Code is adopted from the original repository: https://github.com/jac99/MinkLoc3Dv2, MIT License
    """

    def __init__(
        self,
        in_channels: int = 1,
        out_channels: int = 256,
        num_top_down: int = 2,
        conv0_kernel_size: int = 5,
        block: str = "ECABasicBlock",
        layers: Tuple[int, ...] = (1, 1, 1, 1),
        planes: Tuple[int, ...] = (64, 128, 64, 32),
        pooling: str = "gem",
    ) -> None:
        """Forward the MinkLoc3Dv2 configuration to the ``MinkLoc3D`` constructor.

        Paper: https://arxiv.org/abs/2203.00972
        Code is adopted from the original repository: https://github.com/jac99/MinkLoc3Dv2, MIT License

        Args:
            in_channels (int): Number of input channels. Defaults to 1.
            out_channels (int): Number of output channels. Defaults to 256.
            num_top_down (int): Number of top-down blocks. Defaults to 2.
            conv0_kernel_size (int): Kernel size of the first convolution. Defaults to 5.
            block (str): Type of the network block. Defaults to "ECABasicBlock".
            layers (Tuple[int, ...]): Number of blocks in each layer. Defaults to (1, 1, 1, 1).
            planes (Tuple[int, ...]): Number of channels in each layer. Defaults to (64, 128, 64, 32).
            pooling (str): Type of pooling. Defaults to "gem".
        """
        # All arguments are handed to the parent unchanged.
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            num_top_down=num_top_down,
            conv0_kernel_size=conv0_kernel_size,
            block=block,
            layers=layers,
            planes=planes,
            pooling=pooling,
        )




# [docs]
class MinkLocMultimodal(LateFusionModel):
    """MinkLoc++: Lidar and Monocular Image Fusion for Place Recognition.

    Late-fusion model built from a ``MinkLoc3Dv2`` point cloud branch, a
    ``ResNet18`` image branch and a fusion module.

    Paper: https://arxiv.org/pdf/2104.05327.pdf
    Code is adopted from the original repository: https://github.com/jac99/MinkLocMultimodal, MIT License
    """

    def __init__(
        self,
        lidar_in_channels: int = 1,
        lidar_out_channels: int = 256,
        lidar_num_top_down: int = 2,
        lidar_conv0_kernel_size: int = 5,
        lidar_block: str = "ECABasicBlock",
        lidar_layers: Tuple[int, ...] = (1, 1, 1, 1),
        lidar_planes: Tuple[int, ...] = (64, 128, 64, 32),
        lidar_pooling: str = "gem",
        image_in_channels: int = 3,
        image_out_channels: int = 256,
        image_num_top_down: int = 0,
        image_pooling: str = "gem",
        image_pretrained: bool = True,
        fusion_type: str = "concat",
    ) -> None:
        """MinkLoc++: Lidar and Monocular Image Fusion for Place Recognition.

        Paper: https://arxiv.org/pdf/2104.05327.pdf
        Code is adopted from the original repository: https://github.com/jac99/MinkLocMultimodal, MIT License

        Args:
            lidar_in_channels (int): Number of input channels. Defaults to 1.
            lidar_out_channels (int): Number of output channels. Defaults to 256.
            lidar_num_top_down (int): Number of top-down blocks. Defaults to 2.
            lidar_conv0_kernel_size (int): Kernel size of the first convolution. Defaults to 5.
            lidar_block (str): Type of the network block. Defaults to "ECABasicBlock".
            lidar_layers (Tuple[int, ...]): Number of blocks in each layer. Defaults to (1, 1, 1, 1).
            lidar_planes (Tuple[int, ...]): Number of channels in each layer. Defaults to (64, 128, 64, 32).
            lidar_pooling (str): Type of pooling. Defaults to "gem".
            image_in_channels (int): Number of input channels. Defaults to 3.
            image_out_channels (int): Number of output channels. Defaults to 256.
            image_num_top_down (int): Number of top-down layers. Defaults to 0.
            image_pooling (str): Pooling method to use. Currently only "gem" is supported. Defaults to "gem".
            image_pretrained (bool): Whether to use pretrained weights. Defaults to True.
            fusion_type (str): How the image and cloud branches are fused. Currently only "concat"
                is supported. Defaults to "concat".

        Raises:
            NotImplementedError: If given fusion type is unknown, or if ``lidar_pooling`` is rejected
                by ``MinkLoc3Dv2``.
        """
        # Reject an unsupported fusion type up front, before any sub-model is
        # constructed, so a bad argument fails without building either branch.
        if fusion_type != "concat":
            raise NotImplementedError("Unknown fusion type in MinkLocMultimodal: {}".format(fusion_type))
        fusion_module = Concat()

        cloud_module = MinkLoc3Dv2(
            in_channels=lidar_in_channels,
            out_channels=lidar_out_channels,
            num_top_down=lidar_num_top_down,
            conv0_kernel_size=lidar_conv0_kernel_size,
            block=lidar_block,
            layers=lidar_layers,
            planes=lidar_planes,
            pooling=lidar_pooling,
        )
        image_module = ResNet18(
            in_channels=image_in_channels,
            out_channels=image_out_channels,
            num_top_down=image_num_top_down,
            pooling=image_pooling,
            pretrained=image_pretrained,
        )

        super().__init__(
            image_module=image_module,
            cloud_module=cloud_module,
            fusion_module=fusion_module,
        )