# Source code for flash.image.segmentation.output

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from typing import Any, Dict, Optional, Tuple, Union

import numpy as np
import torch
from torch import Tensor

import flash
from flash.core.data.io.input import DataKeys
from flash.core.data.io.output import Output
from flash.core.registry import FlashRegistry
from flash.core.utilities.imports import (
    _FIFTYONE_AVAILABLE,
    _MATPLOTLIB_AVAILABLE,
    _TORCHVISION_AVAILABLE,
    lazy_import,
    requires,
)
from flash.core.utilities.providers import _FIFTYONE

# Optional FiftyOne support: ``fol`` is the module handle returned by ``lazy_import`` and ``Segmentation`` is the
# dotted name used in return annotations. Both are ``None`` when FiftyOne is not available.
fol, Segmentation = (
    (lazy_import("fiftyone.core.labels"), "fiftyone.core.labels.Segmentation")
    if _FIFTYONE_AVAILABLE
    else (None, None)
)

# Optional matplotlib support: ``plt`` stays ``None`` unless the guarded import below runs.
plt = None
if _MATPLOTLIB_AVAILABLE:
    import matplotlib.pyplot as plt

# Optional torchvision support: ``T`` stays ``None`` unless the guarded import below runs.
T = None
if _TORCHVISION_AVAILABLE:
    from torchvision import transforms as T


# Registry that the output classes defined in this module register themselves with.
SEMANTIC_SEGMENTATION_OUTPUTS = FlashRegistry("outputs")


@SEMANTIC_SEGMENTATION_OUTPUTS(name="labels")
class SegmentationLabelsOutput(Output):
    """A :class:`.Output` which converts the model outputs to the label of the argmax classification per pixel in the
    image for semantic segmentation tasks.

    Args:
        labels_map: A dictionary that maps the label ids to RGB pixel intensities. It is only used when visualizing.
            If it is ``None`` the first time a visualization is produced, a random map is generated and stored on the
            instance so that later visualizations reuse the same colours.
        visualize: Whether to visualize the image labels.
    """

    @requires("image")
    def __init__(self, labels_map: Optional[Dict[int, Tuple[int, int, int]]] = None, visualize: bool = False):
        super().__init__()
        self.labels_map = labels_map
        self.visualize = visualize

    @staticmethod
    def labels_to_image(img_labels: Tensor, labels_map: Dict[int, Tuple[int, int, int]]) -> Tensor:
        """Create an RGB representation of an image of label ids, for visualisation purposes.

        Args:
            img_labels: A 2-D tensor of label ids.
            labels_map: A mapping from label id to a 3-tuple of channel intensities.

        Returns:
            A ``uint8`` tensor of shape ``(3, H, W)`` on the same device as ``img_labels``. Pixels whose label id is
            not present in ``labels_map`` are left at zero.
        """
        assert len(img_labels.shape) == 2, img_labels.shape
        H, W = img_labels.shape
        # Zero-initialise (rather than ``torch.empty``) so that unmapped labels never expose uninitialised memory,
        # and allocate on the labels' device so the in-place masked fill below does not mix devices.
        out = torch.zeros(3, H, W, dtype=torch.uint8, device=img_labels.device)
        for label_id, label_val in labels_map.items():
            mask = img_labels == label_id
            for i in range(3):
                out[i].masked_fill_(mask, label_val[i])
        return out

    @staticmethod
    def create_random_labels_map(num_classes: int) -> Dict[int, Tuple[int, int, int]]:
        """Build a mapping from each class id in ``range(num_classes)`` to a random 3-tuple of values in 0..255."""
        return {
            class_id: (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            for class_id in range(num_classes)
        }

    @requires("matplotlib")
    def _visualize(self, labels):
        """Render ``labels`` as a colour image using ``self.labels_map`` and show it with matplotlib."""
        labels_vis = self.labels_to_image(labels, self.labels_map)
        # ``ToPILImage`` is a transform class: instantiate it first, then call the instance on the tensor.
        labels_vis = T.ToPILImage()(labels_vis.cpu())
        plt.imshow(labels_vis)
        plt.show()

    def transform(self, sample: Dict[str, Tensor]) -> list:
        """Convert the predictions stored under ``DataKeys.PREDS`` to per-pixel class labels.

        Args:
            sample: A mapping holding a 3-D prediction tensor under ``DataKeys.PREDS``. The argmax is taken over the
                first of its three dimensions.

        Returns:
            The per-pixel labels as nested Python lists (the result of ``Tensor.tolist()`` on the HxW label tensor).
        """
        preds = sample[DataKeys.PREDS]
        assert len(preds.shape) == 3, preds.shape
        labels = torch.argmax(preds, dim=-3)  # HxW

        # Visualization is skipped while ``flash._IS_TESTING`` is set.
        if self.visualize and not flash._IS_TESTING:
            if self.labels_map is None:
                # No colours were supplied: generate one random colour per class (size of the argmax dimension).
                self.labels_map = self.create_random_labels_map(preds.shape[-3])
            self._visualize(labels)
        return labels.tolist()


@SEMANTIC_SEGMENTATION_OUTPUTS(name="fiftyone", providers=_FIFTYONE)
class FiftyOneSegmentationLabelsOutput(SegmentationLabelsOutput):
    """A :class:`.Output` which converts the model outputs to FiftyOne segmentation format.

    Args:
        labels_map: A dictionary that maps the label ids to pixel intensities.
        visualize: Whether to visualize the image labels.
        return_filepath: Boolean determining whether to return a dict
            containing filepath and FiftyOne labels (True) or only the
            FiftyOne labels (False).
    """

    @requires("fiftyone")
    def __init__(
        self,
        labels_map: Optional[Dict[int, Tuple[int, int, int]]] = None,
        visualize: bool = False,
        return_filepath: bool = True,
    ):
        super().__init__(labels_map=labels_map, visualize=visualize)

        self.return_filepath = return_filepath

    def transform(self, sample: Dict[str, Tensor]) -> Union[Segmentation, Dict[str, Any]]:
        """Wrap the per-pixel labels produced by the parent ``transform`` in a FiftyOne ``Segmentation``.

        Args:
            sample: The sample passed on to the parent ``transform``. When ``return_filepath`` is set it must also
                hold a ``"filepath"`` entry under ``DataKeys.METADATA``.

        Returns:
            The ``Segmentation`` label on its own, or a dict with ``"filepath"`` and ``"predictions"`` keys when
            ``return_filepath`` is set.
        """
        # The parent returns nested lists; FiftyOne is given the mask as a NumPy array.
        mask = np.array(super().transform(sample))
        segmentation = fol.Segmentation(mask=mask)
        if not self.return_filepath:
            return segmentation
        return {"filepath": sample[DataKeys.METADATA]["filepath"], "predictions": segmentation}