Source code for flash.image.detection.data

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
from typing import Any, Callable, Collection, Dict, List, Optional, Sequence, Type, Union

import numpy as np
import torch

from flash.core.data.data_module import DataModule
from flash.core.data.io.input import Input
from flash.core.data.utilities.classification import TargetFormatter
from flash.core.data.utilities.sort import sorted_alphanumeric
from flash.core.integrations.icevision.data import IceVisionInput
from flash.core.integrations.icevision.transforms import IceVisionInputTransform
from flash.core.utilities.imports import (
    _FIFTYONE_AVAILABLE,
    _ICEVISION_AVAILABLE,
    _TOPIC_IMAGE_AVAILABLE,
    Image,
    requires,
)
from flash.core.utilities.stages import RunningStage
from flash.core.utilities.types import INPUT_TRANSFORM_TYPE
from flash.image.detection.input import (
    ObjectDetectionFiftyOneInput,
    ObjectDetectionFilesInput,
    ObjectDetectionImageInput,
    ObjectDetectionNumpyInput,
    ObjectDetectionTensorInput,
)

SampleCollection = "fiftyone.core.collections.SampleCollection" if _FIFTYONE_AVAILABLE else None

if _ICEVISION_AVAILABLE:
    from icevision.core import ClassMap
    from icevision.parsers import COCOBBoxParser, Parser, VIABBoxParser, VOCBBoxParser
else:
    COCOBBoxParser = object
    VIABBoxParser = object
    VOCBBoxParser = object
    Parser = object

__doctest_skip__ = []
# Skip doctests if requirements aren't available
if not _TOPIC_IMAGE_AVAILABLE:
    __doctest_skip__ += ["ObjectDetectionData", "ObjectDetectionData.*"]


if not _FIFTYONE_AVAILABLE:
    __doctest_skip__ += ["ObjectDetectionData.from_fiftyone"]


class ObjectDetectionData(DataModule):
    """The ``ObjectDetectionData`` class is a :class:`~flash.core.data.data_module.DataModule` with a set of
    classmethods for loading data for object detection."""

    input_transform_cls = IceVisionInputTransform
    @classmethod
    def from_files(
        cls,
        train_files: Optional[Sequence[str]] = None,
        train_targets: Optional[Sequence[Sequence[Any]]] = None,
        train_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        val_files: Optional[Sequence[str]] = None,
        val_targets: Optional[Sequence[Sequence[Any]]] = None,
        val_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        test_files: Optional[Sequence[str]] = None,
        test_targets: Optional[Sequence[Sequence[Any]]] = None,
        test_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        predict_files: Optional[Sequence[str]] = None,
        target_formatter: Optional[TargetFormatter] = None,
        input_cls: Type[Input] = ObjectDetectionFilesInput,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given lists of image
        files, bounding boxes, and targets.

        The supported file extensions are: ``.jpg``, ``.jpeg``, ``.png``, ``.ppm``, ``.bmp``, ``.pgm``, ``.tif``,
        ``.tiff``, ``.webp``, and ``.npy``.
        The targets can be in any of our
        :ref:`supported classification target formats <formatting_classification_targets>`.
        The bounding boxes are expected to be dictionaries with integer values (representing pixels) and the following
        keys: ``xmin``, ``ymin``, ``width``, ``height``.
        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            train_files: The list of image files to use when training.
            train_targets: The list of lists of targets to use when training.
            train_bboxes: The list of lists of bounding boxes to use when training.
            val_files: The list of image files to use when validating.
            val_targets: The list of lists of targets to use when validating.
            val_bboxes: The list of lists of bounding boxes to use when validating.
            test_files: The list of image files to use when testing.
            test_targets: The list of lists of targets to use when testing.
            test_bboxes: The list of lists of bounding boxes to use when testing.
            predict_files: The list of image files to use when predicting.
            target_formatter: Optionally provide a :class:`~flash.core.data.utilities.classification.TargetFormatter`
                to control how targets are handled. See :ref:`formatting_classification_targets` for more details.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. testsetup::

            >>> import numpy as np
            >>> from PIL import Image
            >>> rand_image = Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8"))
            >>> _ = [rand_image.save(f"image_{i}.png") for i in range(1, 4)]
            >>> _ = [rand_image.save(f"predict_image_{i}.png") for i in range(1, 4)]

        .. doctest::

            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> datamodule = ObjectDetectionData.from_files(
            ...     train_files=["image_1.png", "image_2.png", "image_3.png"],
            ...     train_targets=[["cat"], ["dog"], ["cat"]],
            ...     train_bboxes=[
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 10}],
            ...         [{"xmin": 20, "ymin": 30, "width": 10, "height": 10}],
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 25}],
            ...     ],
            ...     predict_files=["predict_image_1.png", "predict_image_2.png", "predict_image_3.png"],
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(labels=datamodule.labels)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...

        .. testcleanup::

            >>> import os
            >>> _ = [os.remove(f"image_{i}.png") for i in range(1, 4)]
            >>> _ = [os.remove(f"predict_image_{i}.png") for i in range(1, 4)]
        """
        ds_kw = {"target_formatter": target_formatter}

        train_input = input_cls(RunningStage.TRAINING, train_files, train_targets, train_bboxes, **ds_kw)
        ds_kw["target_formatter"] = getattr(train_input, "target_formatter", None)

        return cls(
            train_input,
            input_cls(RunningStage.VALIDATING, val_files, val_targets, val_bboxes, **ds_kw),
            input_cls(RunningStage.TESTING, test_files, test_targets, test_bboxes, **ds_kw),
            input_cls(RunningStage.PREDICTING, predict_files, **ds_kw),
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_numpy(
        cls,
        train_data: Optional[Collection[np.ndarray]] = None,
        train_targets: Optional[Sequence[Sequence[Any]]] = None,
        train_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        val_data: Optional[Collection[np.ndarray]] = None,
        val_targets: Optional[Sequence[Sequence[Any]]] = None,
        val_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        test_data: Optional[Collection[np.ndarray]] = None,
        test_targets: Optional[Sequence[Sequence[Any]]] = None,
        test_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        predict_data: Optional[Collection[np.ndarray]] = None,
        target_formatter: Optional[TargetFormatter] = None,
        input_cls: Type[Input] = ObjectDetectionNumpyInput,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given numpy arrays (or
        lists of arrays) and corresponding lists of bounding boxes and targets.

        The targets can be in any of our
        :ref:`supported classification target formats <formatting_classification_targets>`.
        The bounding boxes are expected to be dictionaries with integer values (representing pixels) and the following
        keys: ``xmin``, ``ymin``, ``width``, ``height``.
        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            train_data: The numpy array or list of arrays to use when training.
            train_targets: The list of lists of targets to use when training.
            train_bboxes: The list of lists of bounding boxes to use when training.
            val_data: The numpy array or list of arrays to use when validating.
            val_targets: The list of lists of targets to use when validating.
            val_bboxes: The list of lists of bounding boxes to use when validating.
            test_data: The numpy array or list of arrays to use when testing.
            test_targets: The list of lists of targets to use when testing.
            test_bboxes: The list of lists of bounding boxes to use when testing.
            predict_data: The numpy array or list of arrays to use when predicting.
            target_formatter: Optionally provide a :class:`~flash.core.data.utilities.classification.TargetFormatter`
                to control how targets are handled. See :ref:`formatting_classification_targets` for more details.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. doctest::

            >>> import numpy as np
            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> datamodule = ObjectDetectionData.from_numpy(
            ...     train_data=[np.random.rand(3, 64, 64), np.random.rand(3, 64, 64), np.random.rand(3, 64, 64)],
            ...     train_targets=[["cat"], ["dog"], ["cat"]],
            ...     train_bboxes=[
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 10}],
            ...         [{"xmin": 20, "ymin": 30, "width": 10, "height": 10}],
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 25}],
            ...     ],
            ...     predict_data=[np.random.rand(3, 64, 64)],
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(labels=datamodule.labels)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...
        """
        ds_kw = {"target_formatter": target_formatter}

        train_input = input_cls(RunningStage.TRAINING, train_data, train_targets, train_bboxes, **ds_kw)
        ds_kw["target_formatter"] = getattr(train_input, "target_formatter", None)

        return cls(
            train_input,
            input_cls(RunningStage.VALIDATING, val_data, val_targets, val_bboxes, **ds_kw),
            input_cls(RunningStage.TESTING, test_data, test_targets, test_bboxes, **ds_kw),
            input_cls(RunningStage.PREDICTING, predict_data, **ds_kw),
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_images(
        cls,
        train_images: Optional[List[Image.Image]] = None,
        train_targets: Optional[Sequence[Sequence[Any]]] = None,
        train_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        val_images: Optional[List[Image.Image]] = None,
        val_targets: Optional[Sequence[Sequence[Any]]] = None,
        val_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        test_images: Optional[List[Image.Image]] = None,
        test_targets: Optional[Sequence[Sequence[Any]]] = None,
        test_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        predict_images: Optional[List[Image.Image]] = None,
        target_formatter: Optional[TargetFormatter] = None,
        input_cls: Type[Input] = ObjectDetectionImageInput,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given lists of PIL
        images and corresponding lists of bounding boxes and targets.

        The targets can be in any of our
        :ref:`supported classification target formats <formatting_classification_targets>`.
        The bounding boxes are expected to be dictionaries with integer values (representing pixels) and the following
        keys: ``xmin``, ``ymin``, ``width``, ``height``.
        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            train_images: The list of PIL images to use when training.
            train_targets: The list of lists of targets to use when training.
            train_bboxes: The list of lists of bounding boxes to use when training.
            val_images: The list of PIL images to use when validating.
            val_targets: The list of lists of targets to use when validating.
            val_bboxes: The list of lists of bounding boxes to use when validating.
            test_images: The list of PIL images to use when testing.
            test_targets: The list of lists of targets to use when testing.
            test_bboxes: The list of lists of bounding boxes to use when testing.
            predict_images: The list of PIL images to use when predicting.
            target_formatter: Optionally provide a :class:`~flash.core.data.utilities.classification.TargetFormatter`
                to control how targets are handled. See :ref:`formatting_classification_targets` for more details.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. doctest::

            >>> from PIL import Image
            >>> import numpy as np
            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> datamodule = ObjectDetectionData.from_images(
            ...     train_images=[
            ...         Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8")),
            ...         Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8")),
            ...         Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8")),
            ...     ],
            ...     train_targets=[["cat"], ["dog"], ["cat"]],
            ...     train_bboxes=[
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 10}],
            ...         [{"xmin": 20, "ymin": 30, "width": 10, "height": 10}],
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 25}],
            ...     ],
            ...     predict_images=[Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8"))],
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(labels=datamodule.labels)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...
        """
        ds_kw = {"target_formatter": target_formatter}

        train_input = input_cls(RunningStage.TRAINING, train_images, train_targets, train_bboxes, **ds_kw)
        ds_kw["target_formatter"] = getattr(train_input, "target_formatter", None)

        return cls(
            train_input,
            input_cls(RunningStage.VALIDATING, val_images, val_targets, val_bboxes, **ds_kw),
            input_cls(RunningStage.TESTING, test_images, test_targets, test_bboxes, **ds_kw),
            input_cls(RunningStage.PREDICTING, predict_images, **ds_kw),
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_tensors(
        cls,
        train_data: Optional[Collection[torch.Tensor]] = None,
        train_targets: Optional[Sequence[Sequence[Any]]] = None,
        train_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        val_data: Optional[Collection[torch.Tensor]] = None,
        val_targets: Optional[Sequence[Sequence[Any]]] = None,
        val_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        test_data: Optional[Collection[torch.Tensor]] = None,
        test_targets: Optional[Sequence[Sequence[Any]]] = None,
        test_bboxes: Optional[Sequence[Sequence[Dict[str, int]]]] = None,
        predict_data: Optional[Collection[torch.Tensor]] = None,
        target_formatter: Optional[TargetFormatter] = None,
        input_cls: Type[Input] = ObjectDetectionTensorInput,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given torch tensors (or
        lists of tensors) and corresponding lists of bounding boxes and targets.

        The targets can be in any of our
        :ref:`supported classification target formats <formatting_classification_targets>`.
        The bounding boxes are expected to be dictionaries with integer values (representing pixels) and the following
        keys: ``xmin``, ``ymin``, ``width``, ``height``.
        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            train_data: The torch tensor or list of tensors to use when training.
            train_targets: The list of lists of targets to use when training.
            train_bboxes: The list of lists of bounding boxes to use when training.
            val_data: The torch tensor or list of tensors to use when validating.
            val_targets: The list of lists of targets to use when validating.
            val_bboxes: The list of lists of bounding boxes to use when validating.
            test_data: The torch tensor or list of tensors to use when testing.
            test_targets: The list of lists of targets to use when testing.
            test_bboxes: The list of lists of bounding boxes to use when testing.
            predict_data: The torch tensor or list of tensors to use when predicting.
            target_formatter: Optionally provide a :class:`~flash.core.data.utilities.classification.TargetFormatter`
                to control how targets are handled. See :ref:`formatting_classification_targets` for more details.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. doctest::

            >>> import torch
            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> datamodule = ObjectDetectionData.from_tensors(
            ...     train_data=[torch.rand(3, 64, 64), torch.rand(3, 64, 64), torch.rand(3, 64, 64)],
            ...     train_targets=[["cat"], ["dog"], ["cat"]],
            ...     train_bboxes=[
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 10}],
            ...         [{"xmin": 20, "ymin": 30, "width": 10, "height": 10}],
            ...         [{"xmin": 10, "ymin": 20, "width": 5, "height": 25}],
            ...     ],
            ...     predict_data=[torch.rand(3, 64, 64)],
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(labels=datamodule.labels)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...
        """
        ds_kw = {"target_formatter": target_formatter}

        train_input = input_cls(RunningStage.TRAINING, train_data, train_targets, train_bboxes, **ds_kw)
        ds_kw["target_formatter"] = getattr(train_input, "target_formatter", None)

        return cls(
            train_input,
            input_cls(RunningStage.VALIDATING, val_data, val_targets, val_bboxes, **ds_kw),
            input_cls(RunningStage.TESTING, test_data, test_targets, test_bboxes, **ds_kw),
            input_cls(RunningStage.PREDICTING, predict_data, **ds_kw),
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_icedata(
        cls,
        train_folder: Optional[str] = None,
        train_ann_file: Optional[str] = None,
        train_parser_kwargs: Optional[Dict[str, Any]] = None,
        val_folder: Optional[str] = None,
        val_ann_file: Optional[str] = None,
        val_parser_kwargs: Optional[Dict[str, Any]] = None,
        test_folder: Optional[str] = None,
        test_ann_file: Optional[str] = None,
        test_parser_kwargs: Optional[Dict[str, Any]] = None,
        predict_folder: Optional[str] = None,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        parser: Optional[Union[Callable, Type[Parser]]] = None,
        input_cls: Type[Input] = IceVisionInput,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from data folders and annotation
        files loaded with the given IceVision ``parser``.

        This is the shared builder that ``from_coco``, ``from_voc``, and ``from_via`` delegate to.
        """
        ds_kw = {"parser": parser}

        return cls(
            input_cls(
                RunningStage.TRAINING, train_folder, train_ann_file, parser_kwargs=train_parser_kwargs, **ds_kw
            ),
            input_cls(RunningStage.VALIDATING, val_folder, val_ann_file, parser_kwargs=val_parser_kwargs, **ds_kw),
            input_cls(RunningStage.TESTING, test_folder, test_ann_file, parser_kwargs=test_parser_kwargs, **ds_kw),
            input_cls(RunningStage.PREDICTING, predict_folder, **ds_kw),
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_coco(
        cls,
        train_folder: Optional[str] = None,
        train_ann_file: Optional[str] = None,
        val_folder: Optional[str] = None,
        val_ann_file: Optional[str] = None,
        test_folder: Optional[str] = None,
        test_ann_file: Optional[str] = None,
        predict_folder: Optional[str] = None,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        input_cls: Type[Input] = IceVisionInput,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given data folders and
        annotation files in the `COCO JSON format <https://cocodataset.org/#format-data>`_.

        For help understanding and using the COCO format, take a look at this tutorial:
        `Create COCO annotations from scratch <COCO>`__.
        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            train_folder: The folder containing images to use when training.
            train_ann_file: The COCO format annotation file to use when training.
            val_folder: The folder containing images to use when validating.
            val_ann_file: The COCO format annotation file to use when validating.
            test_folder: The folder containing images to use when testing.
            test_ann_file: The COCO format annotation file to use when testing.
            predict_folder: The folder containing images to use when predicting.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. testsetup::

            >>> import os
            >>> import json
            >>> import numpy as np
            >>> from PIL import Image
            >>> rand_image = Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8"))
            >>> os.makedirs("train_folder", exist_ok=True)
            >>> os.makedirs("predict_folder", exist_ok=True)
            >>> _ = [rand_image.save(os.path.join("train_folder", f"image_{i}.png")) for i in range(1, 4)]
            >>> _ = [rand_image.save(os.path.join("predict_folder", f"predict_image_{i}.png")) for i in range(1, 4)]
            >>> annotations = {"annotations": [
            ...     {"area": 50, "bbox": [10, 20, 5, 10], "category_id": 1, "id": 1, "image_id": 1, "iscrowd": 0},
            ...     {"area": 100, "bbox": [20, 30, 10, 10], "category_id": 2, "id": 2, "image_id": 2, "iscrowd": 0},
            ...     {"area": 125, "bbox": [10, 20, 5, 25], "category_id": 1, "id": 3, "image_id": 3, "iscrowd": 0},
            ... ], "categories": [
            ...     {"id": 1, "name": "cat", "supercategory": "cat"},
            ...     {"id": 2, "name": "dog", "supercategory": "dog"},
            ... ], "images": [
            ...     {"file_name": "image_1.png", "height": 64, "width": 64, "id": 1},
            ...     {"file_name": "image_2.png", "height": 64, "width": 64, "id": 2},
            ...     {"file_name": "image_3.png", "height": 64, "width": 64, "id": 3},
            ... ]}
            >>> with open("train_annotations.json", "w") as annotation_file:
            ...     json.dump(annotations, annotation_file)

        The folder ``train_folder`` has the following contents:

        .. code-block::

            train_folder
            ├── image_1.png
            ├── image_2.png
            ├── image_3.png
            ...

        The file ``train_annotations.json`` contains the following:

        .. code-block::

            {
                "annotations": [
                    {"area": 50, "bbox": [10, 20, 5, 10], "category_id": 1, "id": 1, "image_id": 1, "iscrowd": 0},
                    {"area": 100, "bbox": [20, 30, 10, 10], "category_id": 2, "id": 2, "image_id": 2, "iscrowd": 0},
                    {"area": 125, "bbox": [10, 20, 5, 25], "category_id": 1, "id": 3, "image_id": 3, "iscrowd": 0}
                ],
                "categories": [
                    {"id": 1, "name": "cat", "supercategory": "cat"},
                    {"id": 2, "name": "dog", "supercategory": "dog"}
                ],
                "images": [
                    {"file_name": "image_1.png", "height": 64, "width": 64, "id": 1},
                    {"file_name": "image_2.png", "height": 64, "width": 64, "id": 2},
                    {"file_name": "image_3.png", "height": 64, "width": 64, "id": 3}
                ]
            }

        .. doctest::

            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> datamodule = ObjectDetectionData.from_coco(
            ...     train_folder="train_folder",
            ...     train_ann_file="train_annotations.json",
            ...     predict_folder="predict_folder",
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(num_classes=datamodule.num_classes)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...

        .. testcleanup::

            >>> import shutil
            >>> shutil.rmtree("train_folder")
            >>> shutil.rmtree("predict_folder")
            >>> os.remove("train_annotations.json")

        .. _COCO: https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch
        """
        return cls.from_icedata(
            train_folder=train_folder,
            train_ann_file=train_ann_file,
            val_folder=val_folder,
            val_ann_file=val_ann_file,
            test_folder=test_folder,
            test_ann_file=test_ann_file,
            predict_folder=predict_folder,
            parser=COCOBBoxParser,
            input_cls=input_cls,
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_voc(
        cls,
        labels: List[str],
        train_folder: Optional[str] = None,
        train_ann_folder: Optional[str] = None,
        val_folder: Optional[str] = None,
        val_ann_folder: Optional[str] = None,
        test_folder: Optional[str] = None,
        test_ann_folder: Optional[str] = None,
        predict_folder: Optional[str] = None,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        input_cls: Type[Input] = IceVisionInput,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given data folders and
        annotation files in the `PASCAL VOC (Visual Object Classes) XML format <PASCAL>`_.

        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            labels: A list of class labels. Note that the list should not include a label for the background class
                which will be added automatically as class zero (additional labels will be sorted).
            train_folder: The folder containing images to use when training.
            train_ann_folder: The folder containing VOC format annotation files to use when training.
            val_folder: The folder containing images to use when validating.
            val_ann_folder: The folder containing VOC format annotation files to use when validating.
            test_folder: The folder containing images to use when testing.
            test_ann_folder: The folder containing VOC format annotation files to use when testing.
            predict_folder: The folder containing images to use when predicting.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. testsetup::

            >>> import os
            >>> import numpy as np
            >>> from PIL import Image
            >>> rand_image = Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8"))
            >>> os.makedirs("train_folder", exist_ok=True)
            >>> os.makedirs("train_annotations", exist_ok=True)
            >>> os.makedirs("predict_folder", exist_ok=True)
            >>> _ = [rand_image.save(os.path.join("train_folder", f"image_{i}.png")) for i in range(1, 4)]
            >>> _ = [rand_image.save(os.path.join("predict_folder", f"predict_image_{i}.png")) for i in range(1, 4)]
            >>> bboxes = [[10, 20, 15, 30], [20, 30, 30, 40], [10, 20, 15, 45]]
            >>> labels = ["cat", "dog", "cat"]
            >>> for i, (bbox, label) in enumerate(zip(bboxes, labels)):
            ...     xmin, ymin, xmax, ymax = bbox
            ...     annotation = (
            ...         f"<annotation><filename>image_{i + 1}.png</filename>"
            ...         f"<path>image_{i + 1}.png</path><source><database>example</database></source>"
            ...         "<size><width>64</width><height>64</height><depth>3</depth></size>"
            ...         f"<object><name>{label}</name><pose>Unspecified</pose><truncated>0</truncated>"
            ...         f"<difficult>0</difficult><occluded>0</occluded><bndbox><xmin>{xmin}</xmin><xmax>{xmax}</xmax>"
            ...         f"<ymin>{ymin}</ymin><ymax>{ymax}</ymax></bndbox></object></annotation>"
            ...     )
            ...     with open(os.path.join("train_annotations", f"image_{i+1}.xml"), "w") as file:
            ...         _ = file.write(annotation)

        The folder ``train_folder`` has the following contents:

        .. code-block::

            train_folder
            ├── image_1.png
            ├── image_2.png
            ├── image_3.png
            ...

        The folder ``train_annotations`` has the following contents:

        .. code-block::

            train_annotations
            ├── image_1.xml
            ├── image_2.xml
            ├── image_3.xml
            ...

        The file ``image_1.xml`` contains the following:

        .. code-block::

            <annotation>
                <filename>image_1.png</filename>
                <path>image_1.png</path>
                <source><database>example</database></source>
                <size><width>64</width><height>64</height><depth>3</depth></size>
                <object>
                    <name>cat</name>
                    <pose>Unspecified</pose>
                    <truncated>0</truncated>
                    <difficult>0</difficult>
                    <occluded>0</occluded>
                    <bndbox><xmin>10</xmin><xmax>15</xmax><ymin>20</ymin><ymax>30</ymax></bndbox>
                </object>
            </annotation>

        .. doctest::

            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> datamodule = ObjectDetectionData.from_voc(
            ...     ["cat", "dog"],
            ...     train_folder="train_folder",
            ...     train_ann_folder="train_annotations",
            ...     predict_folder="predict_folder",
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(num_classes=datamodule.num_classes)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...

        .. testcleanup::

            >>> import shutil
            >>> shutil.rmtree("train_folder")
            >>> shutil.rmtree("predict_folder")
            >>> shutil.rmtree("train_annotations")

        .. _PASCAL: http://host.robots.ox.ac.uk/pascal/VOC/
        """
        return cls.from_icedata(
            train_folder=train_folder,
            train_ann_file=train_ann_folder,
            val_folder=val_folder,
            val_ann_file=val_ann_folder,
            test_folder=test_folder,
            test_ann_file=test_ann_folder,
            predict_folder=predict_folder,
            parser=partial(VOCBBoxParser, class_map=ClassMap(list(sorted_alphanumeric(labels)))),
            input_cls=input_cls,
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_via(
        cls,
        labels: List[str],
        label_field: str = "label",
        train_folder: Optional[str] = None,
        train_ann_file: Optional[str] = None,
        val_folder: Optional[str] = None,
        val_ann_file: Optional[str] = None,
        test_folder: Optional[str] = None,
        test_ann_file: Optional[str] = None,
        predict_folder: Optional[str] = None,
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        input_cls: Type[Input] = IceVisionInput,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given data folders and
        annotation files in the VIA (`VGG Image Annotator <https://www.robots.ox.ac.uk/~vgg/software/via/>`_)
        `JSON format <https://gitlab.com/vgg/via/-/blob/via-3.x.y/via-3.x.y/CodeDoc.md#structure-of-via-project-json-file>`_.

        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            labels: A list of class labels. Note that the list should not include a label for the background class
                which will be added automatically as class zero (additional labels will be sorted).
            label_field: The field within ``region_attributes`` which corresponds to the region label.
            train_folder: The folder containing images to use when training.
            train_ann_file: The VIA format annotation file to use when training.
            val_folder: The folder containing images to use when validating.
            val_ann_file: The VIA format annotation file to use when validating.
            test_folder: The folder containing images to use when testing.
            test_ann_file: The VIA format annotation file to use when testing.
            predict_folder: The folder containing images to use when predicting.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. testsetup::

            >>> import os
            >>> import json
            >>> import numpy as np
            >>> from PIL import Image
            >>> rand_image = Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8"))
            >>> os.makedirs("train_folder", exist_ok=True)
            >>> os.makedirs("predict_folder", exist_ok=True)
            >>> _ = [rand_image.save(os.path.join("train_folder", f"image_{i}.png")) for i in range(1, 4)]
            >>> _ = [rand_image.save(os.path.join("predict_folder", f"predict_image_{i}.png")) for i in range(1, 4)]
            >>> boxes = [(10, 20, 5, 10), (20, 30, 10, 10), (10, 20, 5, 25)]
            >>> annotations = {
            ...     f"image_{i+1}.png": {
            ...         "filename": f"image_{i+1}.png",
            ...         "regions": [{
            ...             "shape_attributes": {"name": "rect", "x": x, "y": y, "width": w, "height": h},
            ...             "region_attributes": {"label": lb},
            ...         }]
            ...     } for i, (lb, (x, y, w, h)) in enumerate(zip(["cat", "dog", "cat"], boxes))
            ... }
            >>> with open("train_annotations.json", "w") as annotation_file:
            ...     json.dump(annotations, annotation_file)

        The folder ``train_folder`` has the following contents:

        .. code-block::

            train_folder
            ├── image_1.png
            ├── image_2.png
            ├── image_3.png
            ...

        The file ``train_annotations.json`` contains the following:

        .. code-block::

            {
                "image_1.png": {
                    "filename": "image_1.png",
                    "regions": [{
                        "shape_attributes": {"name": "rect", "x": 10, "y": 20, "width": 5, "height": 10},
                        "region_attributes": {"label": "cat"}
                    }]
                },
                "image_2.png": {
                    "filename": "image_2.png",
                    "regions": [{
                        "shape_attributes": {"name": "rect", "x": 20, "y": 30, "width": 10, "height": 10},
                        "region_attributes": {"label": "dog"}
                    }]
                },
                "image_3.png": {
                    "filename": "image_3.png",
                    "regions": [{
                        "shape_attributes": {"name": "rect", "x": 10, "y": 20, "width": 5, "height": 25},
                        "region_attributes": {"label": "cat"}
                    }]
                }
            }

        .. doctest::

            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> datamodule = ObjectDetectionData.from_via(
            ...     ["cat", "dog"],
            ...     train_folder="train_folder",
            ...     train_ann_file="train_annotations.json",
            ...     predict_folder="predict_folder",
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(num_classes=datamodule.num_classes)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...

        .. testcleanup::

            >>> import shutil
            >>> shutil.rmtree("train_folder")
            >>> shutil.rmtree("predict_folder")
            >>> os.remove("train_annotations.json")
        """
        return cls.from_icedata(
            train_folder=train_folder,
            train_ann_file=train_ann_file,
            val_folder=val_folder,
            val_ann_file=val_ann_file,
            test_folder=test_folder,
            test_ann_file=test_ann_file,
            predict_folder=predict_folder,
            parser=partial(
                VIABBoxParser,
                class_map=ClassMap(list(sorted_alphanumeric(labels))),
                label_field=label_field,
            ),
            input_cls=input_cls,
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    @requires("fiftyone")
    def from_fiftyone(
        cls,
        train_dataset: Optional[SampleCollection] = None,
        val_dataset: Optional[SampleCollection] = None,
        test_dataset: Optional[SampleCollection] = None,
        predict_dataset: Optional[SampleCollection] = None,
        label_field: str = "ground_truth",
        iscrowd: str = "iscrowd",
        transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        input_cls: Type[Input] = ObjectDetectionFiftyOneInput,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "ObjectDetectionData":
        """Load the :class:`~flash.image.detection.data.ObjectDetectionData` from FiftyOne ``SampleCollection``
        objects.

        Targets will be extracted from the ``label_field`` in the ``SampleCollection`` objects.
        To learn how to customize the transforms applied for each stage, read our
        :ref:`customizing transforms guide <customizing_transforms>`.

        Args:
            train_dataset: The ``SampleCollection`` to use when training.
            val_dataset: The ``SampleCollection`` to use when validating.
            test_dataset: The ``SampleCollection`` to use when testing.
            predict_dataset: The ``SampleCollection`` to use when predicting.
            label_field: The field in the ``SampleCollection`` objects containing the targets.
            iscrowd: The field in the ``SampleCollection`` objects containing the ``iscrowd`` annotation (if
                required).
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: Additional keyword arguments to provide to the
                :class:`~flash.core.data.data_module.DataModule` constructor.

        Returns:
            The constructed :class:`~flash.image.detection.data.ObjectDetectionData`.

        Examples
        ________

        .. testsetup::

            >>> import numpy as np
            >>> from PIL import Image
            >>> rand_image = Image.fromarray(np.random.randint(0, 255, (64, 64, 3), dtype="uint8"))
            >>> _ = [rand_image.save(f"image_{i}.png") for i in range(1, 4)]
            >>> _ = [rand_image.save(f"predict_image_{i}.png") for i in range(1, 4)]

        .. doctest::

            >>> import numpy as np
            >>> import fiftyone as fo
            >>> from flash import Trainer
            >>> from flash.image import ObjectDetector, ObjectDetectionData
            >>> train_dataset = fo.Dataset.from_images(
            ...     ["image_1.png", "image_2.png", "image_3.png"]
            ... )  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            <BLANKLINE>
            ...
            >>> samples = [train_dataset[filepath] for filepath in train_dataset.values("filepath")]
            >>> for sample, label, bounding_box in zip(
            ...     samples,
            ...     ["cat", "dog", "cat"],
            ...     [[0.1, 0.2, 0.15, 0.3], [0.2, 0.3, 0.3, 0.4], [0.1, 0.2, 0.15, 0.45]],
            ... ):
            ...     sample["ground_truth"] = fo.Detections(
            ...         detections=[fo.Detection(label=label, bounding_box=bounding_box)],
            ...     )
            ...     sample.save()
            ...
            >>> predict_dataset = fo.Dataset.from_images(
            ...     ["predict_image_1.png", "predict_image_2.png", "predict_image_3.png"]
            ... )  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            <BLANKLINE>
            ...
            >>> datamodule = ObjectDetectionData.from_fiftyone(
            ...     train_dataset=train_dataset,
            ...     predict_dataset=predict_dataset,
            ...     transform_kwargs=dict(image_size=(128, 128)),
            ...     batch_size=2,
            ... )  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            <BLANKLINE>
            ...
            >>> datamodule.num_classes
            3
            >>> datamodule.labels
            ['background', 'cat', 'dog']
            >>> model = ObjectDetector(num_classes=datamodule.num_classes)
            >>> trainer = Trainer(fast_dev_run=True)
            >>> trainer.fit(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Training...
            >>> trainer.predict(model, datamodule=datamodule)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
            Predicting...

        .. testcleanup::

            >>> import os
            >>> _ = [os.remove(f"image_{i}.png") for i in range(1, 4)]
            >>> _ = [os.remove(f"predict_image_{i}.png") for i in range(1, 4)]
        """
        ds_kw = {}

        return cls(
            input_cls(RunningStage.TRAINING, train_dataset, label_field, iscrowd, **ds_kw),
            input_cls(RunningStage.VALIDATING, val_dataset, label_field, iscrowd, **ds_kw),
            input_cls(RunningStage.TESTING, test_dataset, label_field, iscrowd, **ds_kw),
            input_cls(RunningStage.PREDICTING, predict_dataset, **ds_kw),
            transform=transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
    @classmethod
    def from_folders(
        cls,
        predict_folder: Optional[str] = None,
        predict_transform: INPUT_TRANSFORM_TYPE = IceVisionInputTransform,
        input_cls: Type[Input] = IceVisionInput,
        transform_kwargs: Optional[Dict] = None,
        **data_module_kwargs: Any,
    ) -> "DataModule":
        """Creates a :class:`~flash.image.detection.data.ObjectDetectionData` object from the given data folders.

        This is currently supported only for the predicting stage.

        Args:
            predict_folder: The folder containing the predict data.
            predict_transform: The :class:`~flash.core.data.io.input_transform.InputTransform` type to use when
                predicting.
            input_cls: The :class:`~flash.core.data.io.input.Input` type to use for loading the data.
            transform_kwargs: Dict of keyword arguments to be provided when instantiating the transforms.
            data_module_kwargs: The keyword arguments for creating the datamodule.

        Returns:
            The constructed data module.
        """
        return cls(
            predict_input=input_cls(RunningStage.PREDICTING, predict_folder),
            transform=predict_transform,
            transform_kwargs=transform_kwargs,
            **data_module_kwargs,
        )
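
Note: ``from_coco``, ``from_voc``, and ``from_via`` all delegate to ``from_icedata``, which accepts any IceVision parser. A minimal sketch of calling it directly with a parser of your choice (the folder and file names here are hypothetical placeholders):

    >>> from icevision.parsers import COCOBBoxParser
    >>> from flash.image import ObjectDetectionData
    >>> datamodule = ObjectDetectionData.from_icedata(
    ...     train_folder="train_folder",  # hypothetical image folder
    ...     train_ann_file="train_annotations.json",  # hypothetical annotation file
    ...     parser=COCOBBoxParser,  # any IceVision ``Parser`` subclass or callable can be supplied
    ...     transform_kwargs=dict(image_size=(128, 128)),
    ...     batch_size=2,
    ... )

The bounding-box dictionaries accepted by ``from_files``, ``from_numpy``, ``from_images``, and ``from_tensors`` hold pixel coordinates under the keys ``xmin``, ``ymin``, ``width``, and ``height``; boxes in corner format can be converted like so:

    >>> xmin, ymin, xmax, ymax = 10, 20, 15, 30
    >>> bbox = {"xmin": xmin, "ymin": ymin, "width": xmax - xmin, "height": ymax - ymin}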
