Skip to content

Image

The image module provides a flexible and powerful way to work with images using a composition-based design. This allows you to dynamically build image objects with the exact functionality you need.

Core Components

The image module is built around the following core components:

  • Image: The main class that represents an image. It is composed of other components to provide its functionality.
  • Reader: Responsible for loading image data from various sources.
  • Drawer: Provides methods for drawing shapes and text on the image.
  • Transformer: Allows you to apply various transformations to the image, such as resizing, cropping, and color adjustments.

Available Modules

Below is a list of available modules and their functionalities:

Base Image module for basic image processing. It only contains very low-level, basic and generic image methods.

BaseImage

Base Image class

Source code in otary/image/base.py
class BaseImage:
    """Base Image class.

    Thin wrapper around a numpy array providing geometric accessors
    (shape, corners, center, ...) and basic conversions.
    """

    # pylint: disable=too-many-public-methods

    def __init__(self, image: NDArray) -> None:
        # Copy the input so later external mutations of `image` cannot
        # silently change this object's state.
        self.__asarray: NDArray = image.copy()

    @property
    def asarray(self) -> NDArray:
        """Array representation of the image"""
        return self.__asarray

    @asarray.setter
    def asarray(self, value: NDArray) -> None:
        """Setter for the asarray property

        Args:
            value (np.ndarray): value of the asarray to be changed
        """
        self.__asarray = value

    @property
    def asarray_binary(self) -> NDArray:
        """Returns the representation of the image as an array with values not
        in [0, 255] but in [0, 1].

        Returns:
            NDArray: an array with values in [0, 1]
        """
        return (self.asarray / 255).astype(np.float32)

    @property
    def is_gray(self) -> bool:
        """Whether the image is a grayscale image or not

        Returns:
            bool: True if image is in grayscale, False otherwise
        """
        # A grayscale image has no channel dimension: shape is (height, width)
        return bool(len(self.asarray.shape) == 2)

    @property
    def channels(self) -> int:
        """Number of channels in the image

        Returns:
            int: number of channels
        """
        if self.is_gray:
            return 1
        return self.asarray.shape[2]

    @property
    def shape_array(self) -> tuple:
        """Returns the array shape value (height, width, channel)

        Returns:
            tuple[int]: image shape
        """
        return self.asarray.shape

    @property
    def shape_xy(self) -> tuple:
        """Returns the array shape value (width, height, channel).
        Use this if you consider the image as pixels in a X-Y 2D coordinate system.

        Returns:
            tuple[int]: image shape
        """
        return (self.width, self.height, self.channels)

    @property
    def height(self) -> int:
        """Height of the image.

        Returns:
            int: image height
        """
        return self.asarray.shape[0]

    @property
    def width(self) -> int:
        """Width of the image.

        Returns:
            int: image width
        """
        return self.asarray.shape[1]

    @property
    def area(self) -> int:
        """Area of the image in pixels.

        Returns:
            int: image area
        """
        return self.width * self.height

    @property
    def center(self) -> NDArray[np.int16]:
        """Center point of the image.

        Please note that it is returned as type int because the center is
        represented as a X-Y coords of a pixel.

        Returns:
            np.ndarray: center point of the image
        """
        return (np.array([self.width, self.height]) / 2).astype(np.int16)

    @property
    def norm_side_length(self) -> int:
        """Returns the normalized side length of the image.
        This is the side length if the image had the same area but
        the shape of a square (four sides of the same length).

        Returns:
            int: normalized side length
        """
        return int(np.sqrt(self.area))

    @property
    def corners(self) -> NDArray:
        """Returns the corners in clockwise order:

        0. top left corner
        1. top right corner
        2. bottom right corner
        3. bottom left corner

        Returns:
            NDArray: array containing the corners
        """
        return np.array(
            [self.top_left, self.top_right, self.bottom_right, self.bottom_left]
        )

    @property
    def bottom_right(self) -> NDArray:
        """Get the bottom right point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([self.width - 1, self.height - 1], dtype=int)

    @property
    def bottom_left(self) -> NDArray:
        """Get the bottom left point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([0, self.height - 1], dtype=int)

    @property
    def top_right(self) -> NDArray:
        """Get the top right point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([self.width - 1, 0], dtype=int)

    @property
    def top_left(self) -> NDArray:
        """Get the top left point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([0, 0], dtype=int)

    def as_pil(self) -> ImagePIL.Image:
        """Return the image as PIL Image

        Returns:
            ImagePIL: PIL Image
        """
        return ImagePIL.fromarray(self.asarray)

    def as_bytes(self, fmt: str = "PNG") -> bytes:
        """Return the image as bytes

        Args:
            fmt (str, optional): format of the image. Defaults to "PNG".

        Returns:
            bytes: image in bytes
        """
        pil_image = self.as_pil()
        with io.BytesIO() as output:
            pil_image.save(output, format=fmt)
            return output.getvalue()

    def as_api_file_input(
        self, fmt: str = "png", filename: str = "image"
    ) -> dict[str, tuple[str, bytes, str]]:
        """Return the image as a file input for API requests.

        Args:
            fmt (str, optional): format of the image. Defaults to "png".
            filename (str, optional): name of the file. Defaults to "image".

        Returns:
            dict[str, tuple[str, bytes, str]]: dictionary with file input
                for API requests, where the key is "file" and the value is a tuple
                containing the filename, image bytes, and content type.
        """
        fmt_lower = fmt.lower()
        files = {
            "file": (
                # BUGFIX: use the `filename` argument — it was previously ignored
                f"{filename}.{fmt_lower}",
                self.as_bytes(fmt=fmt),
                f"image/{fmt_lower}",
            )
        }
        return files

    def as_grayscale(self) -> Self:
        """Generate the image in grayscale of shape (height, width)

        Returns:
            Self: original image in grayscale
        """
        if self.is_gray:
            return self
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_BGR2GRAY)
        return self

    def as_colorscale(self) -> Self:
        """Generate the image in colorscale (height, width, 3).
        This property can be useful when we wish to draw objects in a given color
        on a grayscale image.

        Returns:
            Self: original image in color
        """
        if not self.is_gray:
            return self
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_GRAY2BGR)
        return self

    def as_filled(self, fill_value: int | np.ndarray = 255) -> Self:
        """Fill the image entirely with a single color, keeping its size.
        Can be useful to get an empty representation of the same image to paint
        and draw things on an image of the same dimension.

        Args:
            fill_value (int | np.ndarray, optional): color to fill the new empty
                image. Defaults to 255, which means it returns an entirely
                white image.

        Returns:
            Self: image filled with a single color, same size as original.
        """
        self.asarray = np.full(
            shape=self.shape_array, fill_value=fill_value, dtype=np.uint8
        )
        return self

    def as_white(self) -> Self:
        """Returns an entirely white image with the same dimension as the original.

        Returns:
            Self: new white image
        """
        self.as_filled(fill_value=255)
        return self

    def as_black(self) -> Self:
        """Returns an entirely black image with the same dimension as the original.

        Returns:
            Self: new black image
        """
        self.as_filled(fill_value=0)
        return self

    def rev(self) -> Self:
        """Reverse the image colors. Each pixel color value V becomes |V - 255|.

        Applied on a grayscale image the black pixel becomes white and the
        white pixels become black.
        """
        # int16 intermediate avoids uint8 wrap-around during the subtraction
        self.asarray = np.abs(self.asarray.astype(np.int16) - 255).astype(np.uint8)
        return self

    def is_equal_shape(self, other: BaseImage, consider_channel: bool = True) -> bool:
        """Check whether two images have the same shape

        Args:
            other (BaseImage): BaseImage object
            consider_channel (bool, optional): whether the channel dimension
                takes part in the comparison. Defaults to True.

        Returns:
            bool: True if the objects have the same shape, False otherwise
        """
        if consider_channel:
            shape0 = self.shape_array
            shape1 = other.shape_array
        else:
            # Drop the trailing channel dimension (if any) from both shapes
            shape0 = (
                self.shape_array
                if len(self.shape_array) == 2
                else self.shape_array[:-1]
            )
            # BUGFIX: previously computed from `self`, so the comparison
            # always succeeded regardless of `other`
            shape1 = (
                other.shape_array
                if len(other.shape_array) == 2
                else other.shape_array[:-1]
            )
        return shape0 == shape1

    def dist_pct(self, pct: float = 0.01) -> float:
        """Distance percentage that can be used as an acceptable distance error
        margin. It is calculated based on the normalized side length.

        Args:
            pct (float, optional): percentage of distance error. Defaults to 0.01,
                which means 1% of the normalized side length as the
                default margin distance error.

        Returns:
            float: margin distance error
        """
        # The 0.01 default matches the long-documented behavior; passing pct
        # explicitly is unchanged.
        return self.norm_side_length * pct

area property

Area of the image

Returns:

Name Type Description
int int

image area

asarray property writable

Array representation of the image

asarray_binary property

Returns the representation of the image as an array with values not in [0, 255] but in [0, 1].

Returns:

Name Type Description
NDArray NDArray

an array with value in [0, 1]

bottom_left property

Get the bottom left point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

bottom_right property

Get the bottom right point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

center property

Center point of the image.

Please note that it is returned as type int because the center is represented as a X-Y coords of a pixel.

Returns:

Type Description
NDArray[int16]

np.ndarray: center point of the image

channels property

Number of channels in the image

Returns:

Name Type Description
int int

number of channels

corners property

Returns the corners in clockwise order:

  1. top left corner
  2. top right corner
  3. bottom right corner
  4. bottom left corner

Returns:

Name Type Description
NDArray NDArray

array containing the corners

height property

Height of the image.

Returns:

Name Type Description
int int

image height

is_gray property

Whether the image is a grayscale image or not

Returns:

Name Type Description
bool bool

True if image is in grayscale, False otherwise

norm_side_length property

Returns the normalized side length of the image. This is the side length if the image had the same area but the shape of a square (four sides of the same length).

Returns:

Name Type Description
int int

normalized side length

shape_array property

Returns the array shape value (height, width, channel)

Returns:

Type Description
tuple

tuple[int]: image shape

shape_xy property

Returns the array shape value (width, height, channel). Use this if you consider the image as pixels in a X-Y 2D coordinate system.

Returns:

Type Description
tuple

tuple[int]: image shape

top_left property

Get the top left point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

top_right property

Get the top right point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

width property

Width of the image.

Returns:

Name Type Description
int int

image width

as_api_file_input(fmt='png', filename='image')

Return the image as a file input for API requests.

Parameters:

Name Type Description Default
fmt str

format of the image. Defaults to "png".

'png'
filename str

name of the file. Defaults to "image".

'image'

Returns:

Type Description
dict[str, tuple[str, bytes, str]]

dict[str, tuple[str, bytes, str]]: dictionary with file input for API requests, where the key is "file" and the value is a tuple containing the filename, image bytes, and content type.

Source code in otary/image/base.py
def as_api_file_input(
    self, fmt: str = "png", filename: str = "image"
) -> dict[str, tuple[str, bytes, str]]:
    """Return the image as a file input for API requests.

    Args:
        fmt (str, optional): format of the image. Defaults to "png".
        filename (str, optional): name of the file. Defaults to "image".

    Returns:
        dict[str, tuple[str, bytes, str]]: dictionary with file input
            for API requests, where the key is "file" and the value is a tuple
            containing the filename, image bytes, and content type.
    """
    fmt_lower = fmt.lower()
    files = {
        "file": (
            # BUGFIX: use the `filename` argument — it was previously ignored
            f"{filename}.{fmt_lower}",
            self.as_bytes(fmt=fmt),
            f"image/{fmt_lower}",
        )
    }
    return files

as_black()

Returns an entirely black image with the same dimension as the original.

Returns:

Name Type Description
Self Self

new black image

Source code in otary/image/base.py
def as_black(self) -> Self:
    """Turn the image entirely black, keeping its dimensions.

    Returns:
        Self: this image, now fully black
    """
    # Delegate the fill to as_filled with the black value (0).
    self.as_filled(fill_value=0)
    return self

as_bytes(fmt='PNG')

Return the image as bytes

Parameters:

Name Type Description Default
fmt str

format of the image. Defaults to "PNG".

'PNG'

Returns:

Name Type Description
bytes bytes

image in bytes

Source code in otary/image/base.py
def as_bytes(self, fmt: str = "PNG") -> bytes:
    """Serialize the image to raw bytes in the given format.

    Args:
        fmt (str, optional): target image format. Defaults to "PNG".

    Returns:
        bytes: the encoded image
    """
    with io.BytesIO() as buffer:
        # Encode via PIL into an in-memory buffer, then grab the payload.
        self.as_pil().save(buffer, format=fmt)
        return buffer.getvalue()

as_colorscale()

Generate the image in colorscale (height, width, 3). This property can be useful when we wish to draw objects in a given color on a grayscale image.

Returns:

Name Type Description
Self Self

original image in color

Source code in otary/image/base.py
def as_colorscale(self) -> Self:
    """Convert the image to a 3-channel color image (height, width, 3).
    Handy when drawing colored objects on top of a grayscale image.

    Returns:
        Self: this image, now with 3 channels
    """
    # Only grayscale images need converting; color images pass through.
    if self.is_gray:
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_GRAY2BGR)
    return self

as_filled(fill_value=255)

Returns an entirely white image of the same size as the original. Can be useful to get an empty representation of the same image to paint and draw things on an image of the same dimension.

Parameters:

Name Type Description Default
fill_value int | ndarray

color to fill the new empty image. Defaults to 255, which means that it returns an entirely white image.

255

Returns:

Name Type Description
Self Self

new image with a single color of the same size as original.

Source code in otary/image/base.py
def as_filled(self, fill_value: int | np.ndarray = 255) -> Self:
    """Fill the whole image with one color, keeping its dimensions.
    Useful for producing a blank canvas of the same size to draw on.

    Args:
        fill_value (int | np.ndarray, optional): fill color. Defaults to 255,
            i.e. an entirely white image.

    Returns:
        Self: this image, filled with the single color.
    """
    canvas = np.full(shape=self.shape_array, fill_value=fill_value, dtype=np.uint8)
    self.asarray = canvas
    return self

as_grayscale()

Generate the image in grayscale of shape (height, width)

Returns:

Name Type Description
Self Self

original image in grayscale

Source code in otary/image/base.py
def as_grayscale(self) -> Self:
    """Convert the image to grayscale, shape (height, width).

    Returns:
        Self: this image, now in grayscale
    """
    # Already-gray images pass through untouched.
    if not self.is_gray:
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_BGR2GRAY)
    return self

as_pil()

Return the image as PIL Image

Returns:

Name Type Description
ImagePIL Image

PIL Image

Source code in otary/image/base.py
def as_pil(self) -> ImagePIL.Image:
    """Convert this image into a PIL Image object.

    Returns:
        ImagePIL.Image: PIL view of the underlying array
    """
    array = self.asarray
    return ImagePIL.fromarray(array)

as_white()

Returns an entirely white image with the same dimension as the original.

Returns:

Name Type Description
Self Self

new white image

Source code in otary/image/base.py
def as_white(self) -> Self:
    """Turn the image entirely white, keeping its dimensions.

    Returns:
        Self: this image, now fully white
    """
    # Delegate the fill to as_filled with the white value (255).
    self.as_filled(fill_value=255)
    return self

dist_pct(pct)

Distance percentage that can be used as an acceptable distance error margin. It is calculated based on the normalized side length.

Parameters:

Name Type Description Default
pct float

percentage of distance error. Defaults to 0.01, which means 1% of the normalized side length as the default margin distance error.

required

Returns:

Name Type Description
float float

margin distance error

Source code in otary/image/base.py
def dist_pct(self, pct: float = 0.01) -> float:
    """Distance percentage that can be used as an acceptable distance error
    margin. It is calculated based on the normalized side length.

    Args:
        pct (float, optional): percentage of distance error. Defaults to 0.01,
            which means 1% of the normalized side length as the
            default margin distance error.

    Returns:
        float: margin distance error
    """
    # BUGFIX: the docstring promised a 0.01 default, but the signature had
    # none — add it (backward-compatible for all existing callers).
    return self.norm_side_length * pct

is_equal_shape(other, consider_channel=True)

Check whether two images have the same shape

Parameters:

Name Type Description Default
other BaseImage

BaseImage object

required

Returns:

Name Type Description
bool bool

True if the objects have the same shape, False otherwise

Source code in otary/image/base.py
def is_equal_shape(self, other: BaseImage, consider_channel: bool = True) -> bool:
    """Check whether two images have the same shape

    Args:
        other (BaseImage): BaseImage object
        consider_channel (bool, optional): whether the channel dimension
            takes part in the comparison. Defaults to True.

    Returns:
        bool: True if the objects have the same shape, False otherwise
    """
    if consider_channel:
        shape0 = self.shape_array
        shape1 = other.shape_array
    else:
        # Drop the trailing channel dimension (if any) from both shapes
        shape0 = (
            self.shape_array
            if len(self.shape_array) == 2
            else self.shape_array[:-1]
        )
        # BUGFIX: previously computed from `self`, making the comparison
        # succeed regardless of `other`
        shape1 = (
            other.shape_array
            if len(other.shape_array) == 2
            else other.shape_array[:-1]
        )
    return shape0 == shape1

rev()

Reverse the image colors. Each pixel color value V becomes |V - 255|.

Applied on a grayscale image the black pixel becomes white and the white pixels become black.

Source code in otary/image/base.py
def rev(self) -> Self:
    """Invert the image colors: every pixel value V becomes |V - 255|.

    On a grayscale image this swaps black and white pixels.
    """
    # Widen to int16 first so the subtraction does not wrap around in uint8.
    inverted = np.abs(self.asarray.astype(np.int16) - 255)
    self.asarray = inverted.astype(np.uint8)
    return self

Image Drawer module. It only contains methods to draw objects in images.

DrawerImage

Image Drawer class to draw objects on a given image

Source code in otary/image/components/drawer/drawer.py
class DrawerImage:
    """Image Drawer class to draw objects on a given image.

    All draw_* methods mutate the wrapped BaseImage in place: they convert it
    to a 3-channel image, draw with OpenCV, and write the array back.

    NOTE: renderers are mutated while drawing (adjust_colors_length), so every
    method now builds a fresh default renderer instead of sharing a
    module-level mutable default instance between calls.
    """

    def __init__(self, base: BaseImage):
        self.base = base

    def _pre_draw(self, n_objects: int, render: Render) -> NDArray:
        """Adjust the render's colors to the object count and return a
        3-channel array to draw on."""
        render.adjust_colors_length(n=n_objects)
        return self.base.as_colorscale().asarray

    def draw_circles(
        self,
        circles: Sequence[geo.Circle],
        render: CirclesRender | None = None,
    ) -> None:
        """Draw circles in the image

        Args:
            circles (Sequence[Circle]): list of Circle geometry objects.
            render (CirclesRender, optional): circle renderer. Defaults to a
                fresh CirclesRender.
        """
        if render is None:
            render = CirclesRender()
        im_array = self._pre_draw(n_objects=len(circles), render=render)
        for circle, color in zip(circles, render.colors_processed):
            cv2.circle(  # type: ignore[call-overload]
                img=im_array,
                center=circle.center.astype(int),
                radius=int(circle.radius),
                color=color,
                # thickness -1 means a filled circle in OpenCV
                thickness=render.thickness if not render.is_filled else -1,
                lineType=render.line_type,
            )
            if render.is_draw_center_point_enabled:
                cv2.circle(  # type: ignore[call-overload]
                    img=im_array,
                    center=circle.center.astype(int),
                    radius=1,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        self.base.asarray = im_array

    def draw_ellipses(
        self,
        ellipses: Sequence[geo.Ellipse],
        render: EllipsesRender | None = None,
    ) -> None:
        """Draw ellipses in the image

        Args:
            ellipses (Sequence[Ellipse]): list of Ellipse geometry objects.
            render (EllipsesRender, optional): ellipse renderer. Defaults to a
                fresh EllipsesRender.
        """
        if render is None:
            render = EllipsesRender()
        im_array = self._pre_draw(n_objects=len(ellipses), render=render)
        for ellipse, color in zip(ellipses, render.colors_processed):
            axes = (int(ellipse.semi_major_axis), int(ellipse.semi_minor_axis))
            cv2.ellipse(  # type: ignore[call-overload]
                img=im_array,
                center=ellipse.centroid.astype(int),
                axes=axes,
                angle=ellipse.angle(degree=True),
                startAngle=0,
                endAngle=360,
                color=color,
                thickness=render.thickness if not render.is_filled else -1,
                lineType=render.line_type,
            )
            if render.is_draw_center_point_enabled:
                cv2.circle(  # type: ignore[call-overload]
                    img=im_array,
                    center=ellipse.centroid.astype(int),
                    radius=1,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
            if render.is_draw_focis_enabled:
                # mark both focal points of the ellipse
                for foci in [ellipse.foci1, ellipse.foci2]:
                    cv2.circle(  # type: ignore[call-overload]
                        img=im_array,
                        center=foci.astype(int),
                        radius=1,
                        color=color,
                        thickness=render.thickness,
                        lineType=render.line_type,
                    )
        self.base.asarray = im_array

    def draw_points(
        self,
        points: NDArray | Sequence[geo.Point],
        render: PointsRender | None = None,
    ) -> None:
        """Draw points in the image

        Args:
            points (NDArray): list of points. It must be of shape (n, 2). This
                means n points of shape 2 (x and y coordinates).
            render (PointsRender, optional): point renderer. Defaults to a
                fresh PointsRender.
        """
        if render is None:
            render = PointsRender()
        _points = prep_obj_draw(objects=points, _type=geo.Point)
        im_array = self._pre_draw(n_objects=len(_points), render=render)
        for point, color in zip(_points, render.colors_processed):
            cv2.circle(
                img=im_array,
                center=point,
                radius=render.radius,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )
        self.base.asarray = im_array

    def draw_segments(
        self,
        segments: NDArray | Sequence[geo.Segment],
        render: SegmentsRender | None = None,
    ) -> None:
        """Draw segments in the image. It can be arrowed segments (vectors) too.

        Args:
            segments (NDArray): list of segments. Can be a numpy array of shape
                (n, 2, 2) which means n array of shape (2, 2) that define a segment
                by two 2D points.
            render (SegmentsRender, optional): segment renderer. Defaults to a
                fresh SegmentsRender.
        """
        if render is None:
            render = SegmentsRender()
        _segments = prep_obj_draw(objects=segments, _type=geo.Segment)
        # use the prepped list for the count, consistent with the other methods
        im_array = self._pre_draw(n_objects=len(_segments), render=render)
        if render.as_vectors:
            for segment, color in zip(_segments, render.colors_processed):
                cv2.arrowedLine(
                    img=im_array,
                    pt1=segment[0],
                    pt2=segment[1],
                    color=color,
                    thickness=render.thickness,
                    # NOTE: cv2.arrowedLine's kwarg is `line_type` (snake_case),
                    # unlike cv2.line's `lineType` — this is OpenCV's naming.
                    line_type=render.line_type,
                    tipLength=render.tip_length / geo.Segment(segment).length,
                )
        else:
            for segment, color in zip(_segments, render.colors_processed):
                cv2.line(
                    img=im_array,
                    pt1=segment[0],
                    pt2=segment[1],
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        self.base.asarray = im_array

    def draw_splines(
        self,
        splines: Sequence[geo.LinearSpline],
        render: LinearSplinesRender | None = None,
    ) -> None:
        """Draw linear splines in the image.

        Args:
            splines (Sequence[geo.LinearSpline]): linear splines to draw.
            render (LinearSplinesRender, optional): linear splines render.
                Defaults to a fresh LinearSplinesRender.
        """
        if render is None:
            render = LinearSplinesRender()
        _splines = prep_obj_draw(objects=splines, _type=geo.LinearSpline)
        im_array = self._pre_draw(n_objects=len(_splines), render=render)
        for spline, color in zip(_splines, render.colors_processed):

            if render.as_vectors:
                cv2.polylines(
                    img=im_array,
                    pts=[spline[:-1]],
                    isClosed=False,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )

                # Draw the last edge as a vector
                ix = int(len(spline) * (1 - render.pct_ix_head))
                ix = ix - 1 if ix == len(spline) - 1 else ix
                segment = [spline[ix], spline[-1]]
                cv2.arrowedLine(
                    img=im_array,
                    pt1=segment[0],
                    pt2=segment[1],
                    color=color,
                    thickness=render.thickness,
                    # CONSISTENCY FIX: honor the render's line type, as every
                    # other drawing call in this class does
                    line_type=render.line_type,
                    tipLength=render.tip_length / geo.Segment(segment).length,
                )

            else:
                cv2.polylines(
                    img=im_array,
                    pts=[spline],
                    isClosed=False,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        # BUGFIX: write the drawn array back to the base image — this was the
        # only draw method missing the assignment
        self.base.asarray = im_array

    def draw_polygons(
        self,
        polygons: Sequence[geo.Polygon],
        render: PolygonsRender | None = None,
    ) -> None:
        """Draw polygons in the image

        Args:
            polygons (Sequence[Polygon]): list of Polygon objects
            render (PolygonsRender, optional): PolygonRender object. Defaults
                to a fresh PolygonsRender.
        """
        if render is None:
            render = PolygonsRender()
        _polygons = prep_obj_draw(objects=polygons, _type=geo.Polygon)
        im_array = self._pre_draw(n_objects=len(_polygons), render=render)
        for polygon, color in zip(_polygons, render.colors_processed):
            if render.is_filled:
                cv2.fillPoly(
                    img=im_array,
                    pts=[polygon],
                    color=color,
                    lineType=render.line_type,
                )
            else:
                cv2.polylines(
                    img=im_array,
                    pts=[polygon],
                    isClosed=True,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        self.base.asarray = im_array

    def draw_ocr_outputs(
        self,
        ocr_outputs: Sequence[OcrSingleOutput],
        render: OcrSingleOutputRender | None = None,
    ) -> None:
        """Return the image with the bounding boxes displayed from a list of OCR
        single output. It allows you to show bounding boxes that can have an angle,
        not necessarily vertical or horizontal.

        Args:
            ocr_outputs (Sequence[OcrSingleOutput]): list of OcrSingleOutput objects
            render (OcrSingleOutputRender, optional): OcrSingleOutputRender
                object. Defaults to a fresh OcrSingleOutputRender.
        """
        if render is None:
            render = OcrSingleOutputRender()
        im_array = self._pre_draw(n_objects=len(ocr_outputs), render=render)
        for ocrso, color in zip(ocr_outputs, render.colors_processed):
            # Best-effort: silently skip entries that are not OcrSingleOutput
            # or that carry no bounding box.
            if not isinstance(ocrso, OcrSingleOutput) or ocrso.bbox is None:
                continue
            cnt = [ocrso.bbox.asarray.reshape((-1, 1, 2)).astype(np.int32)]
            im_array = cv2.drawContours(
                image=im_array,
                contours=cnt,
                contourIdx=-1,
                thickness=render.thickness,
                color=color,
                lineType=render.line_type,
            )
        self.base.asarray = im_array

draw_circles(circles, render=CirclesRender())

Draw circles in the image

Parameters:

Name Type Description Default
circles Sequence[Circle]

list of Circle geometry objects.

required
render CirclesRender

circle renderer

CirclesRender()
Source code in otary/image/components/drawer/drawer.py
def draw_circles(
    self,
    circles: Sequence[geo.Circle],
    render: CirclesRender = CirclesRender(),
) -> None:
    """Draw a collection of circles on the image.

    Args:
        circles (Sequence[Circle]): list of Circle geometry objects.
        render (CirclesRender): circle renderer
    """
    canvas = self._pre_draw(n_objects=len(circles), render=render)
    # a filled circle is requested from OpenCV via thickness == -1
    outline_thickness = -1 if render.is_filled else render.thickness
    for circ, col in zip(circles, render.colors_processed):
        center = circ.center.astype(int)
        cv2.circle(  # type: ignore[call-overload]
            img=canvas,
            center=center,
            radius=int(circ.radius),
            color=col,
            thickness=outline_thickness,
            lineType=render.line_type,
        )
        if render.is_draw_center_point_enabled:
            # mark the center with a 1-pixel-radius dot of the same color
            cv2.circle(  # type: ignore[call-overload]
                img=canvas,
                center=center,
                radius=1,
                color=col,
                thickness=render.thickness,
                lineType=render.line_type,
            )
    self.base.asarray = canvas

draw_ellipses(ellipses, render=EllipsesRender())

Draw ellipses in the image

Parameters:

Name Type Description Default
ellipses Sequence[Ellipse]

list of Ellipse geometry objects.

required
render EllipsesRender

renderer (uses EllipsesRender for color/thickness)

EllipsesRender()
Source code in otary/image/components/drawer/drawer.py
def draw_ellipses(
    self,
    ellipses: Sequence[geo.Ellipse],
    render: EllipsesRender = EllipsesRender(),
) -> None:
    """Draw ellipses in the image

    Args:
        ellipses (Sequence[Ellipse]): list of Ellipse geometry objects.
        render (EllipsesRender): ellipse renderer controlling color, thickness,
            fill and the optional center/foci markers.
    """
    im_array = self._pre_draw(n_objects=len(ellipses), render=render)
    for ellipse, color in zip(ellipses, render.colors_processed):
        # OpenCV expects integer (semi-major, semi-minor) axes lengths
        axes = (int(ellipse.semi_major_axis), int(ellipse.semi_minor_axis))
        # full ellipse: sweep from 0 to 360 degrees; thickness -1 fills it
        cv2.ellipse(  # type: ignore[call-overload]
            img=im_array,
            center=ellipse.centroid.astype(int),
            axes=axes,
            angle=ellipse.angle(degree=True),
            startAngle=0,
            endAngle=360,
            color=color,
            thickness=render.thickness if not render.is_filled else -1,
            lineType=render.line_type,
        )
        if render.is_draw_center_point_enabled:
            # mark the centroid with a 1-pixel-radius dot
            cv2.circle(  # type: ignore[call-overload]
                img=im_array,
                center=ellipse.centroid.astype(int),
                radius=1,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )
        if render.is_draw_focis_enabled:
            # mark both foci the same way as the center point
            for foci in [ellipse.foci1, ellipse.foci2]:
                cv2.circle(  # type: ignore[call-overload]
                    img=im_array,
                    center=foci.astype(int),
                    radius=1,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
    self.base.asarray = im_array

draw_ocr_outputs(ocr_outputs, render=OcrSingleOutputRender())

Return the image with the bounding boxes displayed from a list of OCR single output. It allows you to show bounding boxes that can have an angle, not necessarily vertical or horizontal.

Parameters:

Name Type Description Default
ocr_outputs Sequence[OcrSingleOutput]

list of OcrSingleOutput objects

required
render OcrSingleOutputRender

OcrSingleOutputRender object

OcrSingleOutputRender()
Source code in otary/image/components/drawer/drawer.py
def draw_ocr_outputs(
    self,
    ocr_outputs: Sequence[OcrSingleOutput],
    render: OcrSingleOutputRender = OcrSingleOutputRender(),
) -> None:
    """Return the image with the bounding boxes displayed from a list of OCR
    single output. It allows you to show bounding boxes that can have an angle,
    not necessarily vertical or horizontal.

    Args:
        ocr_outputs (Sequence[OcrSingleOutput]): list of OcrSingleOutput objects
        render (OcrSingleOutputRender): OcrSingleOutputRender object
    """
    im_array = self._pre_draw(n_objects=len(ocr_outputs), render=render)
    for ocrso, color in zip(ocr_outputs, render.colors_processed):
        # silently skip entries that are not OCR outputs or have no bbox
        if not isinstance(ocrso, OcrSingleOutput) or ocrso.bbox is None:
            continue
        # OpenCV contour format: list of (n_points, 1, 2) int32 arrays
        cnt = [ocrso.bbox.asarray.reshape((-1, 1, 2)).astype(np.int32)]
        im_array = cv2.drawContours(
            image=im_array,
            contours=cnt,
            contourIdx=-1,
            thickness=render.thickness,
            color=color,
            lineType=render.line_type,
        )
    self.base.asarray = im_array

draw_points(points, render=PointsRender())

Draw points in the image

Parameters:

Name Type Description Default
points NDArray

list of points. It must be of shape (n, 2). This means n points of shape 2 (x and y coordinates).

required
render PointsRender

point renderer

PointsRender()
Source code in otary/image/components/drawer/drawer.py
def draw_points(
    self,
    points: NDArray | Sequence[geo.Point],
    render: PointsRender = PointsRender(),
) -> None:
    """Draw points in the image

    Args:
        points (NDArray): list of points. It must be of shape (n, 2). This
            means n points of shape 2 (x and y coordinates).
        render (PointsRender): point renderer
    """
    prepared = prep_obj_draw(objects=points, _type=geo.Point)
    canvas = self._pre_draw(n_objects=len(prepared), render=render)
    for pt, col in zip(prepared, render.colors_processed):
        # each point is rendered as a small circle of the render's radius
        cv2.circle(
            img=canvas,
            center=pt,
            radius=render.radius,
            color=col,
            thickness=render.thickness,
            lineType=render.line_type,
        )
    self.base.asarray = canvas

draw_polygons(polygons, render=PolygonsRender())

Draw polygons in the image

Parameters:

Name Type Description Default
polygons Sequence[Polygon]

list of Polygon objects

required
render PolygonsRender

PolygonRender object

PolygonsRender()
Source code in otary/image/components/drawer/drawer.py
def draw_polygons(
    self, polygons: Sequence[geo.Polygon], render: PolygonsRender = PolygonsRender()
) -> None:
    """Draw polygons in the image

    Args:
        polygons (Sequence[Polygon]): list of Polygon objects
        render (PolygonsRender): PolygonRender object
    """
    prepared = prep_obj_draw(objects=polygons, _type=geo.Polygon)
    canvas = self._pre_draw(n_objects=len(prepared), render=render)
    for poly, col in zip(prepared, render.colors_processed):
        if render.is_filled:
            # solid polygon: fill the interior
            cv2.fillPoly(
                img=canvas,
                pts=[poly],
                color=col,
                lineType=render.line_type,
            )
            continue
        # outline only: draw the closed boundary
        cv2.polylines(
            img=canvas,
            pts=[poly],
            isClosed=True,
            color=col,
            thickness=render.thickness,
            lineType=render.line_type,
        )
    self.base.asarray = canvas

draw_segments(segments, render=SegmentsRender())

Draw segments in the image. It can be arrowed segments (vectors) too.

Parameters:

Name Type Description Default
segments NDArray

list of segments. Can be a numpy array of shape (n, 2, 2) which means n array of shape (2, 2) that define a segment by two 2D points.

required
render SegmentsRender

segment renderer

SegmentsRender()
Source code in otary/image/components/drawer/drawer.py
def draw_segments(
    self,
    segments: NDArray | Sequence[geo.Segment],
    render: SegmentsRender = SegmentsRender(),
) -> None:
    """Draw segments in the image. It can be arrowed segments (vectors) too.

    Args:
        segments (NDArray | Sequence[geo.Segment]): list of segments. Can be a
            numpy array of shape (n, 2, 2) which means n array of shape (2, 2)
            that define a segment by two 2D points.
        render (SegmentsRender): segment renderer
    """
    _segments = prep_obj_draw(objects=segments, _type=geo.Segment)
    # Consistency fix: count the prepped segments (like every other draw_*
    # method), not the raw input, so any conversion by prep_obj_draw is
    # reflected in the color/render preparation.
    im_array = self._pre_draw(n_objects=len(_segments), render=render)
    if render.as_vectors:
        for segment, color in zip(_segments, render.colors_processed):
            # arrowed line: tipLength is a fraction of the segment length,
            # so dividing by the length keeps the arrow head a fixed size
            cv2.arrowedLine(
                img=im_array,
                pt1=segment[0],
                pt2=segment[1],
                color=color,
                thickness=render.thickness,
                line_type=render.line_type,
                tipLength=render.tip_length / geo.Segment(segment).length,
            )
    else:
        for segment, color in zip(_segments, render.colors_processed):
            cv2.line(
                img=im_array,
                pt1=segment[0],
                pt2=segment[1],
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )
    self.base.asarray = im_array

draw_splines(splines, render=LinearSplinesRender())

Draw linear splines in the image.

Parameters:

Name Type Description Default
splines Sequence[LinearSpline]

linear splines to draw.

required
render LinearSplinesRender

linear splines render. Defaults to LinearSplinesRender().

LinearSplinesRender()
Source code in otary/image/components/drawer/drawer.py
def draw_splines(
    self,
    splines: Sequence[geo.LinearSpline],
    render: LinearSplinesRender = LinearSplinesRender(),
) -> None:
    """Draw linear splines in the image.

    Args:
        splines (Sequence[geo.LinearSpline]): linear splines to draw.
        render (LinearSplinesRender, optional): linear splines render.
            Defaults to LinearSplinesRender().
    """
    _splines = prep_obj_draw(objects=splines, _type=geo.LinearSpline)
    im_array = self._pre_draw(n_objects=len(_splines), render=render)
    for spline, color in zip(_splines, render.colors_processed):

        if render.as_vectors:
            # all edges except the last are drawn as an open polyline
            cv2.polylines(
                img=im_array,
                pts=[spline[:-1]],
                isClosed=False,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )

            # Draw the last edge as a vector: the arrow starts at an index
            # pct_ix_head of the way back from the end of the spline
            ix = int(len(spline) * (1 - render.pct_ix_head))
            ix = ix - 1 if ix == len(spline) - 1 else ix
            segment = [spline[ix], spline[-1]]
            cv2.arrowedLine(
                img=im_array,
                pt1=segment[0],
                pt2=segment[1],
                color=color,
                thickness=render.thickness,
                tipLength=render.tip_length / geo.Segment(segment).length,
            )

        else:
            cv2.polylines(
                img=im_array,
                pts=[spline],
                isClosed=False,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )

    # Bug fix: write the drawn canvas back to the image. Every sibling
    # draw_* method ends with this assignment; without it draw_splines
    # produced no visible effect.
    self.base.asarray = im_array

Image Reader module

ReaderImage

ReaderImage class to facilitate the reading of images from different formats such as JPG, PNG, and PDF. It provides methods to load images from file paths.

Source code in otary/image/components/io/reader.py
class ReaderImage:
    """ReaderImage class to facilitate the reading of images from different formats
    such as JPG, PNG, and PDF. It provides methods to load images from file paths.
    """

    @staticmethod
    def from_fillvalue(value: int = 255, shape: tuple = (128, 128, 3)) -> NDArray:
        """Create an array image from a single value

        Args:
            value (int, optional): value in [0, 255]. Defaults to 255.
            shape (tuple, optional): image shape. If it has three elements then
                the last one must be 3 for a color image.
                Defaults to (128, 128, 3).

        Returns:
            NDArray: array with a single value

        Raises:
            ValueError: if the value is outside [0, 255] or the shape is not
                a valid 2D grayscale / 3-channel color shape
        """
        if value < 0 or value > 255:
            raise ValueError(f"The value {value} must be in [0, 255]")
        if len(shape) < 2 or len(shape) >= 4:
            raise ValueError(f"The shape {shape} must be of length 2 or 3")
        if len(shape) == 3 and shape[-1] != 3:
            raise ValueError(f"The last value of {shape} must be 3")
        return np.full(shape=shape, fill_value=value, dtype=np.uint8)

    @staticmethod
    def from_jpg(
        filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
    ) -> NDArray:
        """Create a Image object from a JPG or JPEG file path

        Args:
            filepath (str): path to the JPG image file
            as_grayscale (bool, optional): turn the image in grayscale.
                Defaults to False.
            resolution (Optional[int], optional): target height in pixels; when
                given, the image is resized to this height with its aspect
                ratio preserved. Defaults to None (no resize).

        Returns:
            NDArray: numpy array

        Raises:
            FileNotFoundError: if the file cannot be read as an image
        """
        img = cv2.imread(filepath, 1 - int(as_grayscale))
        if img is None:
            # Bug fix: cv2.imread silently returns None for missing or
            # unreadable files; fail loudly instead of crashing later on
            # the shape unpacking below with an obscure error.
            raise FileNotFoundError(f"Could not read any image from {filepath}")
        arr = np.asarray(img)
        original_height, original_width = arr.shape[:2]

        if resolution is not None:
            # Preserve the aspect ratio: resolution is the target height and
            # cv2.resize expects dsize as (width, height)
            aspect_ratio = original_width / original_height
            new_width = int(resolution * aspect_ratio)
            arr = cv2.resize(src=arr, dsize=(new_width, resolution))

        return arr

    @staticmethod
    def from_png(
        filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
    ) -> NDArray:
        """Create a Image array from a PNG file image path

        Args:
            filepath (str): path to the image file
            as_grayscale (bool, optional): turn the image in grayscale.
                Defaults to False.
            resolution (Optional[int], optional): target height in pixels
                (aspect ratio preserved). Defaults to None (no resize).

        Returns:
            NDArray: Image as array
        """
        # cv2.imread handles PNG exactly like JPEG, so delegate
        return ReaderImage.from_jpg(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )

    @staticmethod
    def from_pdf(
        filepath: str,
        as_grayscale: bool = False,
        page_nb: int = 0,
        resolution: Optional[int] = None,
        clip_pct: Optional[pymupdf.Rect] = None,
    ) -> NDArray:
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        """Create an Image array from a pdf file.

        Args:
            filepath (str): path to the pdf file.
            as_grayscale (bool, optional): whether to turn the image in grayscale.
                Defaults to False.
            page_nb (int, optional): as we load only one image we have to select the
                page that will be turned into an image. Defaults to 0.
            resolution (Optional[int], optional): resolution of the loaded image.
                Defaults to None; the underlying reader then applies its own
                default resolution.
            clip_pct (pymupdf.Rect, optional): optional zone to extract in the image.
                This is particularly useful to load into memory only a small part of the
                image without loading everything into memory. This reduces considerably
                the image loading time especially combined with a high resolution.

        Returns:
            NDArray: Image as array
        """
        # keep only the single requested page image produced by the helper
        arr = read_pdf_to_images(
            filepath_or_stream=filepath,
            resolution=resolution,
            page_nb=page_nb,
            clip_pct=clip_pct,
        )[0]

        if as_grayscale:
            # collapse the 3-channel page render to one channel (BGR -> gray)
            arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)

        return arr

    @staticmethod
    def from_file(
        filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
    ) -> NDArray:
        """Create a Image array from a file image path

        Args:
            filepath (str): path to the image file
            as_grayscale (bool, optional): turn the image in grayscale.
                Defaults to False.
            resolution (Optional[int], optional): target height in pixels
                (aspect ratio preserved). Defaults to None.

        Returns:
            NDArray: Image as array

        Raises:
            ValueError: if the file extension is not a supported format
        """
        valid_format = ["png", "jpg", "jpeg", "pdf"]

        # Fix: lowercase the extension so uppercase variants (".PNG", ".JPG")
        # are accepted too
        file_format = filepath.split(".")[-1].lower()

        if file_format in ["png"]:
            return ReaderImage.from_png(
                filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
            )
        if file_format in ["jpg", "jpeg"]:
            return ReaderImage.from_jpg(
                filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
            )
        if file_format in ["pdf"]:
            return ReaderImage.from_pdf(
                filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
            )

        raise ValueError(f"The filepath is not in any valid format {valid_format}")

from_file(filepath, as_grayscale=False, resolution=None) staticmethod

Create a Image array from a file image path

Parameters:

Name Type Description Default
filepath str

path to the image file

required
as_grayscale bool

turn the image in grayscale. Defaults to False.

False

Returns:

Name Type Description
NDArray NDArray

Image as array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_file(
    filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
) -> NDArray:
    """Create a Image array from a file image path

    Args:
        filepath (str): path to the image file
        as_grayscale (bool, optional): turn the image in grayscale.
            Defaults to False.
        resolution (Optional[int], optional): target height in pixels
            (aspect ratio preserved). Defaults to None.

    Returns:
        NDArray: Image as array

    Raises:
        ValueError: if the file extension is not a supported format
    """
    valid_format = ["png", "jpg", "jpeg", "pdf"]

    # Fix: lowercase the extension so uppercase variants (".PNG", ".JPG")
    # are accepted too
    file_format = filepath.split(".")[-1].lower()

    if file_format in ["png"]:
        return ReaderImage.from_png(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )
    if file_format in ["jpg", "jpeg"]:
        return ReaderImage.from_jpg(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )
    if file_format in ["pdf"]:
        return ReaderImage.from_pdf(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )

    raise ValueError(f"The filepath is not in any valid format {valid_format}")

from_fillvalue(value=255, shape=(128, 128, 3)) staticmethod

Create an array image from a single value

Parameters:

Name Type Description Default
value int

value in [0, 255]. Defaults to 255.

255
shape tuple

image shape. If it has three elements then the last one must be 3 for a color image. Defaults to (128, 128, 3).

(128, 128, 3)

Returns:

Name Type Description
NDArray NDArray

array with a single value

Source code in otary/image/components/io/reader.py
@staticmethod
def from_fillvalue(value: int = 255, shape: tuple = (128, 128, 3)) -> NDArray:
    """Create an array image from a single value

    Args:
        value (int, optional): value in [0, 255]. Defaults to 255.
        shape (tuple, optional): image shape. If it has three elements then
            the last one must be a 3 for a coloscale image.
            Defaults to (128, 128, 3).

    Returns:
        NDArray: array with a single value
    """
    if value < 0 or value > 255:
        raise ValueError(f"The value {value} must be in [0, 255]")
    if len(shape) < 2 or len(shape) >= 4:
        raise ValueError(f"The shape {shape} must be of length 2 or 3")
    if len(shape) == 3 and shape[-1] != 3:
        raise ValueError(f"The last value of {shape} must be 3")
    return np.full(shape=shape, fill_value=value, dtype=np.uint8)

from_jpg(filepath, as_grayscale=False, resolution=None) staticmethod

Create a Image object from a JPG or JPEG file path

Parameters:

Name Type Description Default
filepath str

path to the JPG image file

required
as_grayscale bool

turn the image in grayscale. Defaults to False.

False

Returns:

Name Type Description
NDArray NDArray

numpy array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_jpg(
    filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
) -> NDArray:
    """Create a Image object from a JPG or JPEG file path

    Args:
        filepath (str): path to the JPG image file
        as_grayscale (bool, optional): turn the image in grayscale.
            Defaults to False.
        resolution (Optional[int], optional): target height in pixels; when
            given, the image is resized to this height with its aspect
            ratio preserved. Defaults to None (no resize).

    Returns:
        NDArray: numpy array
    """
    # NOTE(review): cv2.imread returns None for missing/unreadable paths,
    # which makes the shape unpacking below fail with an obscure error —
    # consider raising an explicit exception instead.
    arr = np.asarray(cv2.imread(filepath, 1 - int(as_grayscale)))
    original_height, original_width = arr.shape[:2]

    if resolution is not None:
        # Calculate the aspect ratio; resolution is the target height and
        # cv2.resize expects dsize as (width, height)
        aspect_ratio = original_width / original_height
        new_width = int(resolution * aspect_ratio)
        arr = cv2.resize(src=arr, dsize=(new_width, resolution))

    return arr

from_pdf(filepath, as_grayscale=False, page_nb=0, resolution=None, clip_pct=None) staticmethod

Create an Image array from a pdf file.

Parameters:

Name Type Description Default
filepath str

path to the pdf file.

required
as_grayscale bool

whether to turn the image in grayscale. Defaults to False.

False
page_nb int

as we load only one image we have to select the page that will be turned into an image. Defaults to 0.

0
resolution Optional[int]

resolution of the loaded image. Defaults to None; the underlying reader then applies its own default resolution.

None
clip_pct Rect

optional zone to extract in the image. This is particularly useful to load into memory only a small part of the image without loading everything into memory. This reduces considerably the image loading time especially combined with a high resolution.

None

Returns:

Name Type Description
NDArray NDArray

Image as array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_pdf(
    filepath: str,
    as_grayscale: bool = False,
    page_nb: int = 0,
    resolution: Optional[int] = None,
    clip_pct: Optional[pymupdf.Rect] = None,
) -> NDArray:
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    """Create an Image array from a pdf file.

    Args:
        filepath (str): path to the pdf file.
        as_grayscale (bool, optional): whether to turn the image in grayscale.
            Defaults to False.
        page_nb (int, optional): as we load only one image we have to select the
            page that will be turned into an image. Defaults to 0.
        resolution (Optional[int], optional): resolution of the loaded image.
            Defaults to None; the underlying reader then applies its own
            default resolution.
        clip_pct (pymupdf.Rect, optional): optional zone to extract in the image.
            This is particularly useful to load into memory only a small part of the
            image without loading everything into memory. This reduces considerably
            the image loading time especially combined with a high resolution.

    Returns:
        NDArray: Image as array
    """
    # keep only the single requested page image produced by the helper
    arr = read_pdf_to_images(
        filepath_or_stream=filepath,
        resolution=resolution,
        page_nb=page_nb,
        clip_pct=clip_pct,
    )[0]

    if as_grayscale:
        # collapse the 3-channel page render to one channel (BGR -> gray)
        arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)

    return arr

from_png(filepath, as_grayscale=False, resolution=None) staticmethod

Create a Image array from a PNG file image path

Parameters:

Name Type Description Default
filepath str

path to the image file

required
as_grayscale bool

turn the image in grayscale. Defaults to False.

False

Returns:

Name Type Description
NDArray NDArray

Image as array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_png(
    filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
) -> NDArray:
    """Create a Image array from a PNG file image path

    Args:
        filepath (str): path to the image file
        as_grayscale (bool, optional): turn the image in grayscale.
            Defaults to False.
        resolution (Optional[int], optional): target height in pixels
            (aspect ratio preserved). Defaults to None (no resize).

    Returns:
        NDArray: Image as array
    """
    # cv2.imread handles PNG exactly like JPEG, so delegate to from_jpg
    return ReaderImage.from_jpg(
        filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
    )

WriterImage module

WriterImage

WriterImage class that provide methods to save and show the image

Source code in otary/image/components/io/writer.py
class WriterImage:
    """WriterImage class that provide methods to save and show the image"""

    def __init__(self, base: BaseImage) -> None:
        # the BaseImage whose pixel array is displayed / saved
        self.base = base

    def show(
        self,
        title: Optional[str] = None,
        figsize: tuple[float, float] = (8.0, 6.0),
        color_conversion: Optional[int] = cv2.COLOR_BGR2RGB,
        save_filepath: Optional[str] = None,
    ) -> None:
        """Show the image

        Args:
            title (Optional[str], optional): title of the image. Defaults to None.
            figsize (tuple[float, float], optional): size of the figure.
                Defaults to (8.0, 6.0).
            color_conversion (Optional[int], optional): color conversion code
                passed to cv2.cvtColor; pass None to skip any conversion.
                Defaults to cv2.COLOR_BGR2RGB.
            save_filepath (Optional[str], optional): save the image if needed.
                Defaults to None.
        """
        # Converts from one colour space to the other. this is needed as RGB
        # is not the default colour space for OpenCV
        if color_conversion is not None:
            im = cv2.cvtColor(self.base.asarray, color_conversion)
        else:
            im = self.base.asarray

        plt.figure(figsize=figsize)

        # Show the image
        plt.imshow(im)

        # remove the axis / ticks for a clean looking image
        plt.xticks([])
        plt.yticks([])

        # if a title is provided, show it
        if title is not None:
            plt.title(title)

        # save before plt.show(), which may clear the current figure
        if save_filepath is not None:
            plt.savefig(save_filepath)

        plt.show()

    def save(self, save_filepath: str) -> None:
        """Save the image in a local file

        NOTE(review): delegates to show(), so saving also triggers plt.show()
        and may open a window — confirm this is intended for headless use.

        Args:
            save_filepath (str): path to the file
        """
        self.show(save_filepath=save_filepath)

save(save_filepath)

Save the image in a local file

Parameters:

Name Type Description Default
save_filepath str

path to the file

required
Source code in otary/image/components/io/writer.py
def save(self, save_filepath: str) -> None:
    """Save the image in a local file

    NOTE(review): delegates to show(), so saving also triggers plt.show()
    and may open a window — confirm this is intended for headless use.

    Args:
        save_filepath (str): path to the file
    """
    self.show(save_filepath=save_filepath)

show(title=None, figsize=(8.0, 6.0), color_conversion=cv2.COLOR_BGR2RGB, save_filepath=None)

Show the image

Parameters:

Name Type Description Default
title Optional[str]

title of the image. Defaults to None.

None
figsize tuple[float, float]

size of the figure. Defaults to (8.0, 6.0).

(8.0, 6.0)
color_conversion int

color conversion parameter. Defaults to cv2.COLOR_BGR2RGB.

COLOR_BGR2RGB
save_filepath Optional[str]

save the image if needed. Defaults to None.

None
Source code in otary/image/components/io/writer.py
def show(
    self,
    title: Optional[str] = None,
    figsize: tuple[float, float] = (8.0, 6.0),
    color_conversion: Optional[int] = cv2.COLOR_BGR2RGB,
    save_filepath: Optional[str] = None,
) -> None:
    """Show the image

    Args:
        title (Optional[str], optional): title of the image. Defaults to None.
        figsize (tuple[float, float], optional): size of the figure.
            Defaults to (8.0, 6.0).
        color_conversion (Optional[int], optional): color conversion code
            passed to cv2.cvtColor; pass None to skip any conversion.
            Defaults to cv2.COLOR_BGR2RGB.
        save_filepath (Optional[str], optional): save the image if needed.
            Defaults to None.
    """
    # Converts from one colour space to the other. this is needed as RGB
    # is not the default colour space for OpenCV
    if color_conversion is not None:
        im = cv2.cvtColor(self.base.asarray, color_conversion)
    else:
        im = self.base.asarray

    plt.figure(figsize=figsize)

    # Show the image
    plt.imshow(im)

    # remove the axis / ticks for a clean looking image
    plt.xticks([])
    plt.yticks([])

    # if a title is provided, show it
    if title is not None:
        plt.title(title)

    # save before plt.show(), which may clear the current figure
    if save_filepath is not None:
        plt.savefig(save_filepath)

    plt.show()

Cropper Transformer component

CropperImage

CropperImage class

Source code in otary/image/components/transformer/components/cropper/cropper.py
class CropperImage:
    """CropperImage class

    Transformer component dedicated to cropping. It extracts axis-aligned
    rectangular regions from the shared BaseImage, either mutating it in
    place or returning a new Image when copy=True.
    """

    def __init__(self, base: BaseImage) -> None:
        # Shared reference (not a copy): cropping mutates the image in place
        self.base = base

    def __crop_with_padding(
        self, x0: int, y0: int, x1: int, y1: int, pad_value: int = 0
    ) -> NDArray:
        """Crop the image in a straight axis-aligned rectangle way given
        by the top-left point [x0, y0] and the bottom-right point [x1, y1].

        This method is specific to crop with padding meaning that if the
        coordinates are out of the image bounds, the padding is added to the
        output cropped image with the pad value parameter, black by default.

        Args:
            x0 (int): x coordinate of the top-left point
            y0 (int): y coordinate of the top-left point
            x1 (int): x coordinate of the bottom-right point
            y1 (int): y coordinate of the bottom-right point
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            NDArray: output cropped image of exact size (y1-y0, x1-x0)
        """
        # pylint: disable=too-many-locals
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

        # Output size
        crop_width = x1 - x0
        crop_height = y1 - y0

        # Initialize output with black (zeros), same dtype and channel count
        channels = 1 if self.base.is_gray else self.base.asarray.shape[2]
        output_shape = (
            (crop_height, crop_width)
            if channels == 1
            else (crop_height, crop_width, channels)
        )
        result = np.full(shape=output_shape, fill_value=pad_value, dtype=np.uint8)

        # Compute the intersection of crop with image bounds
        ix0 = max(x0, 0)
        iy0 = max(y0, 0)
        ix1 = min(x1, self.base.width)
        iy1 = min(y1, self.base.height)

        # Compute corresponding position in output
        ox0 = ix0 - x0
        oy0 = iy0 - y0
        ox1 = ox0 + (ix1 - ix0)
        oy1 = oy0 + (iy1 - iy0)

        # Copy the valid region; everything outside it keeps the pad value
        result[oy0:oy1, ox0:ox1] = self.base.asarray[iy0:iy1, ix0:ix1]

        return result

    def __crop_with_clipping(self, x0: int, y0: int, x1: int, y1: int) -> NDArray:
        """Crop the image in a straight axis-aligned rectangle way given
        by the top-left point [x0, y0] and the bottom-right point [x1, y1].

        Crop by clipping meaning that if the coordinates are out of the image
        bounds the output is only the part of the image that is in the bounds.

        Args:
            x0 (int): x coordinate of the top-left point
            y0 (int): y coordinate of the top-left point
            x1 (int): x coordinate of the bottom-right point
            y1 (int): y coordinate of the bottom-right point

        Returns:
            NDArray: cropped image array (may be smaller than requested)

        Raises:
            ValueError: if the rectangle lies entirely outside the image.
        """
        x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

        if x0 >= self.base.width or y0 >= self.base.height or x1 <= 0 or y1 <= 0:
            raise ValueError(
                f"The coordinates ({x0}, {y0}, {x1}, {y1}) are out of the image "
                f"boundaries (width={self.base.width}, height={self.base.height}). "
                "No crop is possible."
            )

        def clip(value: int, min_value: int, max_value: int) -> int:
            return int(max(min_value, min(value, max_value)))

        # clamp each coordinate into [0, width] x [0, height]
        x0 = clip(x0, 0, self.base.width)
        y0 = clip(y0, 0, self.base.height)
        x1 = clip(x1, 0, self.base.width)
        y1 = clip(y1, 0, self.base.height)

        result = self.base.asarray[y0:y1, x0:x1]
        return result

    def crop(
        self,
        x0: int,
        y0: int,
        x1: int,
        y1: int,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image in a straight axis-aligned rectangle way given
        by the top-left point [x0, y0] and the bottom-right point [x1, y1]

        This function inputs represents the top-left and bottom-right points.
        This method does not provide a way to extract a rotated rectangle or a
        different shape from the image.

        Remember that in this library the x coordinates represent the y coordinates of
        the image array (horizontal axis of the image).
        The array representation is always rows then columns.
        Note that this differs from the OpenCV convention.

        Args:
            x0 (int): top-left x coordinate
            y0 (int): top-left y coordinate
            x1 (int): bottom-right x coordinate
            y1 (int): bottom-right y coordinate
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: cropped image if copy=True else None

        Raises:
            ValueError: if clip and pad are both True or both False.
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # exactly one of clip / pad must be active
        if (clip and pad) or (not clip and not pad):
            raise ValueError(f"Parameters clip and pad cannot be both {clip}")

        if clip and not pad:
            array_crop = self.__crop_with_clipping(
                x0=x0 - extra_border_size,
                y0=y0 - extra_border_size,
                x1=x1 + extra_border_size,
                y1=y1 + extra_border_size,
            )
        else:  # pad and not clip:
            array_crop = self.__crop_with_padding(
                x0=x0 - extra_border_size,
                y0=y0 - extra_border_size,
                x1=x1 + extra_border_size,
                y1=y1 + extra_border_size,
                pad_value=pad_value,
            )

        if copy:
            # really important feature to allow new image from original
            # without the user doing image.copy().crop()
            # which would be much more expensive if the image is large
            # this is why the output of the methods is Optional[Image] not None
            # pylint: disable=import-outside-toplevel
            from otary.image import Image

            return Image(image=array_crop)

        self.base.asarray = array_crop
        return None

    def crop_from_topleft(
        self,
        topleft: np.ndarray,
        width: int,
        height: int,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a rectangle defined by its top-left point, its width and
        its height.

        Args:
            topleft (np.ndarray): (x, y) coordinates of the top-left point
            width (int): width of the rectangle to crop
            height (int): height of the rectangle to crop
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: image cropped if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        return self.crop(
            x0=topleft[0],
            y0=topleft[1],
            x1=topleft[0] + width,
            y1=topleft[1] + height,
            clip=clip,
            pad=pad,
            copy=copy,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_center(
        self,
        center: NDArray,
        width: int,
        height: int,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a rectangle defined by its center point, its width and
        its height.

        Args:
            center (NDArray): (x, y) coordinates of the center point
            width (int): width of the rectangle to crop
            height (int): height of the rectangle to crop
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: image cropped if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # convert the center point into the equivalent top-left corner
        return self.crop_from_topleft(
            topleft=center - np.array([width / 2, height / 2]),
            width=width,
            height=height,
            clip=clip,
            pad=pad,
            copy=copy,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_polygon(
        self,
        polygon: geo.Polygon,
        copy: bool = False,
        clip: bool = True,
        pad: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a polygon.

        The crop window is the axis-aligned bounding box of the polygon.

        Returns:
            Optional[Image]: cropped image if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        return self.crop(
            x0=int(polygon.xmin),
            y0=int(polygon.ymin),
            x1=int(polygon.xmax),
            y1=int(polygon.ymax),
            copy=copy,
            clip=clip,
            pad=pad,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_linear_spline(
        self,
        spline: geo.LinearSpline,
        copy: bool = False,
        clip: bool = True,
        pad: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a linear spline.

        The crop window is the axis-aligned bounding box of the spline.

        Returns:
            Optional[Image]: cropped image if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        return self.crop(
            x0=int(spline.xmin),
            y0=int(spline.ymin),
            x1=int(spline.xmax),
            y1=int(spline.ymax),
            copy=copy,
            clip=clip,
            pad=pad,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_axis_aligned_bbox(
        self,
        bbox: geo.Rectangle,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from an Axis-Aligned Bounding Box (AABB).
        Inclusive crops which means that the cropped image will have
        width and height equal to the width and height of the AABB.

        Args:
            bbox (geo.Rectangle): axis-aligned bounding box
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: cropped image if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # NOTE(review): assert is stripped under python -O; consider raising
        # ValueError for non-axis-aligned boxes instead.
        assert bbox.is_axis_aligned
        # +1 because both edges of the box belong to the crop (inclusive)
        topleft = np.asarray([bbox.xmin, bbox.ymin])
        height = int(bbox.ymax - bbox.ymin + 1)
        width = int(bbox.xmax - bbox.xmin + 1)
        return self.crop_from_topleft(
            topleft=topleft,
            width=width,
            height=height,
            clip=clip,
            pad=pad,
            copy=copy,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

__crop_with_clipping(x0, y0, x1, y1)

Crop the image in a straight axis-aligned rectangle way given by the top-left point [x0, y0] and the bottom-right point [x1, y1].

Crop by clipping meaning that if the coordinates are out of the image bounds the output is only the part of the image that is in the bounds.

Parameters:

Name Type Description Default
x0 int

x coordinate of the top-left point

required
y0 int

y coordinate of the top-left point

required
x1 int

x coordinate of the bottom-right point

required
y1 int

y coordinate of the bottom-right point

required

Returns:

Name Type Description
NDArray NDArray

image cropped

Source code in otary/image/components/transformer/components/cropper/cropper.py
def __crop_with_clipping(self, x0: int, y0: int, x1: int, y1: int) -> NDArray:
    """Extract the axis-aligned rectangle going from the top-left point
    [x0, y0] to the bottom-right point [x1, y1], clipped to the image.

    Coordinates falling outside the image are clamped to its bounds, so
    the output can be smaller than the requested rectangle.

    Args:
        x0 (int): x coordinate of the top-left point
        y0 (int): y coordinate of the top-left point
        x1 (int): x coordinate of the bottom-right point
        y1 (int): y coordinate of the bottom-right point

    Returns:
        NDArray: cropped image array

    Raises:
        ValueError: if the rectangle lies entirely outside the image.
    """
    x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

    if x0 >= self.base.width or y0 >= self.base.height or x1 <= 0 or y1 <= 0:
        raise ValueError(
            f"The coordinates ({x0}, {y0}, {x1}, {y1}) are out of the image "
            f"boundaries (width={self.base.width}, height={self.base.height}). "
            "No crop is possible."
        )

    # clamp every coordinate into [0, width] x [0, height]
    max_x, max_y = self.base.width, self.base.height
    x0, x1 = min(max(x0, 0), max_x), min(max(x1, 0), max_x)
    y0, y1 = min(max(y0, 0), max_y), min(max(y1, 0), max_y)

    return self.base.asarray[y0:y1, x0:x1]

__crop_with_padding(x0, y0, x1, y1, pad_value=0)

Crop the image in a straight axis-aligned rectangle way given by the top-left point [x0, y0] and the bottom-right point [x1, y1].

This method is specific to crop with padding meaning that if the coordinates are out of the image bounds, the padding is added to the output cropped image with the pad value parameter, black by default.

Parameters:

Name Type Description Default
x0 int

x coordinate of the top-left point

required
y0 int

y coordinate of the top-left point

required
x1 int

x coordinate of the bottom-right point

required
y1 int

y coordinate of the bottom-right point

required
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Name Type Description
NDArray NDArray

output cropped image

Source code in otary/image/components/transformer/components/cropper/cropper.py
def __crop_with_padding(
    self, x0: int, y0: int, x1: int, y1: int, pad_value: int = 0
) -> NDArray:
    """Extract the axis-aligned rectangle going from the top-left point
    [x0, y0] to the bottom-right point [x1, y1], padding where needed.

    The output always has the exact requested size (y1-y0, x1-x0): any part
    of the window that falls outside the image is filled with ``pad_value``
    (black by default).

    Args:
        x0 (int): x coordinate of the top-left point
        y0 (int): y coordinate of the top-left point
        x1 (int): x coordinate of the bottom-right point
        y1 (int): y coordinate of the bottom-right point
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        NDArray: output cropped image
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

    out_w = x1 - x0
    out_h = y1 - y0

    # canvas of the requested size, pre-filled with the pad value
    if self.base.is_gray:
        canvas_shape: tuple = (out_h, out_w)
    else:
        canvas_shape = (out_h, out_w, self.base.asarray.shape[2])
    canvas = np.full(shape=canvas_shape, fill_value=pad_value, dtype=np.uint8)

    # part of the source image that actually overlaps the crop window
    src_x0, src_y0 = max(x0, 0), max(y0, 0)
    src_x1 = min(x1, self.base.width)
    src_y1 = min(y1, self.base.height)

    # where that overlap lands inside the output canvas
    dst_x0, dst_y0 = src_x0 - x0, src_y0 - y0
    dst_x1 = dst_x0 + (src_x1 - src_x0)
    dst_y1 = dst_y0 + (src_y1 - src_y0)

    canvas[dst_y0:dst_y1, dst_x0:dst_x1] = self.base.asarray[
        src_y0:src_y1, src_x0:src_x1
    ]

    return canvas

crop(x0, y0, x1, y1, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image in a straight axis-aligned rectangle way given by the top-left point [x0, y0] and the bottom-right point [x1, y1]

This function inputs represents the top-left and bottom-right points. This method does not provide a way to extract a rotated rectangle or a different shape from the image.

Remember that in this library the x coordinates represent the y coordinates of the image array (horizontal axis of the image). The array representation is always rows then columns. Note that this differs from the OpenCV convention.

Parameters:

Name Type Description Default
x0 int

top-left x coordinate

required
y0 int

top-left y coordinate

required
x1 int

bottom-right x coordinate

required
y1 int

bottom-right y coordinate

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: cropped image if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop(
    self,
    x0: int,
    y0: int,
    x1: int,
    y1: int,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the axis-aligned rectangle defined by its top-left corner
    [x0, y0] and its bottom-right corner [x1, y1].

    Exactly one of ``clip`` or ``pad`` must be True: with ``clip`` the
    window is clamped to the image bounds, with ``pad`` the out-of-bounds
    area is filled with ``pad_value``. Rotated rectangles or other shapes
    are not supported by this method.

    Remember that in this library the x coordinates represent the y
    coordinates of the image array (horizontal axis of the image).
    The array representation is always rows then columns.
    Note that this differs from the OpenCV convention.

    Args:
        x0 (int): top-left x coordinate
        y0 (int): top-left y coordinate
        x1 (int): bottom-right x coordinate
        y1 (int): bottom-right y coordinate
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: cropped image if copy=True else None

    Raises:
        ValueError: if clip and pad are both True or both False.
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    if (clip and pad) or (not clip and not pad):
        raise ValueError(f"Parameters clip and pad cannot be both {clip}")

    # grow the window symmetrically by the extra border before cropping
    window = {
        "x0": x0 - extra_border_size,
        "y0": y0 - extra_border_size,
        "x1": x1 + extra_border_size,
        "y1": y1 + extra_border_size,
    }
    if clip and not pad:
        array_crop = self.__crop_with_clipping(**window)
    else:  # pad and not clip
        array_crop = self.__crop_with_padding(**window, pad_value=pad_value)

    if not copy:
        # mutate the shared image in place
        self.base.asarray = array_crop
        return None

    # Returning a brand new Image here avoids the much more expensive
    # image.copy().crop() pattern on large images; this is why the method
    # returns Optional[Image] instead of None.
    # pylint: disable=import-outside-toplevel
    from otary.image import Image

    return Image(image=array_crop)

crop_from_axis_aligned_bbox(bbox, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image from an Axis-Aligned Bounding Box (AABB). Inclusive crops which means that the cropped image will have width and height equal to the width and height of the AABB.

Parameters:

Name Type Description Default
bbox Rectangle

axis-aligned bounding box

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: cropped image if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_axis_aligned_bbox(
    self,
    bbox: geo.Rectangle,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the image to an Axis-Aligned Bounding Box (AABB).

    The crop is inclusive: the cropped image has exactly the width and
    height of the AABB.

    Args:
        bbox (geo.Rectangle): axis-aligned bounding box
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: cropped image if copy=True else None
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    assert bbox.is_axis_aligned
    # +1 so that both edges of the box belong to the crop (inclusive)
    return self.crop_from_topleft(
        topleft=np.asarray([bbox.xmin, bbox.ymin]),
        width=int(bbox.xmax - bbox.xmin + 1),
        height=int(bbox.ymax - bbox.ymin + 1),
        clip=clip,
        pad=pad,
        copy=copy,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_center(center, width, height, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image from a rectangle defined by its center point, its width and its height.

Parameters:

Name Type Description Default
center NDArray

(x, y) coordinates of the center point

required
width int

width of the rectangle to crop

required
height int

height of the rectangle to crop

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: image cropped if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_center(
    self,
    center: NDArray,
    width: int,
    height: int,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop a ``width`` x ``height`` rectangle centered on ``center``.

    Args:
        center (NDArray): (x, y) coordinates of the center point
        width (int): width of the rectangle to crop
        height (int): height of the rectangle to crop
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: image cropped if copy=True else None
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    # derive the equivalent top-left corner from the center and the size
    half_size = np.array([width / 2, height / 2])
    return self.crop_from_topleft(
        topleft=center - half_size,
        width=width,
        height=height,
        clip=clip,
        pad=pad,
        copy=copy,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_linear_spline(spline, copy=False, clip=True, pad=False, extra_border_size=0, pad_value=0)

Crop the image from a linear spline

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_linear_spline(
    self,
    spline: geo.LinearSpline,
    copy: bool = False,
    clip: bool = True,
    pad: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the image to the axis-aligned bounding box of a linear spline"""
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    window = (int(spline.xmin), int(spline.ymin), int(spline.xmax), int(spline.ymax))
    return self.crop(
        x0=window[0],
        y0=window[1],
        x1=window[2],
        y1=window[3],
        copy=copy,
        clip=clip,
        pad=pad,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_polygon(polygon, copy=False, clip=True, pad=False, extra_border_size=0, pad_value=0)

Crop the image from a polygon

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_polygon(
    self,
    polygon: geo.Polygon,
    copy: bool = False,
    clip: bool = True,
    pad: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the image to the axis-aligned bounding box of a polygon"""
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    xmin, ymin = polygon.xmin, polygon.ymin
    xmax, ymax = polygon.xmax, polygon.ymax
    return self.crop(
        x0=int(xmin),
        y0=int(ymin),
        x1=int(xmax),
        y1=int(ymax),
        copy=copy,
        clip=clip,
        pad=pad,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_topleft(topleft, width, height, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image from a rectangle defined by its top-left point, its width and its height.

Parameters:

Name Type Description Default
topleft ndarray

(x, y) coordinates of the top-left point

required
width int

width of the rectangle to crop

required
height int

height of the rectangle to crop

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: image cropped if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_topleft(
    self,
    topleft: np.ndarray,
    width: int,
    height: int,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop a ``width`` x ``height`` rectangle whose top-left corner
    is ``topleft``.

    Args:
        topleft (np.ndarray): (x, y) coordinates of the top-left point
        width (int): width of the rectangle to crop
        height (int): height of the rectangle to crop
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: image cropped if copy=True else None
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    x_left, y_top = topleft[0], topleft[1]
    return self.crop(
        x0=x_left,
        y0=y_top,
        x1=x_left + width,
        y1=y_top + height,
        clip=clip,
        pad=pad,
        copy=copy,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

Binarizer component

BinarizerImage

BinarizerImage class

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
class BinarizerImage:
    """BinarizerImage class.

    Groups thresholding/binarization operations applied to a composed
    :class:`BaseImage`. Every method converts the image to grayscale first,
    since thresholding requires a single-channel input.
    """

    def __init__(self, base: BaseImage) -> None:
        # Composed BaseImage whose array is mutated in place by the methods below
        self.base = base

    def threshold_simple(self, thresh: int) -> None:
        """Compute the image thresholded by a single value T.
        All pixels with value v <= T are turned black and those with value v > T are
        turned white.

        Args:
            thresh (int): value to separate the black from the white pixels.
        """
        self.base.as_grayscale()
        self.base.asarray = np.array((self.base.asarray > thresh) * 255, dtype=np.uint8)

    def threshold_adaptative(self) -> None:
        """Apply adaptive thresholding using a Gaussian-weighted neighborhood
        (block size 11, constant 2).
        See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.
        """
        self.base.as_grayscale()
        binary = cv2.adaptiveThreshold(
            self.base.asarray,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )
        self.base.asarray = binary

    def threshold_otsu(self) -> None:
        """Apply Otsu thresholding, which determines the threshold value
        automatically from the image histogram.
        See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.
        """
        self.base.as_grayscale()
        _, img_thresholded = cv2.threshold(
            self.base.asarray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        self.base.asarray = img_thresholded

    def threshold_sauvola(
        self, window_size: int = 15, k: float = 0.2, r: float = 128.0
    ) -> None:
        """Apply Sauvola thresholding.
        See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
                plot_niblack_sauvola.html.

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.

        Args:
            window_size (int, optional): sauvola window size to apply on the
                image. Defaults to 15.
            k (float, optional): sauvola k factor to apply to regulate the impact
                of the std. Defaults to 0.2.
            r (float, optional): sauvola r value. Defaults to 128.
        """
        self.base.as_grayscale()
        self.base.asarray = threshold_niblack_like(
            img=self.base.asarray, method="sauvola", window_size=window_size, k=k, r=r
        )[1]

    def threshold_niblack(self, window_size: int = 15, k: float = 0.2) -> None:
        """Apply Niblack thresholding.
        See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
                plot_niblack_sauvola.html

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.

        Args:
            window_size (int, optional): niblack window size to apply on the
                image. Defaults to 15.
            k (float, optional): niblack k factor to apply to regulate the impact
                of the std. Defaults to 0.2.
        """
        self.base.as_grayscale()
        self.base.asarray = threshold_niblack_like(
            img=self.base.asarray, method="niblack", window_size=window_size, k=k
        )[1]

    def binary(self, method: BinarizationMethods = "sauvola") -> NDArray:
        """Binary representation of the image with values that can be only 0 or 1.
        The value 0 is now 0 and value of 255 are now 1. Black is 0 and white is 1.
        We can also talk about the mask of the image to refer to the binary
        representation of it.

        The sauvola is generally the best binarization method however it is
        way slower than the others methods. The adaptative or otsu method are the best
        method in terms of speed and quality.

        Args:
            method (str, optional): the binarization method to apply.
                Must be in ["adaptative", "otsu", "sauvola", "niblack", "nick", "wolf"].
                Defaults to "sauvola".

        Returns:
            NDArray: array where its inner values are 0 or 1

        Raises:
            ValueError: if the method is not a valid binarization method.
        """
        if method not in list(get_args(BinarizationMethods)):
            raise ValueError(
                f"Invalid binarization method {method}. "
                f"Must be in {BinarizationMethods}"
            )
        # dispatch to the matching threshold_* method, then read the 0/1 view
        getattr(self, f"threshold_{method}")()
        return self.base.asarray_binary

    def binaryrev(self, method: BinarizationMethods = "sauvola") -> NDArray:
        """Reversed binary representation of the image.
        The value 0 is now 1 and value of 255 are now 0. Black is 1 and white is 0.
        This is why it is called the "binary rev" or "binary reversed".

        Args:
            method (str, optional): the binarization method to apply.
                Defaults to "sauvola".

        Returns:
            NDArray: array where its inner values are 0 or 1
        """
        return 1 - self.binary(method=method)

binary(method='sauvola')

Binary representation of the image with values that can be only 0 or 1. The value 0 is now 0 and value of 255 are now 1. Black is 0 and white is 1. We can also talk about the mask of the image to refer to the binary representation of it.

The sauvola is generally the best binarization method however it is way slower than the others methods. The adaptative or otsu method are the best method in terms of speed and quality.

Parameters:

Name Type Description Default
method str

the binarization method to apply. Must be in ["adaptative", "otsu", "sauvola", "niblack", "nick", "wolf"]. Defaults to "sauvola".

'sauvola'

Returns:

Name Type Description
NDArray NDArray

array where its inner values are 0 or 1

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def binary(self, method: BinarizationMethods = "sauvola") -> NDArray:
    """Binary representation of the image with values that can be only 0 or 1.
    The value 0 is now 0 and value of 255 are now 1. Black is 0 and white is 1.
    We can also talk about the mask of the image to refer to the binary
    representation of it.

    The sauvola is generally the best binarization method however it is
    way slower than the others methods. The adaptative or otsu method are the best
    method in terms of speed and quality.

    Args:
        method (str, optional): the binarization method to apply.
            Must be in ["adaptative", "otsu", "sauvola", "niblack", "nick", "wolf"].
            Defaults to "sauvola".

    Returns:
        NDArray: array where its inner values are 0 or 1

    Raises:
        ValueError: if the method is not a valid binarization method.
    """
    valid_methods = list(get_args(BinarizationMethods))
    if method not in valid_methods:
        raise ValueError(
            f"Invalid binarization method {method}. "
            f"Must be in {BinarizationMethods}"
        )
    # dispatch to the matching threshold_* method, then read the 0/1 view
    apply_threshold = getattr(self, f"threshold_{method}")
    apply_threshold()
    return self.base.asarray_binary

binaryrev(method='sauvola')

Reversed binary representation of the image. The value 0 is now 1 and value of 255 are now 0. Black is 1 and white is 0. This is why it is called the "binary rev" or "binary reversed".

Parameters:

Name Type Description Default
method str

the binarization method to apply. Defaults to "sauvola".

'sauvola'

Returns:

Name Type Description
NDArray NDArray

array where its inner values are 0 or 1

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def binaryrev(self, method: BinarizationMethods = "sauvola") -> NDArray:
    """Reversed binary representation of the image.
    The value 0 is now 1 and value of 255 are now 0. Black is 1 and white is 0.
    This is why it is called the "binary rev" or "binary reversed".

    Args:
        method (str, optional): the binarization method to apply.
            Defaults to "sauvola".

    Returns:
        NDArray: array where its inner values are 0 or 1
    """
    return 1 - self.binary(method=method)

threshold_adaptative()

Apply adaptive thresholding.

Adaptive thresholding with a Gaussian-weighted neighborhood is applied. See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_adaptative(self) -> None:
    """Apply adaptive thresholding using a Gaussian-weighted neighborhood
    (block size 11, constant 2).
    See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.
    """
    self.base.as_grayscale()
    binary = cv2.adaptiveThreshold(
        self.base.asarray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    self.base.asarray = binary

threshold_niblack(window_size=15, k=0.2)

Apply Niblack thresholding. See https://scikit-image.org/docs/stable/auto_examples/segmentation/ plot_niblack_sauvola.html

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Parameters:

Name Type Description Default
window_size int

apply on the image. Defaults to 15.

15
k float

factor to apply to regulate the impact of the std. Defaults to 0.2.

0.2
Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_niblack(self, window_size: int = 15, k: float = 0.2) -> None:
    """Apply Niblack thresholding.
    See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
            plot_niblack_sauvola.html

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.

    Args:
        window_size (int, optional): niblack window size to apply on the
            image. Defaults to 15.
        k (float, optional): niblack k factor to apply to regulate the impact
            of the std. Defaults to 0.2.
    """
    self.base.as_grayscale()
    self.base.asarray = threshold_niblack_like(
        img=self.base.asarray, method="niblack", window_size=window_size, k=k
    )[1]

threshold_otsu()

Apply Otsu thresholding.

Otsu's method determines the threshold value automatically from the image histogram. See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_otsu(self) -> None:
    """Apply Otsu thresholding, which determines the threshold value
    automatically from the image histogram.
    See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.
    """
    self.base.as_grayscale()
    _, img_thresholded = cv2.threshold(
        self.base.asarray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    self.base.asarray = img_thresholded

threshold_sauvola(window_size=15, k=0.2, r=128.0)

Apply Sauvola thresholding. See https://scikit-image.org/docs/stable/auto_examples/segmentation/ plot_niblack_sauvola.html.

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Parameters:

Name Type Description Default
window_size int

sauvola window size to apply on the image. Defaults to 15.

15
k float

sauvola k factor to apply to regulate the impact of the std. Defaults to 0.2.

0.2
r float

sauvola r value. Defaults to 128.

128.0
Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_sauvola(
    self, window_size: int = 15, k: float = 0.2, r: float = 128.0
) -> None:
    """Apply Sauvola thresholding.
    See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
            plot_niblack_sauvola.html.

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.

    Args:
        window_size (int, optional): sauvola window size to apply on the
            image. Defaults to 15.
        k (float, optional): sauvola k factor to apply to regulate the impact
            of the std. Defaults to 0.2.
        r (float, optional): sauvola r value. Defaults to 128.
    """
    self.base.as_grayscale()
    result = threshold_niblack_like(
        img=self.base.asarray, method="sauvola", window_size=window_size, k=k, r=r
    )
    # index 1 holds the thresholded image
    self.base.asarray = result[1]

threshold_simple(thresh)

Compute the image thresholded by a single value T. All pixels with value v <= T are turned black and those with value v > T are turned white.

Parameters:

Name Type Description Default
thresh int

value to separate the black from the white pixels.

required
Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_simple(self, thresh: int) -> None:
    """Compute the image thresholded by a single value T.
    All pixels with value v <= T are turned black and those with value v > T are
    turned white.

    Args:
        thresh (int): value to separate the black from the white pixels.
    """
    self.base.as_grayscale()
    grayscale = self.base.asarray
    self.base.asarray = np.where(grayscale > thresh, 255, 0).astype(np.uint8)

Geometry Transformer component

GeometrizerImage

GeometrizerImage class

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
class GeometrizerImage:
    """GeometrizerImage class.

    Groups geometric transformations (shift, rotation, centering) applied to
    a composed :class:`BaseImage`.
    """

    def __init__(self, base: BaseImage) -> None:
        # Composed BaseImage whose array is mutated in place by the methods below
        self.base = base

    def shift(self, shift: NDArray, fill_value: Sequence[float] = (0.0,)) -> None:
        """Shift the image by performing a translation operation

        Args:
            shift (NDArray): Vector for translation
            fill_value (Sequence[float], optional): value to fill the
                border of the image revealed by the translation.
                Can be a tuple of 3 values for RGB image or a single value for
                grayscale image. Defaults to (0.0,) which is black.
        """
        vector_shift = assert_transform_shift_vector(vector=shift)
        shift_matrix = np.asarray(
            [[1.0, 0.0, vector_shift[0]], [0.0, 1.0, vector_shift[1]]],
            dtype=np.float32,
        )

        self.base.asarray = cv2.warpAffine(
            src=self.base.asarray,
            M=shift_matrix,
            dsize=(self.base.width, self.base.height),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=fill_value,
        )  # type: ignore[call-overload]

    def __rotate_exact(
        self,
        angle: float,
        is_degree: bool = False,
        is_clockwise: bool = True,
        reshape: bool = True,
        border_fill_value: float = 0.0,
    ) -> None:
        """Rotate the image by a given angle.
        This method is more accurate than the rotate method but way slower
        (about 10 times slower).

        Args:
            angle (float): angle to rotate the image
            is_degree (bool, optional): whether the angle is in degree or not.
                If not it is considered to be in radians.
                Defaults to False which means radians.
            is_clockwise (bool, optional): whether the rotation is clockwise or
                counter-clockwise. Defaults to True.
            reshape (bool, optional): scipy reshape option. Defaults to True.
            border_fill_value (float, optional): value to fill the border of the image
                after the rotation in case reshape is True. Can only be a single
                integer. Does not support tuple of 3 integers for RGB image.
                Defaults to 0.0 which is black.
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        if not is_degree:
            angle = np.rad2deg(angle)
        if is_clockwise:
            # by default scipy rotate is counter-clockwise
            angle = -angle
        self.base.asarray = scipy.ndimage.rotate(
            input=self.base.asarray,
            angle=angle,
            reshape=reshape,
            cval=border_fill_value,
        )

    def rotate(
        self,
        angle: float,
        is_degree: bool = False,
        is_clockwise: bool = True,
        reshape: bool = True,
        fill_value: Sequence[float] = (0.0,),
        fast: bool = True,
    ) -> None:
        """Rotate the image by a given angle.

        For the rotation with reshape, meaning preserving the whole image,
        we used the code from the imutils library:
        https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py#L41

        Args:
            angle (float): angle to rotate the image
            is_degree (bool, optional): whether the angle is in degree or not.
                If not it is considered to be in radians.
                Defaults to False which means radians.
            is_clockwise (bool, optional): whether the rotation is clockwise or
                counter-clockwise. Defaults to True.
            reshape (bool, optional): whether to preserve the original image or not.
                If True, the complete image is preserved hence the width and height
                of the rotated image are different than in the original image.
                Defaults to True.
            fill_value (Sequence[float], optional): value to
                fill the border of the image after the rotation in case reshape is True.
                Can be a tuple of 3 values for RGB image or a single value for
                grayscale image. Defaults to (0.0,) which is black.
            fast (bool, optional): if True use the fast cv2-based rotation;
                if False use the slower but more accurate scipy-based rotation,
                which only supports a single scalar fill value. Defaults to True.

        Raises:
            ValueError: if fast is False and fill_value is not a single float.
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # pylint: disable=too-many-locals
        if not fast:  # using scipy rotate which is slower than cv2
            border_fill_value_scalar = fill_value[0]
            if not isinstance(border_fill_value_scalar, float):
                raise ValueError(
                    f"The border_fill_value {border_fill_value_scalar} is not a valid "
                    "value. It must be a single integer when fast mode is off"
                )
            self.__rotate_exact(
                angle=angle,
                is_degree=is_degree,
                is_clockwise=is_clockwise,
                reshape=reshape,
                border_fill_value=border_fill_value_scalar,
            )
            return None

        if not is_degree:
            angle = np.rad2deg(angle)
        if is_clockwise:
            angle = -angle

        h, w = self.base.asarray.shape[:2]
        center = (w / 2, h / 2)

        # Compute rotation matrix
        rotmat = cv2.getRotationMatrix2D(center, angle, 1.0)  # param angle in degree

        if reshape:
            # Compute new bounding dimensions
            cos_a = np.abs(rotmat[0, 0])
            sin_a = np.abs(rotmat[0, 1])
            new_w = int((h * sin_a) + (w * cos_a))
            new_h = int((h * cos_a) + (w * sin_a))
            w, h = new_w, new_h

            # Adjust the rotation matrix to shift the image center
            rotmat[0, 2] += (w / 2) - center[0]
            rotmat[1, 2] += (h / 2) - center[1]

        self.base.asarray = cv2.warpAffine(
            src=self.base.asarray,
            M=rotmat,
            dsize=(w, h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=fill_value,
        )  # type: ignore[call-overload]
        return None

    def center_to_point(self, point: NDArray) -> NDArray:
        """Shift the image so that the input point ends up in the middle of the
        new image

        Args:
            point (NDArray): point as (2,) shape numpy array

        Returns:
            NDArray: translation Vector
        """
        shift_vector = self.base.center - point
        self.shift(shift=shift_vector)
        return shift_vector

    def center_to_segment(self, segment: NDArray) -> NDArray:
        """Shift the image so that the segment middle point ends up in the middle
        of the new image

        Args:
            segment (NDArray): segment as numpy array of shape (2, 2)

        Returns:
            NDArray: vector_shift
        """
        return self.center_to_point(point=geo.Segment(segment).centroid)

    def restrict_rect_in_frame(self, rectangle: geo.Rectangle) -> geo.Rectangle:
        """Create a new rectangle that is contained within the image borders.
        If the input rectangle is outside the image, the returned rectangle is a
        image frame-fitted rectangle that preserve the same shape.

        Args:
            rectangle (geo.Rectangle): input rectangle

        Returns:
            geo.Rectangle: new rectangle
        """
        # rectangle boundaries
        xmin, xmax = rectangle.xmin, rectangle.xmax
        ymin, ymax = rectangle.ymin, rectangle.ymax

        # recalculate boundaries based on image shape
        xmin = max(0, xmin)
        ymin = max(0, ymin)
        xmax = min(self.base.width, xmax)
        ymax = min(self.base.height, ymax)

        # recreate a rectangle with new coordinates
        rect_restricted = geo.Rectangle.from_topleft_bottomright(
            topleft=np.asarray([xmin, ymin]),
            bottomright=np.asarray([xmax, ymax]),
            is_cast_int=True,
        )
        return rect_restricted

__rotate_exact(angle, is_degree=False, is_clockwise=True, reshape=True, border_fill_value=0.0)

Rotate the image by a given angle. This method is more accurate than the rotate method but way slower (about 10 times slower).

Parameters:

Name Type Description Default
angle float

angle to rotate the image

required
is_degree bool

whether the angle is in degree or not. If not it is considered to be in radians. Defaults to False which means radians.

False
is_clockwise bool

whether the rotation is clockwise or counter-clockwise. Defaults to True.

True
reshape bool

scipy reshape option. Defaults to True.

True
border_fill_value float

value to fill the border of the image after the rotation in case reshape is True. Can only be a single integer. Does not support tuple of 3 integers for RGB image. Defaults to 0.0 which is black.

0.0
Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def __rotate_exact(
    self,
    angle: float,
    is_degree: bool = False,
    is_clockwise: bool = True,
    reshape: bool = True,
    border_fill_value: float = 0.0,
) -> None:
    """Rotate the image by a given angle.
    This method is more accurate than the rotate method but way slower
    (about 10 times slower).

    Args:
        angle (float): angle to rotate the image
        is_degree (bool, optional): whether the angle is in degree or not.
            If not it is considered to be in radians.
            Defaults to False which means radians.
        is_clockwise (bool, optional): whether the rotation is clockwise or
            counter-clockwise. Defaults to True.
        reshape (bool, optional): scipy reshape option. Defaults to True.
        border_fill_value (float, optional): value to fill the border of the image
            after the rotation in case reshape is True. Can only be a single
            integer. Does not support tuple of 3 integers for RGB image.
            Defaults to 0.0 which is black.
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    angle_deg = angle if is_degree else np.rad2deg(angle)
    # scipy rotates counter-clockwise by default, so negate for clockwise
    if is_clockwise:
        angle_deg = -angle_deg
    self.base.asarray = scipy.ndimage.rotate(
        input=self.base.asarray,
        angle=angle_deg,
        reshape=reshape,
        cval=border_fill_value,
    )

center_to_point(point)

Shift the image so that the input point ends up in the middle of the new image

Parameters:

Name Type Description Default
point NDArray

point as (2,) shape numpy array

required

Returns:

Name Type Description
NDArray NDArray

translation Vector

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def center_to_point(self, point: NDArray) -> NDArray:
    """Shift the image so that the input point ends up in the middle of the
    new image

    Args:
        point (NDArray): point as (2,) shape numpy array

    Returns:
        NDArray: translation Vector
    """
    # translation that moves `point` onto the image center
    translation = self.base.center - point
    self.shift(shift=translation)
    return translation

center_to_segment(segment)

Shift the image so that the segment middle point ends up in the middle of the new image

Parameters:

Name Type Description Default
segment NDArray

segment as numpy array of shape (2, 2)

required

Returns:

Name Type Description
NDArray NDArray

vector_shift

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def center_to_segment(self, segment: NDArray) -> NDArray:
    """Shift the image so that the segment middle point ends up in the middle
    of the new image

    Args:
        segment (NDArray): segment as numpy array of shape (2, 2)

    Returns:
        NDArray: vector_shift
    """
    midpoint = geo.Segment(segment).centroid
    return self.center_to_point(point=midpoint)

restrict_rect_in_frame(rectangle)

Create a new rectangle that is contained within the image borders. If the input rectangle is outside the image, the returned rectangle is a image frame-fitted rectangle that preserve the same shape.

Parameters:

Name Type Description Default
rectangle Rectangle

input rectangle

required

Returns:

Type Description
Rectangle

geo.Rectangle: new rectangle

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def restrict_rect_in_frame(self, rectangle: geo.Rectangle) -> geo.Rectangle:
    """Create a new rectangle that is contained within the image borders.
    If the input rectangle is outside the image, the returned rectangle is a
    image frame-fitted rectangle that preserve the same shape.

    Args:
        rectangle (geo.Rectangle): input rectangle

    Returns:
        geo.Rectangle: new rectangle
    """
    # clamp each boundary to the image frame [0, width] x [0, height]
    clamped_xmin = max(0, rectangle.xmin)
    clamped_ymin = max(0, rectangle.ymin)
    clamped_xmax = min(self.base.width, rectangle.xmax)
    clamped_ymax = min(self.base.height, rectangle.ymax)

    # build a new rectangle from the clamped corners
    return geo.Rectangle.from_topleft_bottomright(
        topleft=np.asarray([clamped_xmin, clamped_ymin]),
        bottomright=np.asarray([clamped_xmax, clamped_ymax]),
        is_cast_int=True,
    )

rotate(angle, is_degree=False, is_clockwise=True, reshape=True, fill_value=(0.0,), fast=True)

Rotate the image by a given angle.

For the rotation with reshape, meaning preserving the whole image, we used the code from the imutils library: https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py#L41

Parameters:

Name Type Description Default
angle float

angle to rotate the image

required
is_degree bool

whether the angle is in degree or not. If not it is considered to be in radians. Defaults to False which means radians.

False
is_clockwise bool

whether the rotation is clockwise or counter-clockwise. Defaults to True.

True
reshape bool

whether to preserve the original image or not. If True, the complete image is preserved hence the width and height of the rotated image are different than in the original image. Defaults to True.

True
fill_value Sequence[float]

value to fill the border of the image after the rotation in case reshape is True. Can be a tuple of 3 values for RGB image or a single value for grayscale image. Defaults to (0.0,) which is black.

(0.0,)
Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def rotate(
    self,
    angle: float,
    is_degree: bool = False,
    is_clockwise: bool = True,
    reshape: bool = True,
    fill_value: Sequence[float] = (0.0,),
    fast: bool = True,
) -> None:
    """Rotate the image by a given angle.

    For the rotation with reshape, meaning preserving the whole image,
    we used the code from the imutils library:
    https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py#L41

    Args:
        angle (float): angle to rotate the image
        is_degree (bool, optional): whether the angle is in degree or not.
            If not it is considered to be in radians.
            Defaults to False which means radians.
        is_clockwise (bool, optional): whether the rotation is clockwise or
            counter-clockwise. Defaults to True.
        reshape (bool, optional): whether to preserve the original image or not.
            If True, the complete image is preserved hence the width and height
            of the rotated image are different than in the original image.
            Defaults to True.
        fill_value (Sequence[float], optional): value to
            fill the border of the image after the rotation in case reshape is True.
            Can be a tuple of 3 values for RGB image or a single value for
            grayscale image. Defaults to (0.0,) which is black.
        fast (bool, optional): if True use the fast cv2-based rotation;
            if False use the slower but more accurate scipy-based rotation,
            which only supports a single scalar fill value. Defaults to True.

    Raises:
        ValueError: if fast is False and fill_value is not a single float.
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    # pylint: disable=too-many-locals
    if not fast:  # using scipy rotate which is slower than cv2
        border_fill_value_scalar = fill_value[0]
        if not isinstance(border_fill_value_scalar, float):
            raise ValueError(
                f"The border_fill_value {border_fill_value_scalar} is not a valid "
                "value. It must be a single integer when fast mode is off"
            )
        self.__rotate_exact(
            angle=angle,
            is_degree=is_degree,
            is_clockwise=is_clockwise,
            reshape=reshape,
            border_fill_value=border_fill_value_scalar,
        )
        return None

    if not is_degree:
        angle = np.rad2deg(angle)
    if is_clockwise:
        angle = -angle

    h, w = self.base.asarray.shape[:2]
    center = (w / 2, h / 2)

    # Compute rotation matrix
    rotmat = cv2.getRotationMatrix2D(center, angle, 1.0)  # param angle in degree

    if reshape:
        # Compute new bounding dimensions
        cos_a = np.abs(rotmat[0, 0])
        sin_a = np.abs(rotmat[0, 1])
        new_w = int((h * sin_a) + (w * cos_a))
        new_h = int((h * cos_a) + (w * sin_a))
        w, h = new_w, new_h

        # Adjust the rotation matrix to shift the image center
        rotmat[0, 2] += (w / 2) - center[0]
        rotmat[1, 2] += (h / 2) - center[1]

    self.base.asarray = cv2.warpAffine(
        src=self.base.asarray,
        M=rotmat,
        dsize=(w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=fill_value,
    )  # type: ignore[call-overload]
    return None

shift(shift, fill_value=(0.0,))

Shift the image by performing a translation operation

Parameters:

Name Type Description Default
shift NDArray

Vector for translation

required
fill_value Sequence[float]

value to fill the border of the image revealed by the translation. Can be a tuple of 3 values for RGB image or a single value for grayscale image. Defaults to (0.0,) which is black.

(0.0,)
Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def shift(self, shift: NDArray, fill_value: Sequence[float] = (0.0,)) -> None:
    """Shift the image by performing a translation operation.

    Args:
        shift (NDArray): translation vector (dx, dy) in pixels.
        fill_value (Sequence[float], optional): value used to fill the pixels
            uncovered by the translation. Can be a sequence of 3 values for an
            RGB image or a single value for a grayscale image.
            Defaults to (0.0,) which is black.
    """
    vector_shift = assert_transform_shift_vector(vector=shift)

    # Affine matrix of a pure translation: identity rotation part plus the
    # shift vector as the offset column.
    shift_matrix = np.asarray(
        [[1.0, 0.0, vector_shift[0]], [0.0, 1.0, vector_shift[1]]],
        dtype=np.float32,
    )

    # Output size is unchanged; pixels shifted outside are dropped and the
    # uncovered area is filled with fill_value.
    self.base.asarray = cv2.warpAffine(
        src=self.base.asarray,
        M=shift_matrix,
        dsize=(self.base.width, self.base.height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=fill_value,
    )  # type: ignore[call-overload]

Morphologyzer Transformer component

MorphologyzerImage

MorphologyzerImage.

Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
class MorphologyzerImage:
    """MorphologyzerImage.

    Morphology and scaling operations (resize, blur, dilate, erode, border
    padding) applied in place on the wrapped BaseImage array.
    """

    def __init__(self, base: BaseImage) -> None:
        self.base = base

    def resize_fixed(
        self,
        dim: tuple[int, int],
        interpolation: int = cv2.INTER_AREA,
        copy: bool = False,
    ) -> Optional[Image]:
        """Resize the image using a fixed dimension well defined.
        This function can result in a distorted image if the ratio between
        width and height is different in the original and the new image.

        If the dim argument has a non-positive value in height or width, then
        a proportional ratio is applied based on the one of the two dimension
        given.

        Args:
            dim (tuple[int, int]): a tuple with two integers in the following
                order (width, height).
            interpolation (int, optional): resize interpolation.
                Defaults to cv2.INTER_AREA.
            copy (bool, optional): whether to return a new image or not.

        Returns:
            Optional[Image]: a new Image if copy is True, None otherwise.

        Raises:
            ValueError: if neither dimension is strictly positive.
        """
        # At least one dimension must be strictly positive so the other can
        # be derived from it (previously (0, 0) slipped through the check).
        if dim[0] <= 0 and dim[1] <= 0:
            raise ValueError(
                f"The dim argument {dim} must have at least one positive value."
            )

        _dim = list(dim)

        # Derive the missing dimension while preserving the aspect ratio.
        if _dim[1] <= 0:
            _dim[1] = int(self.base.height * (_dim[0] / self.base.width))
        if _dim[0] <= 0:
            _dim[0] = int(self.base.width * (_dim[1] / self.base.height))

        result = cv2.resize(
            src=self.base.asarray, dsize=tuple(_dim), interpolation=interpolation
        )

        if copy:
            # pylint: disable=import-outside-toplevel
            from otary.image import Image

            return Image(image=result)

        self.base.asarray = result
        return None

    def resize(
        self, factor: float, interpolation: int = cv2.INTER_AREA, copy: bool = False
    ) -> Optional[Image]:
        """Resize the image to a new size using a scaling factor value that
        will be applied to all dimensions (width and height).

        Applying this method can not result in a distorted image.

        Args:
            factor (float): factor in (0, 5] to resize the image.
                A value of 1 does not change the image.
                A value of 2 doubles the image size.
                A maximum value of 5 is set to avoid accidentally producing a
                gigantic image.
            interpolation (int, optional): resize interpolation.
                Defaults to cv2.INTER_AREA.
            copy (bool, optional): whether to return a new image or not.

        Returns:
            Optional[Image]: a new Image if copy is True, None otherwise.

        Raises:
            ValueError: if factor is not in (0, 5].
        """
        if factor == 1:
            return None

        # A factor of 0 would produce an empty image, so it is rejected too
        # (the previous `factor < 0` check let 0 through).
        if factor <= 0:
            raise ValueError(
                f"The resize factor value {factor} must be strictly positive"
            )

        max_scale_pct = 5
        if factor > max_scale_pct:
            raise ValueError(f"The resize factor value {factor} is probably too big")

        width = int(self.base.width * factor)
        height = int(self.base.height * factor)
        dim = (width, height)

        return self.resize_fixed(dim=dim, interpolation=interpolation, copy=copy)

    def blur(
        self,
        kernel: tuple = (5, 5),
        iterations: int = 1,
        method: BlurMethods = "average",
        sigmax: float = 0,
    ) -> None:
        """Blur the image

        Args:
            kernel (tuple, optional): blur kernel size. Defaults to (5, 5).
            iterations (int, optional): number of iterations. Defaults to 1.
            method (str, optional): blur method.
                Must be in ["average", "median", "gaussian", "bilateral"].
                Defaults to "average".
            sigmax (float, optional): sigmaX value for the gaussian blur.
                Defaults to 0.

        Raises:
            ValueError: if method is not a valid blur method.
        """
        valid_methods = get_args(BlurMethods)
        if method not in valid_methods:
            # Show the accepted values, not the Literal type object.
            raise ValueError(
                f"Invalid blur method {method}. Must be in {valid_methods}"
            )

        for _ in range(iterations):
            if method == "average":
                self.base.asarray = cv2.blur(src=self.base.asarray, ksize=kernel)
            elif method == "median":
                # medianBlur expects a single aperture size, hence kernel[0].
                self.base.asarray = cv2.medianBlur(
                    src=self.base.asarray, ksize=kernel[0]
                )
            elif method == "gaussian":
                self.base.asarray = cv2.GaussianBlur(
                    src=self.base.asarray, ksize=kernel, sigmaX=sigmax
                )
            elif method == "bilateral":
                self.base.asarray = cv2.bilateralFilter(
                    src=self.base.asarray, d=kernel[0], sigmaColor=75, sigmaSpace=75
                )

    def dilate(
        self,
        kernel: tuple = (5, 5),
        iterations: int = 1,
        dilate_black_pixels: bool = True,
    ) -> None:
        """Dilate the image by making the black pixels expand in the image.
        The dilatation can be parametrize thanks to the kernel and iterations
        arguments.

        Args:
            kernel (tuple, optional): kernel to dilate. Defaults to (5, 5).
            iterations (int, optional): number of dilatation iterations. Defaults to 1.
            dilate_black_pixels (bool, optional): whether to dilate black pixels or not
        """
        if iterations == 0:
            return None

        if dilate_black_pixels:
            # Dilating black pixels == dilating the inverted image, then
            # inverting the result back.
            self.base.asarray = 255 - np.asarray(
                cv2.dilate(
                    self.base.rev().asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )
        else:  # dilate white pixels by default
            self.base.asarray = np.asarray(
                cv2.dilate(
                    self.base.asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )

        return None

    def erode(
        self,
        kernel: tuple = (5, 5),
        iterations: int = 1,
        erode_black_pixels: bool = True,
    ) -> None:
        """Erode the image by making the black pixels shrink in the image.
        The anti-dilatation can be parametrize thanks to the kernel and iterations
        arguments.

        Args:
            kernel (tuple, optional): kernel to erode. Defaults to (5, 5).
            iterations (int, optional): number of iterations. Defaults to 1.
            erode_black_pixels (bool, optional): whether to erode black pixels or not
        """
        # Nothing to do for zero iterations. The previous guard was a no-op
        # `pass`, so execution fell through to a pointless erode call.
        if iterations == 0:
            return None

        if erode_black_pixels:
            # Eroding black pixels == eroding the inverted image, then
            # inverting the result back.
            self.base.asarray = 255 - np.asarray(
                cv2.erode(
                    self.base.rev().asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )
        else:
            self.base.asarray = np.asarray(
                cv2.erode(
                    self.base.asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )

        return None

    def add_border(self, size: int, fill_value: int = 0) -> None:
        """Add a constant border of the given thickness around the image.

        Args:
            size (int): border thickness in pixels, applied on all four sides.
            fill_value (int, optional): border color. Defaults to 0 (black).
        """
        size = int(size)
        self.base.asarray = cv2.copyMakeBorder(
            src=self.base.asarray,
            top=size,
            bottom=size,
            left=size,
            right=size,
            borderType=cv2.BORDER_CONSTANT,
            value=fill_value,
        )  # type: ignore[call-overload]

add_border(size, fill_value=0)

Add a border to the image.

Parameters:

Name Type Description Default
size int

border thickness in pixels, applied on all four sides.

required
fill_value int

border color. Defaults to 0.

0
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def add_border(self, size: int, fill_value: int = 0) -> None:
    """Add a constant border of the given thickness around the image.

    Args:
        size (int): border thickness in pixels, applied on all four sides.
        fill_value (int, optional): border color. Defaults to 0 (black).
    """
    size = int(size)
    self.base.asarray = cv2.copyMakeBorder(
        src=self.base.asarray,
        top=size,
        bottom=size,
        left=size,
        right=size,
        borderType=cv2.BORDER_CONSTANT,
        value=fill_value,
    )  # type: ignore[call-overload]

blur(kernel=(5, 5), iterations=1, method='average', sigmax=0)

Blur the image

Parameters:

Name Type Description Default
kernel tuple

blur kernel size. Defaults to (5, 5).

(5, 5)
iterations int

number of iterations. Defaults to 1.

1
method str

blur method. Must be in ["average", "median", "gaussian", "bilateral"]. Defaults to "average".

'average'
sigmax float

sigmaX value for the gaussian blur. Defaults to 0.

0
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def blur(
    self,
    kernel: tuple = (5, 5),
    iterations: int = 1,
    method: BlurMethods = "average",
    sigmax: float = 0,
) -> None:
    """Blur the image

    Args:
        kernel (tuple, optional): blur kernel size. Defaults to (5, 5).
        iterations (int, optional): number of iterations. Defaults to 1.
        method (str, optional): blur method.
            Must be in ["average", "median", "gaussian", "bilateral"].
            Defaults to "average".
        sigmax (float, optional): sigmaX value for the gaussian blur.
            Defaults to 0.

    Raises:
        ValueError: if method is not a valid blur method.
    """
    valid_methods = get_args(BlurMethods)
    if method not in valid_methods:
        # Show the accepted values, not the Literal type object.
        raise ValueError(
            f"Invalid blur method {method}. Must be in {valid_methods}"
        )

    for _ in range(iterations):
        if method == "average":
            self.base.asarray = cv2.blur(src=self.base.asarray, ksize=kernel)
        elif method == "median":
            # medianBlur expects a single aperture size, hence kernel[0].
            self.base.asarray = cv2.medianBlur(
                src=self.base.asarray, ksize=kernel[0]
            )
        elif method == "gaussian":
            self.base.asarray = cv2.GaussianBlur(
                src=self.base.asarray, ksize=kernel, sigmaX=sigmax
            )
        elif method == "bilateral":
            self.base.asarray = cv2.bilateralFilter(
                src=self.base.asarray, d=kernel[0], sigmaColor=75, sigmaSpace=75
            )

dilate(kernel=(5, 5), iterations=1, dilate_black_pixels=True)

Dilate the image by making the black pixels expand in the image. The dilatation can be parametrize thanks to the kernel and iterations arguments.

Parameters:

Name Type Description Default
kernel tuple

kernel to dilate. Defaults to (5, 5).

(5, 5)
iterations int

number of dilatation iterations. Defaults to 1.

1
dilate_black_pixels bool

whether to dilate black pixels or not

True
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def dilate(
    self,
    kernel: tuple = (5, 5),
    iterations: int = 1,
    dilate_black_pixels: bool = True,
) -> None:
    """Dilate the image by making the black pixels expand in the image.
    The dilatation can be parametrize thanks to the kernel and iterations
    arguments.

    Args:
        kernel (tuple, optional): kernel to dilate. Defaults to (5, 5).
        iterations (int, optional): number of dilatation iterations. Defaults to 1.
        dilate_black_pixels (bool, optional): whether to dilate black pixels or not
    """
    if iterations == 0:
        return None

    struct_elem = np.ones(kernel, np.uint8)

    if dilate_black_pixels:
        # Dilating black pixels == dilating the inverted image, then
        # inverting the result back.
        grown = cv2.dilate(
            self.base.rev().asarray,
            kernel=struct_elem,
            iterations=iterations,
        )
        self.base.asarray = 255 - np.asarray(grown, dtype=np.uint8)
    else:
        # Default behavior: dilate the white pixels directly.
        grown = cv2.dilate(
            self.base.asarray,
            kernel=struct_elem,
            iterations=iterations,
        )
        self.base.asarray = np.asarray(grown, dtype=np.uint8)

    return None

erode(kernel=(5, 5), iterations=1, erode_black_pixels=True)

Erode the image by making the black pixels shrink in the image. The anti-dilatation can be parametrize thanks to the kernel and iterations arguments.

Parameters:

Name Type Description Default
kernel tuple

kernel to erode. Defaults to (5, 5).

(5, 5)
iterations int

number of iterations. Defaults to 1.

1
erode_black_pixels bool

whether to erode black pixels or not

True
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def erode(
    self,
    kernel: tuple = (5, 5),
    iterations: int = 1,
    erode_black_pixels: bool = True,
) -> None:
    """Erode the image by making the black pixels shrink in the image.
    The anti-dilatation can be parametrize thanks to the kernel and iterations
    arguments.

    Args:
        kernel (tuple, optional): kernel to erode. Defaults to (5, 5).
        iterations (int, optional): number of iterations. Defaults to 1.
        erode_black_pixels (bool, optional): whether to erode black pixels or not
    """
    # Nothing to do for zero iterations. The previous guard was a no-op
    # `pass`, so execution fell through to a pointless erode call.
    if iterations == 0:
        return None

    if erode_black_pixels:
        # Eroding black pixels == eroding the inverted image, then
        # inverting the result back.
        self.base.asarray = 255 - np.asarray(
            cv2.erode(
                self.base.rev().asarray,
                kernel=np.ones(kernel, np.uint8),
                iterations=iterations,
            ),
            dtype=np.uint8,
        )
    else:
        self.base.asarray = np.asarray(
            cv2.erode(
                self.base.asarray,
                kernel=np.ones(kernel, np.uint8),
                iterations=iterations,
            ),
            dtype=np.uint8,
        )

    return None

resize(factor, interpolation=cv2.INTER_AREA, copy=False)

Resize the image to a new size using a scaling factor value that will be applied to all dimensions (width and height).

Applying this method can not result in a distorted image.

Parameters:

Name Type Description Default
factor float

factor in (0, 5] to resize the image. A value of 1 does not change the image. A value of 2 doubles the image size. A maximum value of 5 is set to avoid accidentally producing a gigantic image.

required
interpolation int

resize interpolation. Defaults to cv2.INTER_AREA.

INTER_AREA
copy bool

whether to return a new image or not.

False
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def resize(
    self, factor: float, interpolation: int = cv2.INTER_AREA, copy: bool = False
) -> Optional[Image]:
    """Resize the image to a new size using a scaling factor value that
    will be applied to all dimensions (width and height).

    Applying this method can not result in a distorted image.

    Args:
        factor (float): factor in (0, 5] to resize the image.
            A value of 1 does not change the image.
            A value of 2 doubles the image size.
            A maximum value of 5 is set to avoid accidentally producing a
            gigantic image.
        interpolation (int, optional): resize interpolation.
            Defaults to cv2.INTER_AREA.
        copy (bool, optional): whether to return a new image or not.

    Returns:
        Optional[Image]: a new Image if copy is True, None otherwise.

    Raises:
        ValueError: if factor is not in (0, 5].
    """
    if factor == 1:
        return None

    # A factor of 0 would produce an empty image, so it is rejected too
    # (the previous `factor < 0` check let 0 through).
    if factor <= 0:
        raise ValueError(
            f"The resize factor value {factor} must be strictly positive"
        )

    max_scale_pct = 5
    if factor > max_scale_pct:
        raise ValueError(f"The resize factor value {factor} is probably too big")

    width = int(self.base.width * factor)
    height = int(self.base.height * factor)
    dim = (width, height)

    return self.resize_fixed(dim=dim, interpolation=interpolation, copy=copy)

resize_fixed(dim, interpolation=cv2.INTER_AREA, copy=False)

Resize the image using a fixed dimension well defined. This function can result in a distorted image if the ratio between width and height is different in the original and the new image.

If the dim argument has a negative value in height or width, then a proportional ratio is applied based on the one of the two dimension given.

Parameters:

Name Type Description Default
dim tuple[int, int]

a tuple with two integers in the following order (width, height).

required
interpolation int

resize interpolation. Defaults to cv2.INTER_AREA.

INTER_AREA
copy bool

whether to return a new image or not.

False
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def resize_fixed(
    self,
    dim: tuple[int, int],
    interpolation: int = cv2.INTER_AREA,
    copy: bool = False,
) -> Optional[Image]:
    """Resize the image using a fixed dimension well defined.
    This function can result in a distorted image if the ratio between
    width and height is different in the original and the new image.

    If the dim argument has a non-positive value in height or width, then
    a proportional ratio is applied based on the one of the two dimension
    given.

    Args:
        dim (tuple[int, int]): a tuple with two integers in the following order
            (width, height).
        interpolation (int, optional): resize interpolation.
            Defaults to cv2.INTER_AREA.
        copy (bool, optional): whether to return a new image or not.

    Returns:
        Optional[Image]: a new Image if copy is True, None otherwise.

    Raises:
        ValueError: if neither dimension is strictly positive.
    """
    # At least one dimension must be strictly positive so the other can be
    # derived from it (previously (0, 0) slipped through the check).
    if dim[0] <= 0 and dim[1] <= 0:
        raise ValueError(
            f"The dim argument {dim} must have at least one positive value."
        )

    _dim = list(dim)

    # Derive the missing dimension while preserving the aspect ratio.
    if _dim[1] <= 0:
        _dim[1] = int(self.base.height * (_dim[0] / self.base.width))
    if _dim[0] <= 0:
        _dim[0] = int(self.base.width * (_dim[1] / self.base.height))

    result = cv2.resize(
        src=self.base.asarray, dsize=tuple(_dim), interpolation=interpolation
    )

    if copy:
        # pylint: disable=import-outside-toplevel
        from otary.image import Image

        return Image(image=result)

    self.base.asarray = result
    return None