Skip to content

Image

The image module provides a flexible and powerful way to work with images using a composition-based design. This allows you to dynamically build image objects with the exact functionality you need.

Core Components

The image module is built around the following core components:

  • Image: The main class that represents an image. It is composed of other components to provide its functionality.
  • Reader: Responsible for loading image data from various sources.
  • Drawer: Provides methods for drawing shapes and text on the image.
  • Transformer: Allows you to apply various transformations to the image, such as resizing, cropping, and color adjustments.

Available Modules

Below is a list of available modules and their functionalities:

Base Image module for basic image processing. It only contains very low-level, basic and generic image methods.

BaseImage

Base Image class

Source code in otary/image/base.py
class BaseImage:
    """Base Image class.

    Thin wrapper around a numpy array providing geometric accessors
    (shape, corners, center, ...) and basic conversions.
    """

    # pylint: disable=too-many-public-methods

    def __init__(self, image: NDArray) -> None:
        # Copy the input so later external mutations of `image` cannot
        # silently change this object's state.
        self.__asarray: NDArray = image.copy()

    @property
    def asarray(self) -> NDArray:
        """Array representation of the image"""
        return self.__asarray

    @asarray.setter
    def asarray(self, value: NDArray) -> None:
        """Setter for the asarray property

        Args:
            value (np.ndarray): value of the asarray to be changed
        """
        self.__asarray = value

    @property
    def asarray_binary(self) -> NDArray:
        """Returns the representation of the image as an array with values not
        in [0, 255] but in [0, 1].

        Returns:
            NDArray: an array with values in [0, 1]
        """
        return (self.asarray / 255).astype(np.float32)

    @property
    def is_gray(self) -> bool:
        """Whether the image is a grayscale image or not

        Returns:
            bool: True if image is in grayscale, False otherwise
        """
        # A grayscale image has no channel dimension: shape is (height, width)
        return bool(len(self.asarray.shape) == 2)

    @property
    def channels(self) -> int:
        """Number of channels in the image

        Returns:
            int: number of channels
        """
        if self.is_gray:
            return 1
        return self.asarray.shape[2]

    @property
    def shape_array(self) -> tuple:
        """Returns the array shape value (height, width, channel)

        Returns:
            tuple[int]: image shape
        """
        return self.asarray.shape

    @property
    def shape_xy(self) -> tuple:
        """Returns the array shape value (width, height, channel).
        Use this if you consider the image as pixels in a X-Y 2D coordinate system.

        Returns:
            tuple[int]: image shape
        """
        return (self.width, self.height, self.channels)

    @property
    def height(self) -> int:
        """Height of the image.

        Returns:
            int: image height
        """
        return self.asarray.shape[0]

    @property
    def width(self) -> int:
        """Width of the image.

        Returns:
            int: image width
        """
        return self.asarray.shape[1]

    @property
    def area(self) -> int:
        """Area of the image in pixels.

        Returns:
            int: image area
        """
        return self.width * self.height

    @property
    def center(self) -> NDArray[np.int16]:
        """Center point of the image.

        Please note that it is returned as type int because the center is
        represented as a X-Y coords of a pixel.

        Returns:
            np.ndarray: center point of the image
        """
        return (np.array([self.width, self.height]) / 2).astype(np.int16)

    @property
    def norm_side_length(self) -> int:
        """Returns the normalized side length of the image.
        This is the side length if the image had the same area but
        the shape of a square (four sides of the same length).

        Returns:
            int: normalized side length
        """
        return int(np.sqrt(self.area))

    @property
    def corners(self) -> NDArray:
        """Returns the corners in clockwise order:

        0. top left corner
        1. top right corner
        2. bottom right corner
        3. bottom left corner

        Returns:
            NDArray: array containing the corners
        """
        return np.array(
            [self.top_left, self.top_right, self.bottom_right, self.bottom_left]
        )

    @property
    def bottom_right(self) -> NDArray:
        """Get the bottom right point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([self.width - 1, self.height - 1], dtype=int)

    @property
    def bottom_left(self) -> NDArray:
        """Get the bottom left point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([0, self.height - 1], dtype=int)

    @property
    def top_right(self) -> NDArray:
        """Get the top right point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([self.width - 1, 0], dtype=int)

    @property
    def top_left(self) -> NDArray:
        """Get the top left point coordinate of the image

        Returns:
            NDArray: 2D point
        """
        return np.array([0, 0], dtype=int)

    def as_pil(self) -> ImagePIL.Image:
        """Return the image as PIL Image

        Returns:
            ImagePIL: PIL Image
        """
        return ImagePIL.fromarray(self.asarray)

    def as_bytes(self, fmt: str = "PNG") -> bytes:
        """Return the image as bytes

        Args:
            fmt (str, optional): format of the image. Defaults to "PNG".

        Returns:
            bytes: image in bytes
        """
        pil_image = self.as_pil()
        with io.BytesIO() as output:
            pil_image.save(output, format=fmt)
            return output.getvalue()

    def as_api_file_input(
        self, fmt: str = "png", filename: str = "image"
    ) -> dict[str, tuple[str, bytes, str]]:
        """Return the image as a file input for API requests.

        Args:
            fmt (str, optional): format of the image. Defaults to "png".
            filename (str, optional): name of the file. Defaults to "image".

        Returns:
            dict[str, tuple[str, bytes, str]]: dictionary with file input
                for API requests, where the key is "file" and the value is a tuple
                containing the filename, image bytes, and content type.
        """
        fmt_lower = fmt.lower()
        files = {
            "file": (
                # BUGFIX: use the `filename` argument — it was previously ignored
                f"{filename}.{fmt_lower}",
                self.as_bytes(fmt=fmt),
                f"image/{fmt_lower}",
            )
        }
        return files

    def as_grayscale(self) -> Self:
        """Generate the image in grayscale of shape (height, width)

        Returns:
            Self: original image in grayscale
        """
        if self.is_gray:
            return self
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_BGR2GRAY)
        return self

    def as_colorscale(self) -> Self:
        """Generate the image in colorscale (height, width, 3).
        This property can be useful when we wish to draw objects in a given color
        on a grayscale image.

        Returns:
            Self: original image in color
        """
        if not self.is_gray:
            return self
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_GRAY2BGR)
        return self

    def as_filled(self, fill_value: int | np.ndarray = 255) -> Self:
        """Fill the image entirely with a single color, keeping its size.
        Can be useful to get an empty representation of the same image to paint
        and draw things on an image of the same dimension.

        Args:
            fill_value (int | np.ndarray, optional): color to fill the new empty
                image. Defaults to 255, which means it returns an entirely
                white image.

        Returns:
            Self: image filled with a single color, same size as original.
        """
        self.asarray = np.full(
            shape=self.shape_array, fill_value=fill_value, dtype=np.uint8
        )
        return self

    def as_white(self) -> Self:
        """Returns an entirely white image with the same dimension as the original.

        Returns:
            Self: new white image
        """
        self.as_filled(fill_value=255)
        return self

    def as_black(self) -> Self:
        """Returns an entirely black image with the same dimension as the original.

        Returns:
            Self: new black image
        """
        self.as_filled(fill_value=0)
        return self

    def rev(self) -> Self:
        """Reverse the image colors. Each pixel color value V becomes |V - 255|.

        Applied on a grayscale image the black pixel becomes white and the
        white pixels become black.
        """
        # int16 intermediate avoids uint8 wrap-around during the subtraction
        self.asarray = np.abs(self.asarray.astype(np.int16) - 255).astype(np.uint8)
        return self

    def is_equal_shape(self, other: BaseImage, consider_channel: bool = True) -> bool:
        """Check whether two images have the same shape

        Args:
            other (BaseImage): BaseImage object
            consider_channel (bool, optional): whether the channel dimension
                takes part in the comparison. Defaults to True.

        Returns:
            bool: True if the objects have the same shape, False otherwise
        """
        if consider_channel:
            shape0 = self.shape_array
            shape1 = other.shape_array
        else:
            # Drop the trailing channel dimension (if any) from both shapes
            shape0 = (
                self.shape_array
                if len(self.shape_array) == 2
                else self.shape_array[:-1]
            )
            # BUGFIX: previously computed from `self`, so the comparison
            # always succeeded regardless of `other`
            shape1 = (
                other.shape_array
                if len(other.shape_array) == 2
                else other.shape_array[:-1]
            )
        return shape0 == shape1

    def dist_pct(self, pct: float = 0.01) -> float:
        """Distance percentage that can be used as an acceptable distance error
        margin. It is calculated based on the normalized side length.

        Args:
            pct (float, optional): percentage of distance error. Defaults to 0.01,
                which means 1% of the normalized side length as the
                default margin distance error.

        Returns:
            float: margin distance error
        """
        # The 0.01 default matches the long-documented behavior; passing pct
        # explicitly is unchanged.
        return self.norm_side_length * pct

area property

Area of the image

Returns:

Name Type Description
int int

image area

asarray property writable

Array representation of the image

asarray_binary property

Returns the representation of the image as an array with values not in [0, 255] but in [0, 1].

Returns:

Name Type Description
NDArray NDArray

an array with value in [0, 1]

bottom_left property

Get the bottom left point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

bottom_right property

Get the bottom right point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

center property

Center point of the image.

Please note that it is returned as type int because the center is represented as a X-Y coords of a pixel.

Returns:

Type Description
NDArray[int16]

np.ndarray: center point of the image

channels property

Number of channels in the image

Returns:

Name Type Description
int int

number of channels

corners property

Returns the corners in clockwise order:

  1. top left corner
  2. top right corner
  3. bottom right corner
  4. bottom left corner

Returns:

Name Type Description
NDArray NDArray

array containing the corners

height property

Height of the image.

Returns:

Name Type Description
int int

image height

is_gray property

Whether the image is a grayscale image or not

Returns:

Name Type Description
bool bool

True if image is in grayscale, False otherwise

norm_side_length property

Returns the normalized side length of the image. This is the side length if the image had the same area but the shape of a square (four sides of the same length).

Returns:

Name Type Description
int int

normalized side length

shape_array property

Returns the array shape value (height, width, channel)

Returns:

Type Description
tuple

tuple[int]: image shape

shape_xy property

Returns the array shape value (width, height, channel). Use this if you consider the image as pixels in a X-Y 2D coordinate system.

Returns:

Type Description
tuple

tuple[int]: image shape

top_left property

Get the top left point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

top_right property

Get the top right point coordinate of the image

Returns:

Name Type Description
NDArray NDArray

2D point

width property

Width of the image.

Returns:

Name Type Description
int int

image width

as_api_file_input(fmt='png', filename='image')

Return the image as a file input for API requests.

Parameters:

Name Type Description Default
fmt str

format of the image. Defaults to "png".

'png'
filename str

name of the file. Defaults to "image".

'image'

Returns:

Type Description
dict[str, tuple[str, bytes, str]]

dict[str, tuple[str, bytes, str]]: dictionary with file input for API requests, where the key is "file" and the value is a tuple containing the filename, image bytes, and content type.

Source code in otary/image/base.py
def as_api_file_input(
    self, fmt: str = "png", filename: str = "image"
) -> dict[str, tuple[str, bytes, str]]:
    """Return the image as a file input for API requests.

    Args:
        fmt (str, optional): format of the image. Defaults to "png".
        filename (str, optional): name of the file. Defaults to "image".

    Returns:
        dict[str, tuple[str, bytes, str]]: dictionary with file input
            for API requests, where the key is "file" and the value is a tuple
            containing the filename, image bytes, and content type.
    """
    fmt_lower = fmt.lower()
    files = {
        "file": (
            # BUGFIX: use the `filename` argument — it was previously ignored
            f"{filename}.{fmt_lower}",
            self.as_bytes(fmt=fmt),
            f"image/{fmt_lower}",
        )
    }
    return files

as_black()

Returns an entirely black image with the same dimension as the original.

Returns:

Name Type Description
Self Self

new black image

Source code in otary/image/base.py
def as_black(self) -> Self:
    """Turn the image entirely black, keeping its dimensions.

    Returns:
        Self: this image, now fully black
    """
    # Delegate the fill to as_filled with the black value (0).
    self.as_filled(fill_value=0)
    return self

as_bytes(fmt='PNG')

Return the image as bytes

Parameters:

Name Type Description Default
fmt str

format of the image. Defaults to "PNG".

'PNG'

Returns:

Name Type Description
bytes bytes

image in bytes

Source code in otary/image/base.py
def as_bytes(self, fmt: str = "PNG") -> bytes:
    """Serialize the image to raw bytes in the given format.

    Args:
        fmt (str, optional): target image format. Defaults to "PNG".

    Returns:
        bytes: the encoded image
    """
    with io.BytesIO() as buffer:
        # Encode via PIL into an in-memory buffer, then grab the payload.
        self.as_pil().save(buffer, format=fmt)
        return buffer.getvalue()

as_colorscale()

Generate the image in colorscale (height, width, 3). This property can be useful when we wish to draw objects in a given color on a grayscale image.

Returns:

Name Type Description
Self Self

original image in color

Source code in otary/image/base.py
def as_colorscale(self) -> Self:
    """Convert the image to a 3-channel color image (height, width, 3).
    Handy when drawing colored objects on top of a grayscale image.

    Returns:
        Self: this image, now with 3 channels
    """
    # Only grayscale images need converting; color images pass through.
    if self.is_gray:
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_GRAY2BGR)
    return self

as_filled(fill_value=255)

Returns an entirely white image of the same size as the original. Can be useful to get an empty representation of the same image to paint and draw things on an image of the same dimension.

Parameters:

Name Type Description Default
fill_value int | ndarray

color to fill the new empty image. Defaults to 255, which means that it returns an entirely white image.

255

Returns:

Name Type Description
Self Self

new image with a single color of the same size as original.

Source code in otary/image/base.py
def as_filled(self, fill_value: int | np.ndarray = 255) -> Self:
    """Fill the whole image with one color, keeping its dimensions.
    Useful for producing a blank canvas of the same size to draw on.

    Args:
        fill_value (int | np.ndarray, optional): fill color. Defaults to 255,
            i.e. an entirely white image.

    Returns:
        Self: this image, filled with the single color.
    """
    canvas = np.full(shape=self.shape_array, fill_value=fill_value, dtype=np.uint8)
    self.asarray = canvas
    return self

as_grayscale()

Generate the image in grayscale of shape (height, width)

Returns:

Name Type Description
Self Self

original image in grayscale

Source code in otary/image/base.py
def as_grayscale(self) -> Self:
    """Convert the image to grayscale, shape (height, width).

    Returns:
        Self: this image, now in grayscale
    """
    # Already-gray images pass through untouched.
    if not self.is_gray:
        self.asarray = cv2.cvtColor(self.asarray, cv2.COLOR_BGR2GRAY)
    return self

as_pil()

Return the image as PIL Image

Returns:

Name Type Description
ImagePIL Image

PIL Image

Source code in otary/image/base.py
def as_pil(self) -> ImagePIL.Image:
    """Convert this image into a PIL Image object.

    Returns:
        ImagePIL.Image: PIL view of the underlying array
    """
    array = self.asarray
    return ImagePIL.fromarray(array)

as_white()

Returns an entirely white image with the same dimension as the original.

Returns:

Name Type Description
Self Self

new white image

Source code in otary/image/base.py
def as_white(self) -> Self:
    """Turn the image entirely white, keeping its dimensions.

    Returns:
        Self: this image, now fully white
    """
    # Delegate the fill to as_filled with the white value (255).
    self.as_filled(fill_value=255)
    return self

dist_pct(pct)

Distance percentage that can be used as an acceptable distance error margin. It is calculated based on the normalized side length.

Parameters:

Name Type Description Default
pct float

percentage of distance error. Defaults to 0.01, which means 1% of the normalized side length as the default margin distance error.

required

Returns:

Name Type Description
float float

margin distance error

Source code in otary/image/base.py
def dist_pct(self, pct: float = 0.01) -> float:
    """Distance percentage that can be used as an acceptable distance error
    margin. It is calculated based on the normalized side length.

    Args:
        pct (float, optional): percentage of distance error. Defaults to 0.01,
            which means 1% of the normalized side length as the
            default margin distance error.

    Returns:
        float: margin distance error
    """
    # BUGFIX: the docstring promised a 0.01 default, but the signature had
    # none — add it (backward-compatible for all existing callers).
    return self.norm_side_length * pct

is_equal_shape(other, consider_channel=True)

Check whether two images have the same shape

Parameters:

Name Type Description Default
other BaseImage

BaseImage object

required

Returns:

Name Type Description
bool bool

True if the objects have the same shape, False otherwise

Source code in otary/image/base.py
def is_equal_shape(self, other: BaseImage, consider_channel: bool = True) -> bool:
    """Check whether two images have the same shape

    Args:
        other (BaseImage): BaseImage object
        consider_channel (bool, optional): whether the channel dimension
            takes part in the comparison. Defaults to True.

    Returns:
        bool: True if the objects have the same shape, False otherwise
    """
    if consider_channel:
        shape0 = self.shape_array
        shape1 = other.shape_array
    else:
        # Drop the trailing channel dimension (if any) from both shapes
        shape0 = (
            self.shape_array
            if len(self.shape_array) == 2
            else self.shape_array[:-1]
        )
        # BUGFIX: previously computed from `self`, making the comparison
        # succeed regardless of `other`
        shape1 = (
            other.shape_array
            if len(other.shape_array) == 2
            else other.shape_array[:-1]
        )
    return shape0 == shape1

rev()

Reverse the image colors. Each pixel color value V becomes |V - 255|.

Applied on a grayscale image the black pixel becomes white and the white pixels become black.

Source code in otary/image/base.py
def rev(self) -> Self:
    """Invert the image colors: every pixel value V becomes |V - 255|.

    On a grayscale image this swaps black and white pixels.
    """
    # Widen to int16 first so the subtraction does not wrap around in uint8.
    inverted = np.abs(self.asarray.astype(np.int16) - 255)
    self.asarray = inverted.astype(np.uint8)
    return self

Image Drawer module. It only contains methods to draw objects in images.

DrawerImage

Image Drawer class to draw objects on a given image

Source code in otary/image/components/drawer/drawer.py
class DrawerImage:
    """Image Drawer class to draw objects on a given image.

    All draw_* methods mutate the wrapped BaseImage in place: they convert it
    to a 3-channel image, draw with OpenCV, and write the array back.

    NOTE: renderers are mutated while drawing (adjust_colors_length), so every
    method now builds a fresh default renderer instead of sharing a
    module-level mutable default instance between calls.
    """

    def __init__(self, base: BaseImage):
        self.base = base

    def _pre_draw(self, n_objects: int, render: Render) -> NDArray:
        """Adjust the render's colors to the object count and return a
        3-channel array to draw on."""
        render.adjust_colors_length(n=n_objects)
        return self.base.as_colorscale().asarray

    def draw_circles(
        self,
        circles: Sequence[geo.Circle],
        render: CirclesRender | None = None,
    ) -> None:
        """Draw circles in the image

        Args:
            circles (Sequence[Circle]): list of Circle geometry objects.
            render (CirclesRender, optional): circle renderer. Defaults to a
                fresh CirclesRender.
        """
        if render is None:
            render = CirclesRender()
        im_array = self._pre_draw(n_objects=len(circles), render=render)
        for circle, color in zip(circles, render.colors_processed):
            cv2.circle(  # type: ignore[call-overload]
                img=im_array,
                center=circle.center.astype(int),
                radius=int(circle.radius),
                color=color,
                # thickness -1 means a filled circle in OpenCV
                thickness=render.thickness if not render.is_filled else -1,
                lineType=render.line_type,
            )
            if render.is_draw_center_point_enabled:
                cv2.circle(  # type: ignore[call-overload]
                    img=im_array,
                    center=circle.center.astype(int),
                    radius=1,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        self.base.asarray = im_array

    def draw_ellipses(
        self,
        ellipses: Sequence[geo.Ellipse],
        render: EllipsesRender | None = None,
    ) -> None:
        """Draw ellipses in the image

        Args:
            ellipses (Sequence[Ellipse]): list of Ellipse geometry objects.
            render (EllipsesRender, optional): ellipse renderer. Defaults to a
                fresh EllipsesRender.
        """
        if render is None:
            render = EllipsesRender()
        im_array = self._pre_draw(n_objects=len(ellipses), render=render)
        for ellipse, color in zip(ellipses, render.colors_processed):
            axes = (int(ellipse.semi_major_axis), int(ellipse.semi_minor_axis))
            cv2.ellipse(  # type: ignore[call-overload]
                img=im_array,
                center=ellipse.centroid.astype(int),
                axes=axes,
                angle=ellipse.angle(degree=True),
                startAngle=0,
                endAngle=360,
                color=color,
                thickness=render.thickness if not render.is_filled else -1,
                lineType=render.line_type,
            )
            if render.is_draw_center_point_enabled:
                cv2.circle(  # type: ignore[call-overload]
                    img=im_array,
                    center=ellipse.centroid.astype(int),
                    radius=1,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
            if render.is_draw_focis_enabled:
                # mark both focal points of the ellipse
                for foci in [ellipse.foci1, ellipse.foci2]:
                    cv2.circle(  # type: ignore[call-overload]
                        img=im_array,
                        center=foci.astype(int),
                        radius=1,
                        color=color,
                        thickness=render.thickness,
                        lineType=render.line_type,
                    )
        self.base.asarray = im_array

    def draw_points(
        self,
        points: NDArray | Sequence[geo.Point],
        render: PointsRender | None = None,
    ) -> None:
        """Draw points in the image

        Args:
            points (NDArray): list of points. It must be of shape (n, 2). This
                means n points of shape 2 (x and y coordinates).
            render (PointsRender, optional): point renderer. Defaults to a
                fresh PointsRender.
        """
        if render is None:
            render = PointsRender()
        _points = prep_obj_draw(objects=points, _type=geo.Point)
        im_array = self._pre_draw(n_objects=len(_points), render=render)
        for point, color in zip(_points, render.colors_processed):
            cv2.circle(
                img=im_array,
                center=point,
                radius=render.radius,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )
        self.base.asarray = im_array

    def draw_segments(
        self,
        segments: NDArray | Sequence[geo.Segment],
        render: SegmentsRender | None = None,
    ) -> None:
        """Draw segments in the image. It can be arrowed segments (vectors) too.

        Args:
            segments (NDArray): list of segments. Can be a numpy array of shape
                (n, 2, 2) which means n array of shape (2, 2) that define a segment
                by two 2D points.
            render (SegmentsRender, optional): segment renderer. Defaults to a
                fresh SegmentsRender.
        """
        if render is None:
            render = SegmentsRender()
        _segments = prep_obj_draw(objects=segments, _type=geo.Segment)
        # use the prepped list for the count, consistent with the other methods
        im_array = self._pre_draw(n_objects=len(_segments), render=render)
        if render.as_vectors:
            for segment, color in zip(_segments, render.colors_processed):
                cv2.arrowedLine(
                    img=im_array,
                    pt1=segment[0],
                    pt2=segment[1],
                    color=color,
                    thickness=render.thickness,
                    # NOTE: cv2.arrowedLine's kwarg is `line_type` (snake_case),
                    # unlike cv2.line's `lineType` — this is OpenCV's naming.
                    line_type=render.line_type,
                    tipLength=render.tip_length / geo.Segment(segment).length,
                )
        else:
            for segment, color in zip(_segments, render.colors_processed):
                cv2.line(
                    img=im_array,
                    pt1=segment[0],
                    pt2=segment[1],
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        self.base.asarray = im_array

    def draw_splines(
        self,
        splines: Sequence[geo.LinearSpline],
        render: LinearSplinesRender | None = None,
    ) -> None:
        """Draw linear splines in the image.

        Args:
            splines (Sequence[geo.LinearSpline]): linear splines to draw.
            render (LinearSplinesRender, optional): linear splines render.
                Defaults to a fresh LinearSplinesRender.
        """
        if render is None:
            render = LinearSplinesRender()
        _splines = prep_obj_draw(objects=splines, _type=geo.LinearSpline)
        im_array = self._pre_draw(n_objects=len(_splines), render=render)
        for spline, color in zip(_splines, render.colors_processed):

            if render.as_vectors:
                cv2.polylines(
                    img=im_array,
                    pts=[spline[:-1]],
                    isClosed=False,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )

                # Draw the last edge as a vector
                ix = int(len(spline) * (1 - render.pct_ix_head))
                ix = ix - 1 if ix == len(spline) - 1 else ix
                segment = [spline[ix], spline[-1]]
                cv2.arrowedLine(
                    img=im_array,
                    pt1=segment[0],
                    pt2=segment[1],
                    color=color,
                    thickness=render.thickness,
                    # CONSISTENCY FIX: honor the render's line type, as every
                    # other drawing call in this class does
                    line_type=render.line_type,
                    tipLength=render.tip_length / geo.Segment(segment).length,
                )

            else:
                cv2.polylines(
                    img=im_array,
                    pts=[spline],
                    isClosed=False,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        # BUGFIX: write the drawn array back to the base image — this was the
        # only draw method missing the assignment
        self.base.asarray = im_array

    def draw_polygons(
        self,
        polygons: Sequence[geo.Polygon],
        render: PolygonsRender | None = None,
    ) -> None:
        """Draw polygons in the image

        Args:
            polygons (Sequence[Polygon]): list of Polygon objects
            render (PolygonsRender, optional): PolygonRender object. Defaults
                to a fresh PolygonsRender.
        """
        if render is None:
            render = PolygonsRender()
        _polygons = prep_obj_draw(objects=polygons, _type=geo.Polygon)
        im_array = self._pre_draw(n_objects=len(_polygons), render=render)
        for polygon, color in zip(_polygons, render.colors_processed):
            if render.is_filled:
                cv2.fillPoly(
                    img=im_array,
                    pts=[polygon],
                    color=color,
                    lineType=render.line_type,
                )
            else:
                cv2.polylines(
                    img=im_array,
                    pts=[polygon],
                    isClosed=True,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
        self.base.asarray = im_array

    def draw_ocr_outputs(
        self,
        ocr_outputs: Sequence[OcrSingleOutput],
        render: OcrSingleOutputRender | None = None,
    ) -> None:
        """Return the image with the bounding boxes displayed from a list of OCR
        single output. It allows you to show bounding boxes that can have an angle,
        not necessarily vertical or horizontal.

        Args:
            ocr_outputs (Sequence[OcrSingleOutput]): list of OcrSingleOutput objects
            render (OcrSingleOutputRender, optional): OcrSingleOutputRender
                object. Defaults to a fresh OcrSingleOutputRender.
        """
        if render is None:
            render = OcrSingleOutputRender()
        im_array = self._pre_draw(n_objects=len(ocr_outputs), render=render)
        for ocrso, color in zip(ocr_outputs, render.colors_processed):
            # Best-effort: silently skip entries that are not OcrSingleOutput
            # or that carry no bounding box.
            if not isinstance(ocrso, OcrSingleOutput) or ocrso.bbox is None:
                continue
            cnt = [ocrso.bbox.asarray.reshape((-1, 1, 2)).astype(np.int32)]
            im_array = cv2.drawContours(
                image=im_array,
                contours=cnt,
                contourIdx=-1,
                thickness=render.thickness,
                color=color,
                lineType=render.line_type,
            )
        self.base.asarray = im_array

draw_circles(circles, render=CirclesRender())

Draw circles in the image

Parameters:

Name Type Description Default
circles Sequence[Circle]

list of Circle geometry objects.

required
render CirclesRender

circle renderer

CirclesRender()
Source code in otary/image/components/drawer/drawer.py
def draw_circles(
    self,
    circles: Sequence[geo.Circle],
    render: CirclesRender = CirclesRender(),
) -> None:
    """Draw a collection of circles on the image.

    Args:
        circles (Sequence[Circle]): list of Circle geometry objects.
        render (CirclesRender): circle renderer
    """
    canvas = self._pre_draw(n_objects=len(circles), render=render)
    # a filled circle is requested from OpenCV via thickness == -1
    outline_thickness = -1 if render.is_filled else render.thickness
    for circ, col in zip(circles, render.colors_processed):
        center = circ.center.astype(int)
        cv2.circle(  # type: ignore[call-overload]
            img=canvas,
            center=center,
            radius=int(circ.radius),
            color=col,
            thickness=outline_thickness,
            lineType=render.line_type,
        )
        if render.is_draw_center_point_enabled:
            # mark the center with a 1-pixel-radius dot of the same color
            cv2.circle(  # type: ignore[call-overload]
                img=canvas,
                center=center,
                radius=1,
                color=col,
                thickness=render.thickness,
                lineType=render.line_type,
            )
    self.base.asarray = canvas

draw_ellipses(ellipses, render=EllipsesRender())

Draw ellipses in the image

Parameters:

Name Type Description Default
ellipses Sequence[Ellipse]

list of Ellipse geometry objects.

required
render EllipsesRender

renderer (uses EllipsesRender for color/thickness)

EllipsesRender()
Source code in otary/image/components/drawer/drawer.py
def draw_ellipses(
    self,
    ellipses: Sequence[geo.Ellipse],
    render: EllipsesRender = EllipsesRender(),
) -> None:
    """Draw ellipses in the image

    Args:
        ellipses (Sequence[Ellipse]): list of Ellipse geometry objects.
        render (EllipsesRender): ellipse renderer controlling color, thickness,
            fill and the optional center/foci markers.
    """
    im_array = self._pre_draw(n_objects=len(ellipses), render=render)
    for ellipse, color in zip(ellipses, render.colors_processed):
        # OpenCV expects integer (semi-major, semi-minor) axes lengths
        axes = (int(ellipse.semi_major_axis), int(ellipse.semi_minor_axis))
        # full ellipse: sweep from 0 to 360 degrees; thickness -1 fills it
        cv2.ellipse(  # type: ignore[call-overload]
            img=im_array,
            center=ellipse.centroid.astype(int),
            axes=axes,
            angle=ellipse.angle(degree=True),
            startAngle=0,
            endAngle=360,
            color=color,
            thickness=render.thickness if not render.is_filled else -1,
            lineType=render.line_type,
        )
        if render.is_draw_center_point_enabled:
            # mark the centroid with a 1-pixel-radius dot
            cv2.circle(  # type: ignore[call-overload]
                img=im_array,
                center=ellipse.centroid.astype(int),
                radius=1,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )
        if render.is_draw_focis_enabled:
            # mark both foci the same way as the center point
            for foci in [ellipse.foci1, ellipse.foci2]:
                cv2.circle(  # type: ignore[call-overload]
                    img=im_array,
                    center=foci.astype(int),
                    radius=1,
                    color=color,
                    thickness=render.thickness,
                    lineType=render.line_type,
                )
    self.base.asarray = im_array

draw_ocr_outputs(ocr_outputs, render=OcrSingleOutputRender())

Return the image with the bounding boxes displayed from a list of OCR single output. It allows you to show bounding boxes that can have an angle, not necessarily vertical or horizontal.

Parameters:

Name Type Description Default
ocr_outputs Sequence[OcrSingleOutput]

list of OcrSingleOutput objects

required
render OcrSingleOutputRender

OcrSingleOutputRender object

OcrSingleOutputRender()
Source code in otary/image/components/drawer/drawer.py
def draw_ocr_outputs(
    self,
    ocr_outputs: Sequence[OcrSingleOutput],
    render: OcrSingleOutputRender = OcrSingleOutputRender(),
) -> None:
    """Return the image with the bounding boxes displayed from a list of OCR
    single output. It allows you to show bounding boxes that can have an angle,
    not necessarily vertical or horizontal.

    Args:
        ocr_outputs (Sequence[OcrSingleOutput]): list of OcrSingleOutput objects
        render (OcrSingleOutputRender): OcrSingleOutputRender object
    """
    im_array = self._pre_draw(n_objects=len(ocr_outputs), render=render)
    for ocrso, color in zip(ocr_outputs, render.colors_processed):
        # silently skip entries that are not OCR outputs or have no bbox
        if not isinstance(ocrso, OcrSingleOutput) or ocrso.bbox is None:
            continue
        # OpenCV contour format: list of (n_points, 1, 2) int32 arrays
        cnt = [ocrso.bbox.asarray.reshape((-1, 1, 2)).astype(np.int32)]
        im_array = cv2.drawContours(
            image=im_array,
            contours=cnt,
            contourIdx=-1,
            thickness=render.thickness,
            color=color,
            lineType=render.line_type,
        )
    self.base.asarray = im_array

draw_points(points, render=PointsRender())

Draw points in the image

Parameters:

Name Type Description Default
points NDArray

list of points. It must be of shape (n, 2). This means n points of shape 2 (x and y coordinates).

required
render PointsRender

point renderer

PointsRender()
Source code in otary/image/components/drawer/drawer.py
def draw_points(
    self,
    points: NDArray | Sequence[geo.Point],
    render: PointsRender = PointsRender(),
) -> None:
    """Draw points in the image

    Args:
        points (NDArray): list of points. It must be of shape (n, 2). This
            means n points of shape 2 (x and y coordinates).
        render (PointsRender): point renderer
    """
    prepared = prep_obj_draw(objects=points, _type=geo.Point)
    canvas = self._pre_draw(n_objects=len(prepared), render=render)
    for pt, col in zip(prepared, render.colors_processed):
        # each point is rendered as a small circle of the render's radius
        cv2.circle(
            img=canvas,
            center=pt,
            radius=render.radius,
            color=col,
            thickness=render.thickness,
            lineType=render.line_type,
        )
    self.base.asarray = canvas

draw_polygons(polygons, render=PolygonsRender())

Draw polygons in the image

Parameters:

Name Type Description Default
polygons Sequence[Polygon]

list of Polygon objects

required
render PolygonsRender

PolygonRender object

PolygonsRender()
Source code in otary/image/components/drawer/drawer.py
def draw_polygons(
    self, polygons: Sequence[geo.Polygon], render: PolygonsRender = PolygonsRender()
) -> None:
    """Draw polygons in the image

    Args:
        polygons (Sequence[Polygon]): list of Polygon objects
        render (PolygonsRender): PolygonRender object
    """
    prepared = prep_obj_draw(objects=polygons, _type=geo.Polygon)
    canvas = self._pre_draw(n_objects=len(prepared), render=render)
    for poly, col in zip(prepared, render.colors_processed):
        if render.is_filled:
            # solid polygon: fill the interior
            cv2.fillPoly(
                img=canvas,
                pts=[poly],
                color=col,
                lineType=render.line_type,
            )
            continue
        # outline only: draw the closed boundary
        cv2.polylines(
            img=canvas,
            pts=[poly],
            isClosed=True,
            color=col,
            thickness=render.thickness,
            lineType=render.line_type,
        )
    self.base.asarray = canvas

draw_segments(segments, render=SegmentsRender())

Draw segments in the image. It can be arrowed segments (vectors) too.

Parameters:

Name Type Description Default
segments NDArray

list of segments. Can be a numpy array of shape (n, 2, 2) which means n array of shape (2, 2) that define a segment by two 2D points.

required
render SegmentsRender

segment renderer

SegmentsRender()
Source code in otary/image/components/drawer/drawer.py
def draw_segments(
    self,
    segments: NDArray | Sequence[geo.Segment],
    render: SegmentsRender = SegmentsRender(),
) -> None:
    """Draw segments in the image. It can be arrowed segments (vectors) too.

    Args:
        segments (NDArray | Sequence[geo.Segment]): list of segments. Can be a
            numpy array of shape (n, 2, 2) which means n array of shape (2, 2)
            that define a segment by two 2D points.
        render (SegmentsRender): segment renderer
    """
    _segments = prep_obj_draw(objects=segments, _type=geo.Segment)
    # Consistency fix: count the prepped segments (like every other draw_*
    # method), not the raw input, so any conversion by prep_obj_draw is
    # reflected in the color/render preparation.
    im_array = self._pre_draw(n_objects=len(_segments), render=render)
    if render.as_vectors:
        for segment, color in zip(_segments, render.colors_processed):
            # arrowed line: tipLength is a fraction of the segment length,
            # so dividing by the length keeps the arrow head a fixed size
            cv2.arrowedLine(
                img=im_array,
                pt1=segment[0],
                pt2=segment[1],
                color=color,
                thickness=render.thickness,
                line_type=render.line_type,
                tipLength=render.tip_length / geo.Segment(segment).length,
            )
    else:
        for segment, color in zip(_segments, render.colors_processed):
            cv2.line(
                img=im_array,
                pt1=segment[0],
                pt2=segment[1],
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )
    self.base.asarray = im_array

draw_splines(splines, render=LinearSplinesRender())

Draw linear splines in the image.

Parameters:

Name Type Description Default
splines Sequence[LinearSpline]

linear splines to draw.

required
render LinearSplinesRender

linear splines render. Defaults to LinearSplinesRender().

LinearSplinesRender()
Source code in otary/image/components/drawer/drawer.py
def draw_splines(
    self,
    splines: Sequence[geo.LinearSpline],
    render: LinearSplinesRender = LinearSplinesRender(),
) -> None:
    """Draw linear splines in the image.

    Args:
        splines (Sequence[geo.LinearSpline]): linear splines to draw.
        render (LinearSplinesRender, optional): linear splines render.
            Defaults to LinearSplinesRender().
    """
    _splines = prep_obj_draw(objects=splines, _type=geo.LinearSpline)
    im_array = self._pre_draw(n_objects=len(_splines), render=render)
    for spline, color in zip(_splines, render.colors_processed):

        if render.as_vectors:
            # all edges except the last are drawn as an open polyline
            cv2.polylines(
                img=im_array,
                pts=[spline[:-1]],
                isClosed=False,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )

            # Draw the last edge as a vector: the arrow starts at an index
            # pct_ix_head of the way back from the end of the spline
            ix = int(len(spline) * (1 - render.pct_ix_head))
            ix = ix - 1 if ix == len(spline) - 1 else ix
            segment = [spline[ix], spline[-1]]
            cv2.arrowedLine(
                img=im_array,
                pt1=segment[0],
                pt2=segment[1],
                color=color,
                thickness=render.thickness,
                tipLength=render.tip_length / geo.Segment(segment).length,
            )

        else:
            cv2.polylines(
                img=im_array,
                pts=[spline],
                isClosed=False,
                color=color,
                thickness=render.thickness,
                lineType=render.line_type,
            )

    # Bug fix: write the drawn canvas back to the image. Every sibling
    # draw_* method ends with this assignment; without it draw_splines
    # produced no visible effect.
    self.base.asarray = im_array

Image Reader module

ReaderImage

ReaderImage class to facilitate the reading of images from different formats such as JPG, PNG, and PDF. It provides methods to load images from file paths.

Source code in otary/image/components/io/reader.py
class ReaderImage:
    """ReaderImage class to facilitate the reading of images from different formats
    such as JPG, PNG, and PDF. It provides methods to load images from file paths.
    """

    @staticmethod
    def from_fillvalue(value: int = 255, shape: tuple = (128, 128, 3)) -> NDArray:
        """Create an array image from a single value

        Args:
            value (int, optional): value in [0, 255]. Defaults to 255.
            shape (tuple, optional): image shape. If it has three elements then
                the last one must be 3 for a color image.
                Defaults to (128, 128, 3).

        Returns:
            NDArray: array with a single value

        Raises:
            ValueError: if the value is outside [0, 255] or the shape is not
                a valid 2D grayscale / 3-channel color shape
        """
        if value < 0 or value > 255:
            raise ValueError(f"The value {value} must be in [0, 255]")
        if len(shape) < 2 or len(shape) >= 4:
            raise ValueError(f"The shape {shape} must be of length 2 or 3")
        if len(shape) == 3 and shape[-1] != 3:
            raise ValueError(f"The last value of {shape} must be 3")
        return np.full(shape=shape, fill_value=value, dtype=np.uint8)

    @staticmethod
    def from_jpg(
        filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
    ) -> NDArray:
        """Create a Image object from a JPG or JPEG file path

        Args:
            filepath (str): path to the JPG image file
            as_grayscale (bool, optional): turn the image in grayscale.
                Defaults to False.
            resolution (Optional[int], optional): target height in pixels; when
                given, the image is resized to this height with its aspect
                ratio preserved. Defaults to None (no resize).

        Returns:
            NDArray: numpy array

        Raises:
            FileNotFoundError: if the file cannot be read as an image
        """
        img = cv2.imread(filepath, 1 - int(as_grayscale))
        if img is None:
            # Bug fix: cv2.imread silently returns None for missing or
            # unreadable files; fail loudly instead of crashing later on
            # the shape unpacking below with an obscure error.
            raise FileNotFoundError(f"Could not read any image from {filepath}")
        arr = np.asarray(img)
        original_height, original_width = arr.shape[:2]

        if resolution is not None:
            # Preserve the aspect ratio: resolution is the target height and
            # cv2.resize expects dsize as (width, height)
            aspect_ratio = original_width / original_height
            new_width = int(resolution * aspect_ratio)
            arr = cv2.resize(src=arr, dsize=(new_width, resolution))

        return arr

    @staticmethod
    def from_png(
        filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
    ) -> NDArray:
        """Create a Image array from a PNG file image path

        Args:
            filepath (str): path to the image file
            as_grayscale (bool, optional): turn the image in grayscale.
                Defaults to False.
            resolution (Optional[int], optional): target height in pixels
                (aspect ratio preserved). Defaults to None (no resize).

        Returns:
            NDArray: Image as array
        """
        # cv2.imread handles PNG exactly like JPEG, so delegate
        return ReaderImage.from_jpg(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )

    @staticmethod
    def from_pdf(
        filepath: str,
        as_grayscale: bool = False,
        page_nb: int = 0,
        resolution: Optional[int] = None,
        clip_pct: Optional[pymupdf.Rect] = None,
    ) -> NDArray:
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        """Create an Image array from a pdf file.

        Args:
            filepath (str): path to the pdf file.
            as_grayscale (bool, optional): whether to turn the image in grayscale.
                Defaults to False.
            page_nb (int, optional): as we load only one image we have to select the
                page that will be turned into an image. Defaults to 0.
            resolution (Optional[int], optional): resolution of the loaded image.
                Defaults to None; the underlying reader then applies its own
                default resolution.
            clip_pct (pymupdf.Rect, optional): optional zone to extract in the image.
                This is particularly useful to load into memory only a small part of the
                image without loading everything into memory. This reduces considerably
                the image loading time especially combined with a high resolution.

        Returns:
            NDArray: Image as array
        """
        # keep only the single requested page image produced by the helper
        arr = read_pdf_to_images(
            filepath_or_stream=filepath,
            resolution=resolution,
            page_nb=page_nb,
            clip_pct=clip_pct,
        )[0]

        if as_grayscale:
            # collapse the 3-channel page render to one channel (BGR -> gray)
            arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)

        return arr

    @staticmethod
    def from_file(
        filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
    ) -> NDArray:
        """Create a Image array from a file image path

        Args:
            filepath (str): path to the image file
            as_grayscale (bool, optional): turn the image in grayscale.
                Defaults to False.
            resolution (Optional[int], optional): target height in pixels
                (aspect ratio preserved). Defaults to None.

        Returns:
            NDArray: Image as array

        Raises:
            ValueError: if the file extension is not a supported format
        """
        valid_format = ["png", "jpg", "jpeg", "pdf"]

        # Fix: lowercase the extension so uppercase variants (".PNG", ".JPG")
        # are accepted too
        file_format = filepath.split(".")[-1].lower()

        if file_format in ["png"]:
            return ReaderImage.from_png(
                filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
            )
        if file_format in ["jpg", "jpeg"]:
            return ReaderImage.from_jpg(
                filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
            )
        if file_format in ["pdf"]:
            return ReaderImage.from_pdf(
                filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
            )

        raise ValueError(f"The filepath is not in any valid format {valid_format}")

from_file(filepath, as_grayscale=False, resolution=None) staticmethod

Create a Image array from a file image path

Parameters:

Name Type Description Default
filepath str

path to the image file

required
as_grayscale bool

turn the image in grayscale. Defaults to False.

False

Returns:

Name Type Description
NDArray NDArray

Image as array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_file(
    filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
) -> NDArray:
    """Create a Image array from a file image path

    Args:
        filepath (str): path to the image file
        as_grayscale (bool, optional): turn the image in grayscale.
            Defaults to False.
        resolution (Optional[int], optional): target height in pixels
            (aspect ratio preserved). Defaults to None.

    Returns:
        NDArray: Image as array

    Raises:
        ValueError: if the file extension is not a supported format
    """
    valid_format = ["png", "jpg", "jpeg", "pdf"]

    # Fix: lowercase the extension so uppercase variants (".PNG", ".JPG")
    # are accepted too
    file_format = filepath.split(".")[-1].lower()

    if file_format in ["png"]:
        return ReaderImage.from_png(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )
    if file_format in ["jpg", "jpeg"]:
        return ReaderImage.from_jpg(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )
    if file_format in ["pdf"]:
        return ReaderImage.from_pdf(
            filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
        )

    raise ValueError(f"The filepath is not in any valid format {valid_format}")

from_fillvalue(value=255, shape=(128, 128, 3)) staticmethod

Create an array image from a single value

Parameters:

Name Type Description Default
value int

value in [0, 255]. Defaults to 255.

255
shape tuple

image shape. If it has three elements then the last one must be 3 for a color image. Defaults to (128, 128, 3).

(128, 128, 3)

Returns:

Name Type Description
NDArray NDArray

array with a single value

Source code in otary/image/components/io/reader.py
@staticmethod
def from_fillvalue(value: int = 255, shape: tuple = (128, 128, 3)) -> NDArray:
    """Create an array image from a single value

    Args:
        value (int, optional): value in [0, 255]. Defaults to 255.
        shape (tuple, optional): image shape. If it has three elements then
            the last one must be a 3 for a coloscale image.
            Defaults to (128, 128, 3).

    Returns:
        NDArray: array with a single value
    """
    if value < 0 or value > 255:
        raise ValueError(f"The value {value} must be in [0, 255]")
    if len(shape) < 2 or len(shape) >= 4:
        raise ValueError(f"The shape {shape} must be of length 2 or 3")
    if len(shape) == 3 and shape[-1] != 3:
        raise ValueError(f"The last value of {shape} must be 3")
    return np.full(shape=shape, fill_value=value, dtype=np.uint8)

from_jpg(filepath, as_grayscale=False, resolution=None) staticmethod

Create a Image object from a JPG or JPEG file path

Parameters:

Name Type Description Default
filepath str

path to the JPG image file

required
as_grayscale bool

turn the image in grayscale. Defaults to False.

False

Returns:

Name Type Description
NDArray NDArray

numpy array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_jpg(
    filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
) -> NDArray:
    """Create a Image object from a JPG or JPEG file path

    Args:
        filepath (str): path to the JPG image file
        as_grayscale (bool, optional): turn the image in grayscale.
            Defaults to False.
        resolution (Optional[int], optional): target height in pixels; when
            given, the image is resized to this height with its aspect
            ratio preserved. Defaults to None (no resize).

    Returns:
        NDArray: numpy array
    """
    # NOTE(review): cv2.imread returns None for missing/unreadable paths,
    # which makes the shape unpacking below fail with an obscure error —
    # consider raising an explicit exception instead.
    arr = np.asarray(cv2.imread(filepath, 1 - int(as_grayscale)))
    original_height, original_width = arr.shape[:2]

    if resolution is not None:
        # Calculate the aspect ratio; resolution is the target height and
        # cv2.resize expects dsize as (width, height)
        aspect_ratio = original_width / original_height
        new_width = int(resolution * aspect_ratio)
        arr = cv2.resize(src=arr, dsize=(new_width, resolution))

    return arr

from_pdf(filepath, as_grayscale=False, page_nb=0, resolution=None, clip_pct=None) staticmethod

Create an Image array from a pdf file.

Parameters:

Name Type Description Default
filepath str

path to the pdf file.

required
as_grayscale bool

whether to turn the image in grayscale. Defaults to False.

False
page_nb int

as we load only one image we have to select the page that will be turned into an image. Defaults to 0.

0
resolution Optional[int]

resolution of the loaded image. Defaults to None; the underlying reader then applies its own default resolution.

None
clip_pct Rect

optional zone to extract in the image. This is particularly useful to load into memory only a small part of the image without loading everything into memory. This reduces considerably the image loading time especially combined with a high resolution.

None

Returns:

Name Type Description
NDArray NDArray

Image as array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_pdf(
    filepath: str,
    as_grayscale: bool = False,
    page_nb: int = 0,
    resolution: Optional[int] = None,
    clip_pct: Optional[pymupdf.Rect] = None,
) -> NDArray:
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    """Create an Image array from a pdf file.

    Args:
        filepath (str): path to the pdf file.
        as_grayscale (bool, optional): whether to turn the image in grayscale.
            Defaults to False.
        page_nb (int, optional): as we load only one image we have to select the
            page that will be turned into an image. Defaults to 0.
        resolution (Optional[int], optional): resolution of the loaded image.
            Defaults to None; the underlying reader then applies its own
            default resolution.
        clip_pct (pymupdf.Rect, optional): optional zone to extract in the image.
            This is particularly useful to load into memory only a small part of the
            image without loading everything into memory. This reduces considerably
            the image loading time especially combined with a high resolution.

    Returns:
        NDArray: Image as array
    """
    # keep only the single requested page image produced by the helper
    arr = read_pdf_to_images(
        filepath_or_stream=filepath,
        resolution=resolution,
        page_nb=page_nb,
        clip_pct=clip_pct,
    )[0]

    if as_grayscale:
        # collapse the 3-channel page render to one channel (BGR -> gray)
        arr = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY)

    return arr

from_png(filepath, as_grayscale=False, resolution=None) staticmethod

Create a Image array from a PNG file image path

Parameters:

Name Type Description Default
filepath str

path to the image file

required
as_grayscale bool

turn the image in grayscale. Defaults to False.

False

Returns:

Name Type Description
NDArray NDArray

Image as array

Source code in otary/image/components/io/reader.py
@staticmethod
def from_png(
    filepath: str, as_grayscale: bool = False, resolution: Optional[int] = None
) -> NDArray:
    """Create a Image array from a PNG file image path

    Args:
        filepath (str): path to the image file
        as_grayscale (bool, optional): turn the image in grayscale.
            Defaults to False.
        resolution (Optional[int], optional): target height in pixels
            (aspect ratio preserved). Defaults to None (no resize).

    Returns:
        NDArray: Image as array
    """
    # cv2.imread handles PNG exactly like JPEG, so delegate to from_jpg
    return ReaderImage.from_jpg(
        filepath=filepath, as_grayscale=as_grayscale, resolution=resolution
    )

WriterImage module

WriterImage

WriterImage class that provide methods to save and show the image

Source code in otary/image/components/io/writer.py
class WriterImage:
    """WriterImage class that provide methods to save and show the image"""

    def __init__(self, base: BaseImage) -> None:
        # the BaseImage whose pixel array is displayed / saved
        self.base = base

    def show(
        self,
        title: Optional[str] = None,
        figsize: tuple[float, float] = (8.0, 6.0),
        color_conversion: Optional[int] = cv2.COLOR_BGR2RGB,
        save_filepath: Optional[str] = None,
    ) -> None:
        """Show the image

        Args:
            title (Optional[str], optional): title of the image. Defaults to None.
            figsize (tuple[float, float], optional): size of the figure.
                Defaults to (8.0, 6.0).
            color_conversion (Optional[int], optional): color conversion code
                passed to cv2.cvtColor; pass None to skip any conversion.
                Defaults to cv2.COLOR_BGR2RGB.
            save_filepath (Optional[str], optional): save the image if needed.
                Defaults to None.
        """
        # Converts from one colour space to the other. this is needed as RGB
        # is not the default colour space for OpenCV
        if color_conversion is not None:
            im = cv2.cvtColor(self.base.asarray, color_conversion)
        else:
            im = self.base.asarray

        plt.figure(figsize=figsize)

        # Show the image
        plt.imshow(im)

        # remove the axis / ticks for a clean looking image
        plt.xticks([])
        plt.yticks([])

        # if a title is provided, show it
        if title is not None:
            plt.title(title)

        # save before plt.show(), which may clear the current figure
        if save_filepath is not None:
            plt.savefig(save_filepath)

        plt.show()

    def save(self, save_filepath: str) -> None:
        """Save the image in a local file

        NOTE(review): delegates to show(), so saving also triggers plt.show()
        and may open a window — confirm this is intended for headless use.

        Args:
            save_filepath (str): path to the file
        """
        self.show(save_filepath=save_filepath)

save(save_filepath)

Save the image in a local file

Parameters:

Name Type Description Default
save_filepath str

path to the file

required
Source code in otary/image/components/io/writer.py
def save(self, save_filepath: str) -> None:
    """Save the image in a local file

    NOTE(review): delegates to show(), so saving also triggers plt.show()
    and may open a window — confirm this is intended for headless use.

    Args:
        save_filepath (str): path to the file
    """
    self.show(save_filepath=save_filepath)

show(title=None, figsize=(8.0, 6.0), color_conversion=cv2.COLOR_BGR2RGB, save_filepath=None)

Show the image

Parameters:

Name Type Description Default
title Optional[str]

title of the image. Defaults to None.

None
figsize tuple[float, float]

size of the figure. Defaults to (8.0, 6.0).

(8.0, 6.0)
color_conversion int

color conversion parameter. Defaults to cv2.COLOR_BGR2RGB.

COLOR_BGR2RGB
save_filepath Optional[str]

save the image if needed. Defaults to None.

None
Source code in otary/image/components/io/writer.py
def show(
    self,
    title: Optional[str] = None,
    figsize: tuple[float, float] = (8.0, 6.0),
    color_conversion: Optional[int] = cv2.COLOR_BGR2RGB,
    save_filepath: Optional[str] = None,
) -> None:
    """Show the image

    Args:
        title (Optional[str], optional): title of the image. Defaults to None.
        figsize (tuple[float, float], optional): size of the figure.
            Defaults to (8.0, 6.0).
        color_conversion (Optional[int], optional): color conversion code
            passed to cv2.cvtColor; pass None to skip any conversion.
            Defaults to cv2.COLOR_BGR2RGB.
        save_filepath (Optional[str], optional): save the image if needed.
            Defaults to None.
    """
    # Converts from one colour space to the other. this is needed as RGB
    # is not the default colour space for OpenCV
    if color_conversion is not None:
        im = cv2.cvtColor(self.base.asarray, color_conversion)
    else:
        im = self.base.asarray

    plt.figure(figsize=figsize)

    # Show the image
    plt.imshow(im)

    # remove the axis / ticks for a clean looking image
    plt.xticks([])
    plt.yticks([])

    # if a title is provided, show it
    if title is not None:
        plt.title(title)

    # save before plt.show(), which may clear the current figure
    if save_filepath is not None:
        plt.savefig(save_filepath)

    plt.show()

Cropper Transformer component

CropperImage

CropperImage class

Source code in otary/image/components/transformer/components/cropper/cropper.py
class CropperImage:
    """CropperImage class

    Transformer component dedicated to cropping. It extracts axis-aligned
    rectangular regions from the shared BaseImage, either mutating it in
    place or returning a new Image when copy=True.
    """

    def __init__(self, base: BaseImage) -> None:
        # Shared reference (not a copy): cropping mutates the image in place
        self.base = base

    def __crop_with_padding(
        self, x0: int, y0: int, x1: int, y1: int, pad_value: int = 0
    ) -> NDArray:
        """Crop the image in a straight axis-aligned rectangle way given
        by the top-left point [x0, y0] and the bottom-right point [x1, y1].

        This method is specific to crop with padding meaning that if the
        coordinates are out of the image bounds, the padding is added to the
        output cropped image with the pad value parameter, black by default.

        Args:
            x0 (int): x coordinate of the top-left point
            y0 (int): y coordinate of the top-left point
            x1 (int): x coordinate of the bottom-right point
            y1 (int): y coordinate of the bottom-right point
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            NDArray: output cropped image of exact size (y1-y0, x1-x0)
        """
        # pylint: disable=too-many-locals
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

        # Output size
        crop_width = x1 - x0
        crop_height = y1 - y0

        # Initialize output with black (zeros), same dtype and channel count
        channels = 1 if self.base.is_gray else self.base.asarray.shape[2]
        output_shape = (
            (crop_height, crop_width)
            if channels == 1
            else (crop_height, crop_width, channels)
        )
        result = np.full(shape=output_shape, fill_value=pad_value, dtype=np.uint8)

        # Compute the intersection of crop with image bounds
        ix0 = max(x0, 0)
        iy0 = max(y0, 0)
        ix1 = min(x1, self.base.width)
        iy1 = min(y1, self.base.height)

        # Compute corresponding position in output
        ox0 = ix0 - x0
        oy0 = iy0 - y0
        ox1 = ox0 + (ix1 - ix0)
        oy1 = oy0 + (iy1 - iy0)

        # Copy the valid region; everything outside it keeps the pad value
        result[oy0:oy1, ox0:ox1] = self.base.asarray[iy0:iy1, ix0:ix1]

        return result

    def __crop_with_clipping(self, x0: int, y0: int, x1: int, y1: int) -> NDArray:
        """Crop the image in a straight axis-aligned rectangle way given
        by the top-left point [x0, y0] and the bottom-right point [x1, y1].

        Crop by clipping meaning that if the coordinates are out of the image
        bounds the output is only the part of the image that is in the bounds.

        Args:
            x0 (int): x coordinate of the top-left point
            y0 (int): y coordinate of the top-left point
            x1 (int): x coordinate of the bottom-right point
            y1 (int): y coordinate of the bottom-right point

        Returns:
            NDArray: cropped image array (may be smaller than requested)

        Raises:
            ValueError: if the rectangle lies entirely outside the image.
        """
        x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

        if x0 >= self.base.width or y0 >= self.base.height or x1 <= 0 or y1 <= 0:
            raise ValueError(
                f"The coordinates ({x0}, {y0}, {x1}, {y1}) are out of the image "
                f"boundaries (width={self.base.width}, height={self.base.height}). "
                "No crop is possible."
            )

        def clip(value: int, min_value: int, max_value: int) -> int:
            return int(max(min_value, min(value, max_value)))

        # clamp each coordinate into [0, width] x [0, height]
        x0 = clip(x0, 0, self.base.width)
        y0 = clip(y0, 0, self.base.height)
        x1 = clip(x1, 0, self.base.width)
        y1 = clip(y1, 0, self.base.height)

        result = self.base.asarray[y0:y1, x0:x1]
        return result

    def crop(
        self,
        x0: int,
        y0: int,
        x1: int,
        y1: int,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image in a straight axis-aligned rectangle way given
        by the top-left point [x0, y0] and the bottom-right point [x1, y1]

        This function inputs represents the top-left and bottom-right points.
        This method does not provide a way to extract a rotated rectangle or a
        different shape from the image.

        Remember that in this library the x coordinates represent the y coordinates of
        the image array (horizontal axis of the image).
        The array representation is always rows then columns.
        Note that this differs from the OpenCV convention.

        Args:
            x0 (int): top-left x coordinate
            y0 (int): top-left y coordinate
            x1 (int): bottom-right x coordinate
            y1 (int): bottom-right y coordinate
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: cropped image if copy=True else None

        Raises:
            ValueError: if clip and pad are both True or both False.
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # exactly one of clip / pad must be active
        if (clip and pad) or (not clip and not pad):
            raise ValueError(f"Parameters clip and pad cannot be both {clip}")

        if clip and not pad:
            array_crop = self.__crop_with_clipping(
                x0=x0 - extra_border_size,
                y0=y0 - extra_border_size,
                x1=x1 + extra_border_size,
                y1=y1 + extra_border_size,
            )
        else:  # pad and not clip:
            array_crop = self.__crop_with_padding(
                x0=x0 - extra_border_size,
                y0=y0 - extra_border_size,
                x1=x1 + extra_border_size,
                y1=y1 + extra_border_size,
                pad_value=pad_value,
            )

        if copy:
            # really important feature to allow new image from original
            # without the user doing image.copy().crop()
            # which would be much more expensive if the image is large
            # this is why the output of the methods is Optional[Image] not None
            # pylint: disable=import-outside-toplevel
            from otary.image import Image

            return Image(image=array_crop)

        self.base.asarray = array_crop
        return None

    def crop_from_topleft(
        self,
        topleft: np.ndarray,
        width: int,
        height: int,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a rectangle defined by its top-left point, its width and
        its height.

        Args:
            topleft (np.ndarray): (x, y) coordinates of the top-left point
            width (int): width of the rectangle to crop
            height (int): height of the rectangle to crop
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: image cropped if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        return self.crop(
            x0=topleft[0],
            y0=topleft[1],
            x1=topleft[0] + width,
            y1=topleft[1] + height,
            clip=clip,
            pad=pad,
            copy=copy,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_center(
        self,
        center: NDArray,
        width: int,
        height: int,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a rectangle defined by its center point, its width and
        its height.

        Args:
            center (NDArray): (x, y) coordinates of the center point
            width (int): width of the rectangle to crop
            height (int): height of the rectangle to crop
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: image cropped if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # convert the center point into the equivalent top-left corner
        return self.crop_from_topleft(
            topleft=center - np.array([width / 2, height / 2]),
            width=width,
            height=height,
            clip=clip,
            pad=pad,
            copy=copy,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_polygon(
        self,
        polygon: geo.Polygon,
        copy: bool = False,
        clip: bool = True,
        pad: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a polygon.

        The crop window is the axis-aligned bounding box of the polygon.

        Returns:
            Optional[Image]: cropped image if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        return self.crop(
            x0=int(polygon.xmin),
            y0=int(polygon.ymin),
            x1=int(polygon.xmax),
            y1=int(polygon.ymax),
            copy=copy,
            clip=clip,
            pad=pad,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_linear_spline(
        self,
        spline: geo.LinearSpline,
        copy: bool = False,
        clip: bool = True,
        pad: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from a linear spline.

        The crop window is the axis-aligned bounding box of the spline.

        Returns:
            Optional[Image]: cropped image if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        return self.crop(
            x0=int(spline.xmin),
            y0=int(spline.ymin),
            x1=int(spline.xmax),
            y1=int(spline.ymax),
            copy=copy,
            clip=clip,
            pad=pad,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

    def crop_from_axis_aligned_bbox(
        self,
        bbox: geo.Rectangle,
        clip: bool = True,
        pad: bool = False,
        copy: bool = False,
        extra_border_size: int = 0,
        pad_value: int = 0,
    ) -> Optional[Image]:
        """Crop the image from an Axis-Aligned Bounding Box (AABB).
        Inclusive crops which means that the cropped image will have
        width and height equal to the width and height of the AABB.

        Args:
            bbox (geo.Rectangle): axis-aligned bounding box
            clip (bool, optional): whether to clip or not. Defaults to True.
            pad (bool, optional): whether to pad or not. Defaults to False.
            copy (bool, optional): whether to copy or not. Defaults to False.
            extra_border_size (int, optional): extra border size to add to the crop
                in the x and y directions. Defaults to 0 which means no extra border.
            pad_value (int, optional): pad fill value. Defaults to 0.

        Returns:
            Optional[Image]: cropped image if copy=True else None
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # NOTE(review): assert is stripped under python -O; consider raising
        # ValueError for non-axis-aligned boxes instead.
        assert bbox.is_axis_aligned
        # +1 because both edges of the box belong to the crop (inclusive)
        topleft = np.asarray([bbox.xmin, bbox.ymin])
        height = int(bbox.ymax - bbox.ymin + 1)
        width = int(bbox.xmax - bbox.xmin + 1)
        return self.crop_from_topleft(
            topleft=topleft,
            width=width,
            height=height,
            clip=clip,
            pad=pad,
            copy=copy,
            extra_border_size=extra_border_size,
            pad_value=pad_value,
        )

__crop_with_clipping(x0, y0, x1, y1)

Crop the image in a straight axis-aligned rectangle way given by the top-left point [x0, y0] and the bottom-right point [x1, y1].

Crop by clipping meaning that if the coordinates are out of the image bounds the output is only the part of the image that is in the bounds.

Parameters:

Name Type Description Default
x0 int

x coordinate of the top-left point

required
y0 int

y coordinate of the top-left point

required
x1 int

x coordinate of the bottom-right point

required
y1 int

y coordinate of the bottom-right point

required

Returns:

Name Type Description
NDArray NDArray

image cropped

Source code in otary/image/components/transformer/components/cropper/cropper.py
def __crop_with_clipping(self, x0: int, y0: int, x1: int, y1: int) -> NDArray:
    """Extract the axis-aligned rectangle going from the top-left point
    [x0, y0] to the bottom-right point [x1, y1], clipped to the image.

    Coordinates falling outside the image are clamped to its bounds, so
    the output can be smaller than the requested rectangle.

    Args:
        x0 (int): x coordinate of the top-left point
        y0 (int): y coordinate of the top-left point
        x1 (int): x coordinate of the bottom-right point
        y1 (int): y coordinate of the bottom-right point

    Returns:
        NDArray: cropped image array

    Raises:
        ValueError: if the rectangle lies entirely outside the image.
    """
    x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

    if x0 >= self.base.width or y0 >= self.base.height or x1 <= 0 or y1 <= 0:
        raise ValueError(
            f"The coordinates ({x0}, {y0}, {x1}, {y1}) are out of the image "
            f"boundaries (width={self.base.width}, height={self.base.height}). "
            "No crop is possible."
        )

    # clamp every coordinate into [0, width] x [0, height]
    max_x, max_y = self.base.width, self.base.height
    x0, x1 = min(max(x0, 0), max_x), min(max(x1, 0), max_x)
    y0, y1 = min(max(y0, 0), max_y), min(max(y1, 0), max_y)

    return self.base.asarray[y0:y1, x0:x1]

__crop_with_padding(x0, y0, x1, y1, pad_value=0)

Crop the image in a straight axis-aligned rectangle way given by the top-left point [x0, y0] and the bottom-right point [x1, y1].

This method is specific to crop with padding meaning that if the coordinates are out of the image bounds, the padding is added to the output cropped image with the pad value parameter, black by default.

Parameters:

Name Type Description Default
x0 int

x coordinate of the top-left point

required
y0 int

y coordinate of the top-left point

required
x1 int

x coordinate of the bottom-right point

required
y1 int

y coordinate of the bottom-right point

required
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Name Type Description
NDArray NDArray

output cropped image

Source code in otary/image/components/transformer/components/cropper/cropper.py
def __crop_with_padding(
    self, x0: int, y0: int, x1: int, y1: int, pad_value: int = 0
) -> NDArray:
    """Extract the axis-aligned rectangle going from the top-left point
    [x0, y0] to the bottom-right point [x1, y1], padding where needed.

    The output always has the exact requested size (y1-y0, x1-x0): any part
    of the window that falls outside the image is filled with ``pad_value``
    (black by default).

    Args:
        x0 (int): x coordinate of the top-left point
        y0 (int): y coordinate of the top-left point
        x1 (int): x coordinate of the bottom-right point
        y1 (int): y coordinate of the bottom-right point
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        NDArray: output cropped image
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    x0, y0, x1, y1 = int(x0), int(y0), int(x1), int(y1)

    out_w = x1 - x0
    out_h = y1 - y0

    # canvas of the requested size, pre-filled with the pad value
    if self.base.is_gray:
        canvas_shape: tuple = (out_h, out_w)
    else:
        canvas_shape = (out_h, out_w, self.base.asarray.shape[2])
    canvas = np.full(shape=canvas_shape, fill_value=pad_value, dtype=np.uint8)

    # part of the source image that actually overlaps the crop window
    src_x0, src_y0 = max(x0, 0), max(y0, 0)
    src_x1 = min(x1, self.base.width)
    src_y1 = min(y1, self.base.height)

    # where that overlap lands inside the output canvas
    dst_x0, dst_y0 = src_x0 - x0, src_y0 - y0
    dst_x1 = dst_x0 + (src_x1 - src_x0)
    dst_y1 = dst_y0 + (src_y1 - src_y0)

    canvas[dst_y0:dst_y1, dst_x0:dst_x1] = self.base.asarray[
        src_y0:src_y1, src_x0:src_x1
    ]

    return canvas

crop(x0, y0, x1, y1, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image in a straight axis-aligned rectangle way given by the top-left point [x0, y0] and the bottom-right point [x1, y1]

This function inputs represents the top-left and bottom-right points. This method does not provide a way to extract a rotated rectangle or a different shape from the image.

Remember that in this library the x coordinates represent the y coordinates of the image array (horizontal axis of the image). The array representation is always rows then columns. Note that this differs from the OpenCV convention.

Parameters:

Name Type Description Default
x0 int

top-left x coordinate

required
y0 int

top-left y coordinate

required
x1 int

bottom-right x coordinate

required
y1 int

bottom-right y coordinate

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: cropped image if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop(
    self,
    x0: int,
    y0: int,
    x1: int,
    y1: int,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the axis-aligned rectangle defined by its top-left corner
    [x0, y0] and its bottom-right corner [x1, y1].

    Exactly one of ``clip`` or ``pad`` must be True: with ``clip`` the
    window is clamped to the image bounds, with ``pad`` the out-of-bounds
    area is filled with ``pad_value``. Rotated rectangles or other shapes
    are not supported by this method.

    Remember that in this library the x coordinates represent the y
    coordinates of the image array (horizontal axis of the image).
    The array representation is always rows then columns.
    Note that this differs from the OpenCV convention.

    Args:
        x0 (int): top-left x coordinate
        y0 (int): top-left y coordinate
        x1 (int): bottom-right x coordinate
        y1 (int): bottom-right y coordinate
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: cropped image if copy=True else None

    Raises:
        ValueError: if clip and pad are both True or both False.
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    if (clip and pad) or (not clip and not pad):
        raise ValueError(f"Parameters clip and pad cannot be both {clip}")

    # grow the window symmetrically by the extra border before cropping
    window = {
        "x0": x0 - extra_border_size,
        "y0": y0 - extra_border_size,
        "x1": x1 + extra_border_size,
        "y1": y1 + extra_border_size,
    }
    if clip and not pad:
        array_crop = self.__crop_with_clipping(**window)
    else:  # pad and not clip
        array_crop = self.__crop_with_padding(**window, pad_value=pad_value)

    if not copy:
        # mutate the shared image in place
        self.base.asarray = array_crop
        return None

    # Returning a brand new Image here avoids the much more expensive
    # image.copy().crop() pattern on large images; this is why the method
    # returns Optional[Image] instead of None.
    # pylint: disable=import-outside-toplevel
    from otary.image import Image

    return Image(image=array_crop)

crop_from_axis_aligned_bbox(bbox, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image from an Axis-Aligned Bounding Box (AABB). Inclusive crops which means that the cropped image will have width and height equal to the width and height of the AABB.

Parameters:

Name Type Description Default
bbox Rectangle

axis-aligned bounding box

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: cropped image if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_axis_aligned_bbox(
    self,
    bbox: geo.Rectangle,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the image to an Axis-Aligned Bounding Box (AABB).

    The crop is inclusive: the cropped image has exactly the width and
    height of the AABB.

    Args:
        bbox (geo.Rectangle): axis-aligned bounding box
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: cropped image if copy=True else None
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    assert bbox.is_axis_aligned
    # +1 so that both edges of the box belong to the crop (inclusive)
    return self.crop_from_topleft(
        topleft=np.asarray([bbox.xmin, bbox.ymin]),
        width=int(bbox.xmax - bbox.xmin + 1),
        height=int(bbox.ymax - bbox.ymin + 1),
        clip=clip,
        pad=pad,
        copy=copy,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_center(center, width, height, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image from a rectangle defined by its center point, its width and its height.

Parameters:

Name Type Description Default
center NDArray

(x, y) coordinates of the center point

required
width int

width of the rectangle to crop

required
height int

height of the rectangle to crop

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: image cropped if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_center(
    self,
    center: NDArray,
    width: int,
    height: int,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop a ``width`` x ``height`` rectangle centered on ``center``.

    Args:
        center (NDArray): (x, y) coordinates of the center point
        width (int): width of the rectangle to crop
        height (int): height of the rectangle to crop
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: image cropped if copy=True else None
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    # derive the equivalent top-left corner from the center and the size
    half_size = np.array([width / 2, height / 2])
    return self.crop_from_topleft(
        topleft=center - half_size,
        width=width,
        height=height,
        clip=clip,
        pad=pad,
        copy=copy,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_linear_spline(spline, copy=False, clip=True, pad=False, extra_border_size=0, pad_value=0)

Crop the image from a linear spline

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_linear_spline(
    self,
    spline: geo.LinearSpline,
    copy: bool = False,
    clip: bool = True,
    pad: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the image to the axis-aligned bounding box of a linear spline"""
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    window = (int(spline.xmin), int(spline.ymin), int(spline.xmax), int(spline.ymax))
    return self.crop(
        x0=window[0],
        y0=window[1],
        x1=window[2],
        y1=window[3],
        copy=copy,
        clip=clip,
        pad=pad,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_polygon(polygon, copy=False, clip=True, pad=False, extra_border_size=0, pad_value=0)

Crop the image from a polygon

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_polygon(
    self,
    polygon: geo.Polygon,
    copy: bool = False,
    clip: bool = True,
    pad: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop the image to the axis-aligned bounding box of a polygon"""
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    xmin, ymin = polygon.xmin, polygon.ymin
    xmax, ymax = polygon.xmax, polygon.ymax
    return self.crop(
        x0=int(xmin),
        y0=int(ymin),
        x1=int(xmax),
        y1=int(ymax),
        copy=copy,
        clip=clip,
        pad=pad,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

crop_from_topleft(topleft, width, height, clip=True, pad=False, copy=False, extra_border_size=0, pad_value=0)

Crop the image from a rectangle defined by its top-left point, its width and its height.

Parameters:

Name Type Description Default
topleft ndarray

(x, y) coordinates of the top-left point

required
width int

width of the rectangle to crop

required
height int

height of the rectangle to crop

required
clip bool

whether to clip or not. Defaults to True.

True
pad bool

whether to pad or not. Defaults to False.

False
copy bool

whether to copy or not. Defaults to False.

False
extra_border_size int

extra border size to add to the crop in the x and y directions. Defaults to 0 which means no extra border.

0
pad_value int

pad fill value. Defaults to 0.

0

Returns:

Type Description
Optional[Image]

Optional[Image]: image cropped if copy=True else None

Source code in otary/image/components/transformer/components/cropper/cropper.py
def crop_from_topleft(
    self,
    topleft: np.ndarray,
    width: int,
    height: int,
    clip: bool = True,
    pad: bool = False,
    copy: bool = False,
    extra_border_size: int = 0,
    pad_value: int = 0,
) -> Optional[Image]:
    """Crop a ``width`` x ``height`` rectangle whose top-left corner
    is ``topleft``.

    Args:
        topleft (np.ndarray): (x, y) coordinates of the top-left point
        width (int): width of the rectangle to crop
        height (int): height of the rectangle to crop
        clip (bool, optional): whether to clip or not. Defaults to True.
        pad (bool, optional): whether to pad or not. Defaults to False.
        copy (bool, optional): whether to copy or not. Defaults to False.
        extra_border_size (int, optional): extra border size to add to the crop
            in the x and y directions. Defaults to 0 which means no extra border.
        pad_value (int, optional): pad fill value. Defaults to 0.

    Returns:
        Optional[Image]: image cropped if copy=True else None
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    x_left, y_top = topleft[0], topleft[1]
    return self.crop(
        x0=x_left,
        y0=y_top,
        x1=x_left + width,
        y1=y_top + height,
        clip=clip,
        pad=pad,
        copy=copy,
        extra_border_size=extra_border_size,
        pad_value=pad_value,
    )

Binarizer component

BinarizerImage

BinarizerImage class

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
class BinarizerImage:
    """BinarizerImage class.

    Groups thresholding/binarization operations applied to a composed
    :class:`BaseImage`. Every method converts the image to grayscale first,
    since thresholding requires a single-channel input.
    """

    def __init__(self, base: BaseImage) -> None:
        # Composed BaseImage whose array is mutated in place by the methods below
        self.base = base

    def threshold_simple(self, thresh: int) -> None:
        """Compute the image thresholded by a single value T.
        All pixels with value v <= T are turned black and those with value v > T are
        turned white.

        Args:
            thresh (int): value to separate the black from the white pixels.
        """
        self.base.as_grayscale()
        self.base.asarray = np.array((self.base.asarray > thresh) * 255, dtype=np.uint8)

    def threshold_adaptative(self) -> None:
        """Apply adaptive thresholding using a Gaussian-weighted neighborhood
        (block size 11, constant 2).
        See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.
        """
        self.base.as_grayscale()
        binary = cv2.adaptiveThreshold(
            self.base.asarray,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            11,
            2,
        )
        self.base.asarray = binary

    def threshold_otsu(self) -> None:
        """Apply Otsu thresholding, which determines the threshold value
        automatically from the image histogram.
        See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.
        """
        self.base.as_grayscale()
        _, img_thresholded = cv2.threshold(
            self.base.asarray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        self.base.asarray = img_thresholded

    def threshold_sauvola(
        self, window_size: int = 15, k: float = 0.2, r: float = 128.0
    ) -> None:
        """Apply Sauvola thresholding.
        See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
                plot_niblack_sauvola.html.

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.

        Args:
            window_size (int, optional): sauvola window size to apply on the
                image. Defaults to 15.
            k (float, optional): sauvola k factor to apply to regulate the impact
                of the std. Defaults to 0.2.
            r (float, optional): sauvola r value. Defaults to 128.
        """
        self.base.as_grayscale()
        self.base.asarray = threshold_niblack_like(
            img=self.base.asarray, method="sauvola", window_size=window_size, k=k, r=r
        )[1]

    def threshold_niblack(self, window_size: int = 15, k: float = 0.2) -> None:
        """Apply Niblack thresholding.
        See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
                plot_niblack_sauvola.html

        As the input image must be a grayscale before applying any thresholding
        methods we convert the image to grayscale.

        Args:
            window_size (int, optional): niblack window size to apply on the
                image. Defaults to 15.
            k (float, optional): niblack k factor to apply to regulate the impact
                of the std. Defaults to 0.2.
        """
        self.base.as_grayscale()
        self.base.asarray = threshold_niblack_like(
            img=self.base.asarray, method="niblack", window_size=window_size, k=k
        )[1]

    def binary(self, method: BinarizationMethods = "sauvola") -> NDArray:
        """Binary representation of the image with values that can be only 0 or 1.
        The value 0 is now 0 and value of 255 are now 1. Black is 0 and white is 1.
        We can also talk about the mask of the image to refer to the binary
        representation of it.

        The sauvola is generally the best binarization method however it is
        way slower than the others methods. The adaptative or otsu method are the best
        method in terms of speed and quality.

        Args:
            method (str, optional): the binarization method to apply.
                Must be in ["adaptative", "otsu", "sauvola", "niblack", "nick", "wolf"].
                Defaults to "sauvola".

        Returns:
            NDArray: array where its inner values are 0 or 1

        Raises:
            ValueError: if the method is not a valid binarization method.
        """
        if method not in list(get_args(BinarizationMethods)):
            raise ValueError(
                f"Invalid binarization method {method}. "
                f"Must be in {BinarizationMethods}"
            )
        # dispatch to the matching threshold_* method, then read the 0/1 view
        getattr(self, f"threshold_{method}")()
        return self.base.asarray_binary

    def binaryrev(self, method: BinarizationMethods = "sauvola") -> NDArray:
        """Reversed binary representation of the image.
        The value 0 is now 1 and value of 255 are now 0. Black is 1 and white is 0.
        This is why it is called the "binary rev" or "binary reversed".

        Args:
            method (str, optional): the binarization method to apply.
                Defaults to "sauvola".

        Returns:
            NDArray: array where its inner values are 0 or 1
        """
        return 1 - self.binary(method=method)

binary(method='sauvola')

Binary representation of the image with values that can be only 0 or 1. The value 0 is now 0 and value of 255 are now 1. Black is 0 and white is 1. We can also talk about the mask of the image to refer to the binary representation of it.

The sauvola is generally the best binarization method however it is way slower than the others methods. The adaptative or otsu method are the best method in terms of speed and quality.

Parameters:

Name Type Description Default
method str

the binarization method to apply. Must be in ["adaptative", "otsu", "sauvola", "niblack", "nick", "wolf"]. Defaults to "sauvola".

'sauvola'

Returns:

Name Type Description
NDArray NDArray

array where its inner values are 0 or 1

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def binary(self, method: BinarizationMethods = "sauvola") -> NDArray:
    """Binary representation of the image with values that can be only 0 or 1.
    The value 0 is now 0 and value of 255 are now 1. Black is 0 and white is 1.
    We can also talk about the mask of the image to refer to the binary
    representation of it.

    The sauvola is generally the best binarization method however it is
    way slower than the others methods. The adaptative or otsu method are the best
    method in terms of speed and quality.

    Args:
        method (str, optional): the binarization method to apply.
            Must be in ["adaptative", "otsu", "sauvola", "niblack", "nick", "wolf"].
            Defaults to "sauvola".

    Returns:
        NDArray: array where its inner values are 0 or 1

    Raises:
        ValueError: if the method is not a valid binarization method.
    """
    valid_methods = list(get_args(BinarizationMethods))
    if method not in valid_methods:
        raise ValueError(
            f"Invalid binarization method {method}. "
            f"Must be in {BinarizationMethods}"
        )
    # dispatch to the matching threshold_* method, then read the 0/1 view
    apply_threshold = getattr(self, f"threshold_{method}")
    apply_threshold()
    return self.base.asarray_binary

binaryrev(method='sauvola')

Reversed binary representation of the image. The value 0 is now 1 and value of 255 are now 0. Black is 1 and white is 0. This is why it is called the "binary rev" or "binary reversed".

Parameters:

Name Type Description Default
method str

the binarization method to apply. Defaults to "sauvola".

'sauvola'

Returns:

Name Type Description
NDArray NDArray

array where its inner values are 0 or 1

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def binaryrev(self, method: BinarizationMethods = "sauvola") -> NDArray:
    """Reversed binary representation of the image.
    The value 0 is now 1 and value of 255 are now 0. Black is 1 and white is 0.
    This is why it is called the "binary rev" or "binary reversed".

    Args:
        method (str, optional): the binarization method to apply.
            Defaults to "sauvola".

    Returns:
        NDArray: array where its inner values are 0 or 1
    """
    return 1 - self.binary(method=method)

threshold_adaptative()

Apply adaptive thresholding.

Adaptive thresholding with a Gaussian-weighted neighborhood is applied. See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_adaptative(self) -> None:
    """Apply adaptive thresholding using a Gaussian-weighted neighborhood
    (block size 11, constant 2).
    See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.
    """
    self.base.as_grayscale()
    binary = cv2.adaptiveThreshold(
        self.base.asarray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    self.base.asarray = binary

threshold_niblack(window_size=15, k=0.2)

Apply Niblack thresholding. See https://scikit-image.org/docs/stable/auto_examples/segmentation/ plot_niblack_sauvola.html

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Parameters:

Name Type Description Default
window_size int

apply on the image. Defaults to 15.

15
k float

factor to apply to regulate the impact of the std. Defaults to 0.2.

0.2
Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_niblack(self, window_size: int = 15, k: float = 0.2) -> None:
    """Apply Niblack thresholding.
    See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
            plot_niblack_sauvola.html

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.

    Args:
        window_size (int, optional): niblack window size to apply on the
            image. Defaults to 15.
        k (float, optional): niblack k factor to apply to regulate the impact
            of the std. Defaults to 0.2.
    """
    self.base.as_grayscale()
    self.base.asarray = threshold_niblack_like(
        img=self.base.asarray, method="niblack", window_size=window_size, k=k
    )[1]

threshold_otsu()

Apply Otsu thresholding.

Otsu's method determines the threshold value automatically from the image histogram. See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_otsu(self) -> None:
    """Apply Otsu thresholding, which determines the threshold value
    automatically from the image histogram.
    See https://docs.opencv.org/4.x/d7/d4d/tutorial_py_thresholding.html.

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.
    """
    self.base.as_grayscale()
    _, img_thresholded = cv2.threshold(
        self.base.asarray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    self.base.asarray = img_thresholded

threshold_sauvola(window_size=15, k=0.2, r=128.0)

Apply Sauvola thresholding. See https://scikit-image.org/docs/stable/auto_examples/segmentation/ plot_niblack_sauvola.html.

As the input image must be a grayscale before applying any thresholding methods we convert the image to grayscale.

Parameters:

Name Type Description Default
window_size int

sauvola window size to apply on the image. Defaults to 15.

15
k float

sauvola k factor to apply to regulate the impact of the std. Defaults to 0.2.

0.2
r float

sauvola r value. Defaults to 128.

128.0
Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_sauvola(
    self, window_size: int = 15, k: float = 0.2, r: float = 128.0
) -> None:
    """Apply Sauvola thresholding.
    See https://scikit-image.org/docs/stable/auto_examples/segmentation/\
            plot_niblack_sauvola.html.

    As the input image must be a grayscale before applying any thresholding
    methods we convert the image to grayscale.

    Args:
        window_size (int, optional): sauvola window size to apply on the
            image. Defaults to 15.
        k (float, optional): sauvola k factor to apply to regulate the impact
            of the std. Defaults to 0.2.
        r (float, optional): sauvola r value. Defaults to 128.
    """
    self.base.as_grayscale()
    result = threshold_niblack_like(
        img=self.base.asarray, method="sauvola", window_size=window_size, k=k, r=r
    )
    # index 1 holds the thresholded image
    self.base.asarray = result[1]

threshold_simple(thresh)

Compute the image thresholded by a single value T. All pixels with value v <= T are turned black and those with value v > T are turned white.

Parameters:

Name Type Description Default
thresh int

value to separate the black from the white pixels.

required
Source code in otary/image/components/transformer/components/binarizer/binarizer.py
def threshold_simple(self, thresh: int) -> None:
    """Compute the image thresholded by a single value T.
    All pixels with value v <= T are turned black and those with value v > T are
    turned white.

    Args:
        thresh (int): value to separate the black from the white pixels.
    """
    self.base.as_grayscale()
    grayscale = self.base.asarray
    self.base.asarray = np.where(grayscale > thresh, 255, 0).astype(np.uint8)

Geometry Transformer component

GeometrizerImage

GeometrizerImage class

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
class GeometrizerImage:
    """GeometrizerImage class.

    Groups geometric transformations (shift, rotation, centering) applied to
    a composed :class:`BaseImage`.
    """

    def __init__(self, base: BaseImage) -> None:
        # Composed BaseImage whose array is mutated in place by the methods below
        self.base = base

    def shift(self, shift: NDArray, fill_value: Sequence[float] = (0.0,)) -> None:
        """Shift the image by performing a translation operation

        Args:
            shift (NDArray): Vector for translation
            fill_value (Sequence[float], optional): value to fill the
                border of the image revealed by the translation.
                Can be a tuple of 3 values for RGB image or a single value for
                grayscale image. Defaults to (0.0,) which is black.
        """
        vector_shift = assert_transform_shift_vector(vector=shift)
        shift_matrix = np.asarray(
            [[1.0, 0.0, vector_shift[0]], [0.0, 1.0, vector_shift[1]]],
            dtype=np.float32,
        )

        self.base.asarray = cv2.warpAffine(
            src=self.base.asarray,
            M=shift_matrix,
            dsize=(self.base.width, self.base.height),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=fill_value,
        )  # type: ignore[call-overload]

    def __rotate_exact(
        self,
        angle: float,
        is_degree: bool = False,
        is_clockwise: bool = True,
        reshape: bool = True,
        border_fill_value: float = 0.0,
    ) -> None:
        """Rotate the image by a given angle.
        This method is more accurate than the rotate method but way slower
        (about 10 times slower).

        Args:
            angle (float): angle to rotate the image
            is_degree (bool, optional): whether the angle is in degree or not.
                If not it is considered to be in radians.
                Defaults to False which means radians.
            is_clockwise (bool, optional): whether the rotation is clockwise or
                counter-clockwise. Defaults to True.
            reshape (bool, optional): scipy reshape option. Defaults to True.
            border_fill_value (float, optional): value to fill the border of the image
                after the rotation in case reshape is True. Can only be a single
                integer. Does not support tuple of 3 integers for RGB image.
                Defaults to 0.0 which is black.
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        if not is_degree:
            angle = np.rad2deg(angle)
        if is_clockwise:
            # by default scipy rotate is counter-clockwise
            angle = -angle
        self.base.asarray = scipy.ndimage.rotate(
            input=self.base.asarray,
            angle=angle,
            reshape=reshape,
            cval=border_fill_value,
        )

    def rotate(
        self,
        angle: float,
        is_degree: bool = False,
        is_clockwise: bool = True,
        reshape: bool = True,
        fill_value: Sequence[float] = (0.0,),
        fast: bool = True,
    ) -> None:
        """Rotate the image by a given angle.

        For the rotation with reshape, meaning preserving the whole image,
        we used the code from the imutils library:
        https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py#L41

        Args:
            angle (float): angle to rotate the image
            is_degree (bool, optional): whether the angle is in degree or not.
                If not it is considered to be in radians.
                Defaults to False which means radians.
            is_clockwise (bool, optional): whether the rotation is clockwise or
                counter-clockwise. Defaults to True.
            reshape (bool, optional): whether to preserve the original image or not.
                If True, the complete image is preserved hence the width and height
                of the rotated image are different than in the original image.
                Defaults to True.
            fill_value (Sequence[float], optional): value to
                fill the border of the image after the rotation in case reshape is True.
                Can be a tuple of 3 values for RGB image or a single value for
                grayscale image. Defaults to (0.0,) which is black.
            fast (bool, optional): if True use the fast cv2-based rotation;
                if False use the slower but more accurate scipy-based rotation,
                which only supports a single scalar fill value. Defaults to True.

        Raises:
            ValueError: if fast is False and fill_value is not a single float.
        """
        # pylint: disable=too-many-arguments, too-many-positional-arguments
        # pylint: disable=too-many-locals
        if not fast:  # using scipy rotate which is slower than cv2
            border_fill_value_scalar = fill_value[0]
            if not isinstance(border_fill_value_scalar, float):
                raise ValueError(
                    f"The border_fill_value {border_fill_value_scalar} is not a valid "
                    "value. It must be a single integer when fast mode is off"
                )
            self.__rotate_exact(
                angle=angle,
                is_degree=is_degree,
                is_clockwise=is_clockwise,
                reshape=reshape,
                border_fill_value=border_fill_value_scalar,
            )
            return None

        if not is_degree:
            angle = np.rad2deg(angle)
        if is_clockwise:
            angle = -angle

        h, w = self.base.asarray.shape[:2]
        center = (w / 2, h / 2)

        # Compute rotation matrix
        rotmat = cv2.getRotationMatrix2D(center, angle, 1.0)  # param angle in degree

        if reshape:
            # Compute new bounding dimensions
            cos_a = np.abs(rotmat[0, 0])
            sin_a = np.abs(rotmat[0, 1])
            new_w = int((h * sin_a) + (w * cos_a))
            new_h = int((h * cos_a) + (w * sin_a))
            w, h = new_w, new_h

            # Adjust the rotation matrix to shift the image center
            rotmat[0, 2] += (w / 2) - center[0]
            rotmat[1, 2] += (h / 2) - center[1]

        self.base.asarray = cv2.warpAffine(
            src=self.base.asarray,
            M=rotmat,
            dsize=(w, h),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=fill_value,
        )  # type: ignore[call-overload]
        return None

    def center_to_point(self, point: NDArray) -> NDArray:
        """Shift the image so that the input point ends up in the middle of the
        new image

        Args:
            point (NDArray): point as (2,) shape numpy array

        Returns:
            NDArray: translation Vector
        """
        shift_vector = self.base.center - point
        self.shift(shift=shift_vector)
        return shift_vector

    def center_to_segment(self, segment: NDArray) -> NDArray:
        """Shift the image so that the segment middle point ends up in the middle
        of the new image

        Args:
            segment (NDArray): segment as numpy array of shape (2, 2)

        Returns:
            NDArray: vector_shift
        """
        return self.center_to_point(point=geo.Segment(segment).centroid)

    def restrict_rect_in_frame(self, rectangle: geo.Rectangle) -> geo.Rectangle:
        """Create a new rectangle that is contained within the image borders.
        If the input rectangle is outside the image, the returned rectangle is a
        image frame-fitted rectangle that preserve the same shape.

        Args:
            rectangle (geo.Rectangle): input rectangle

        Returns:
            geo.Rectangle: new rectangle
        """
        # rectangle boundaries
        xmin, xmax = rectangle.xmin, rectangle.xmax
        ymin, ymax = rectangle.ymin, rectangle.ymax

        # recalculate boundaries based on image shape
        xmin = max(0, xmin)
        ymin = max(0, ymin)
        xmax = min(self.base.width, xmax)
        ymax = min(self.base.height, ymax)

        # recreate a rectangle with new coordinates
        rect_restricted = geo.Rectangle.from_topleft_bottomright(
            topleft=np.asarray([xmin, ymin]),
            bottomright=np.asarray([xmax, ymax]),
            is_cast_int=True,
        )
        return rect_restricted

__rotate_exact(angle, is_degree=False, is_clockwise=True, reshape=True, border_fill_value=0.0)

Rotate the image by a given angle. This method is more accurate than the rotate method but way slower (about 10 times slower).

Parameters:

Name Type Description Default
angle float

angle to rotate the image

required
is_degree bool

whether the angle is in degree or not. If not it is considered to be in radians. Defaults to False which means radians.

False
is_clockwise bool

whether the rotation is clockwise or counter-clockwise. Defaults to True.

True
reshape bool

scipy reshape option. Defaults to True.

True
border_fill_value float

value to fill the border of the image after the rotation in case reshape is True. Can only be a single integer. Does not support tuple of 3 integers for RGB image. Defaults to 0.0 which is black.

0.0
Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def __rotate_exact(
    self,
    angle: float,
    is_degree: bool = False,
    is_clockwise: bool = True,
    reshape: bool = True,
    border_fill_value: float = 0.0,
) -> None:
    """Rotate the image by a given angle.
    This method is more accurate than the rotate method but way slower
    (about 10 times slower).

    Args:
        angle (float): angle to rotate the image
        is_degree (bool, optional): whether the angle is in degree or not.
            If not it is considered to be in radians.
            Defaults to False which means radians.
        is_clockwise (bool, optional): whether the rotation is clockwise or
            counter-clockwise. Defaults to True.
        reshape (bool, optional): scipy reshape option. Defaults to True.
        border_fill_value (float, optional): value to fill the border of the image
            after the rotation in case reshape is True. Can only be a single
            integer. Does not support tuple of 3 integers for RGB image.
            Defaults to 0.0 which is black.
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    angle_deg = angle if is_degree else np.rad2deg(angle)
    # scipy rotates counter-clockwise by default, so negate for clockwise
    if is_clockwise:
        angle_deg = -angle_deg
    self.base.asarray = scipy.ndimage.rotate(
        input=self.base.asarray,
        angle=angle_deg,
        reshape=reshape,
        cval=border_fill_value,
    )

center_to_point(point)

Shift the image so that the input point ends up in the middle of the new image

Parameters:

Name Type Description Default
point NDArray

point as (2,) shape numpy array

required

Returns:

Name Type Description
NDArray NDArray

translation Vector

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def center_to_point(self, point: NDArray) -> NDArray:
    """Shift the image so that the input point ends up in the middle of the
    new image

    Args:
        point (NDArray): point as (2,) shape numpy array

    Returns:
        NDArray: translation Vector
    """
    # translation that moves `point` onto the image center
    translation = self.base.center - point
    self.shift(shift=translation)
    return translation

center_to_segment(segment)

Shift the image so that the segment middle point ends up in the middle of the new image

Parameters:

Name Type Description Default
segment NDArray

segment as numpy array of shape (2, 2)

required

Returns:

Name Type Description
NDArray NDArray

vector_shift

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def center_to_segment(self, segment: NDArray) -> NDArray:
    """Shift the image so that the segment middle point ends up in the middle
    of the new image

    Args:
        segment (NDArray): segment as numpy array of shape (2, 2)

    Returns:
        NDArray: vector_shift
    """
    midpoint = geo.Segment(segment).centroid
    return self.center_to_point(point=midpoint)

restrict_rect_in_frame(rectangle)

Create a new rectangle that is contained within the image borders. If the input rectangle is outside the image, the returned rectangle is a image frame-fitted rectangle that preserve the same shape.

Parameters:

Name Type Description Default
rectangle Rectangle

input rectangle

required

Returns:

Type Description
Rectangle

geo.Rectangle: new rectangle

Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def restrict_rect_in_frame(self, rectangle: geo.Rectangle) -> geo.Rectangle:
    """Create a new rectangle that is contained within the image borders.
    If the input rectangle is outside the image, the returned rectangle is a
    image frame-fitted rectangle that preserve the same shape.

    Args:
        rectangle (geo.Rectangle): input rectangle

    Returns:
        geo.Rectangle: new rectangle
    """
    # clamp each boundary to the image frame [0, width] x [0, height]
    clamped_xmin = max(0, rectangle.xmin)
    clamped_ymin = max(0, rectangle.ymin)
    clamped_xmax = min(self.base.width, rectangle.xmax)
    clamped_ymax = min(self.base.height, rectangle.ymax)

    # build a new rectangle from the clamped corners
    return geo.Rectangle.from_topleft_bottomright(
        topleft=np.asarray([clamped_xmin, clamped_ymin]),
        bottomright=np.asarray([clamped_xmax, clamped_ymax]),
        is_cast_int=True,
    )

rotate(angle, is_degree=False, is_clockwise=True, reshape=True, fill_value=(0.0,), fast=True)

Rotate the image by a given angle.

For the rotation with reshape, meaning preserving the whole image, we used the code from the imutils library: https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py#L41

Parameters:

Name Type Description Default
angle float

angle to rotate the image

required
is_degree bool

whether the angle is in degree or not. If not it is considered to be in radians. Defaults to False which means radians.

False
is_clockwise bool

whether the rotation is clockwise or counter-clockwise. Defaults to True.

True
reshape bool

whether to preserve the original image or not. If True, the complete image is preserved hence the width and height of the rotated image are different than in the original image. Defaults to True.

True
fill_value Sequence[float]

value to fill the border of the image after the rotation in case reshape is True. Can be a tuple of 3 values for RGB image or a single value for grayscale image. Defaults to (0.0,) which is black.

(0.0,)
Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def rotate(
    self,
    angle: float,
    is_degree: bool = False,
    is_clockwise: bool = True,
    reshape: bool = True,
    fill_value: Sequence[float] = (0.0,),
    fast: bool = True,
) -> None:
    """Rotate the image by a given angle.

    For the rotation with reshape, meaning preserving the whole image,
    we used the code from the imutils library:
    https://github.com/PyImageSearch/imutils/blob/master/imutils/convenience.py#L41

    Args:
        angle (float): angle to rotate the image
        is_degree (bool, optional): whether the angle is in degree or not.
            If not it is considered to be in radians.
            Defaults to False which means radians.
        is_clockwise (bool, optional): whether the rotation is clockwise or
            counter-clockwise. Defaults to True.
        reshape (bool, optional): whether to preserve the original image or not.
            If True, the complete image is preserved hence the width and height
            of the rotated image are different than in the original image.
            Defaults to True.
        fill_value (Sequence[float], optional): value to
            fill the border of the image after the rotation in case reshape is True.
            Can be a tuple of 3 values for RGB image or a single value for
            grayscale image. Defaults to (0.0,) which is black.
        fast (bool, optional): if True use the fast cv2-based rotation;
            if False use the slower but more accurate scipy-based rotation,
            which only supports a single scalar fill value. Defaults to True.

    Raises:
        ValueError: if fast is False and fill_value is not a single float.
    """
    # pylint: disable=too-many-arguments, too-many-positional-arguments
    # pylint: disable=too-many-locals
    if not fast:  # using scipy rotate which is slower than cv2
        border_fill_value_scalar = fill_value[0]
        if not isinstance(border_fill_value_scalar, float):
            raise ValueError(
                f"The border_fill_value {border_fill_value_scalar} is not a valid "
                "value. It must be a single integer when fast mode is off"
            )
        self.__rotate_exact(
            angle=angle,
            is_degree=is_degree,
            is_clockwise=is_clockwise,
            reshape=reshape,
            border_fill_value=border_fill_value_scalar,
        )
        return None

    if not is_degree:
        angle = np.rad2deg(angle)
    if is_clockwise:
        angle = -angle

    h, w = self.base.asarray.shape[:2]
    center = (w / 2, h / 2)

    # Compute rotation matrix
    rotmat = cv2.getRotationMatrix2D(center, angle, 1.0)  # param angle in degree

    if reshape:
        # Compute new bounding dimensions
        cos_a = np.abs(rotmat[0, 0])
        sin_a = np.abs(rotmat[0, 1])
        new_w = int((h * sin_a) + (w * cos_a))
        new_h = int((h * cos_a) + (w * sin_a))
        w, h = new_w, new_h

        # Adjust the rotation matrix to shift the image center
        rotmat[0, 2] += (w / 2) - center[0]
        rotmat[1, 2] += (h / 2) - center[1]

    self.base.asarray = cv2.warpAffine(
        src=self.base.asarray,
        M=rotmat,
        dsize=(w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=fill_value,
    )  # type: ignore[call-overload]
    return None

shift(shift, fill_value=(0.0,))

Shift the image by performing a translation operation

Parameters:

Name Type Description Default
shift NDArray

Vector for translation

required
fill_value Sequence[float]

value to fill the border of the image revealed by the translation. Can be a tuple of 3 values for RGB image or a single value for grayscale image. Defaults to (0.0,) which is black.

(0.0,)
Source code in otary/image/components/transformer/components/geometrizer/geometrizer.py
def shift(self, shift: NDArray, fill_value: Sequence[float] = (0.0,)) -> None:
    """Shift the image by performing a translation operation.

    Args:
        shift (NDArray): translation vector (dx, dy) in pixels.
        fill_value (Sequence[float], optional): value used to fill the pixels
            uncovered by the translation. Can be a sequence of 3 values for an
            RGB image or a single value for a grayscale image.
            Defaults to (0.0,) which is black.
    """
    vector_shift = assert_transform_shift_vector(vector=shift)

    # Affine matrix of a pure translation: identity rotation part plus the
    # shift vector as the offset column.
    shift_matrix = np.asarray(
        [[1.0, 0.0, vector_shift[0]], [0.0, 1.0, vector_shift[1]]],
        dtype=np.float32,
    )

    # Output size is unchanged; pixels shifted outside are dropped and the
    # uncovered area is filled with fill_value.
    self.base.asarray = cv2.warpAffine(
        src=self.base.asarray,
        M=shift_matrix,
        dsize=(self.base.width, self.base.height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=fill_value,
    )  # type: ignore[call-overload]

Morphologyzer Transformer component

MorphologyzerImage

MorphologyzerImage.

Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
class MorphologyzerImage:
    """MorphologyzerImage.

    Morphology and scaling operations (resize, blur, dilate, erode, border
    padding) applied in place on the wrapped BaseImage array.
    """

    def __init__(self, base: BaseImage) -> None:
        self.base = base

    def resize_fixed(
        self,
        dim: tuple[int, int],
        interpolation: int = cv2.INTER_AREA,
        copy: bool = False,
    ) -> Optional[Image]:
        """Resize the image using a fixed dimension well defined.
        This function can result in a distorted image if the ratio between
        width and height is different in the original and the new image.

        If the dim argument has a non-positive value in height or width, then
        a proportional ratio is applied based on the one of the two dimension
        given.

        Args:
            dim (tuple[int, int]): a tuple with two integers in the following
                order (width, height).
            interpolation (int, optional): resize interpolation.
                Defaults to cv2.INTER_AREA.
            copy (bool, optional): whether to return a new image or not.

        Returns:
            Optional[Image]: a new Image if copy is True, None otherwise.

        Raises:
            ValueError: if neither dimension is strictly positive.
        """
        # At least one dimension must be strictly positive so the other can
        # be derived from it (previously (0, 0) slipped through the check).
        if dim[0] <= 0 and dim[1] <= 0:
            raise ValueError(
                f"The dim argument {dim} must have at least one positive value."
            )

        _dim = list(dim)

        # Derive the missing dimension while preserving the aspect ratio.
        if _dim[1] <= 0:
            _dim[1] = int(self.base.height * (_dim[0] / self.base.width))
        if _dim[0] <= 0:
            _dim[0] = int(self.base.width * (_dim[1] / self.base.height))

        result = cv2.resize(
            src=self.base.asarray, dsize=tuple(_dim), interpolation=interpolation
        )

        if copy:
            # pylint: disable=import-outside-toplevel
            from otary.image import Image

            return Image(image=result)

        self.base.asarray = result
        return None

    def resize(
        self, factor: float, interpolation: int = cv2.INTER_AREA, copy: bool = False
    ) -> Optional[Image]:
        """Resize the image to a new size using a scaling factor value that
        will be applied to all dimensions (width and height).

        Applying this method can not result in a distorted image.

        Args:
            factor (float): factor in (0, 5] to resize the image.
                A value of 1 does not change the image.
                A value of 2 doubles the image size.
                A maximum value of 5 is set to avoid accidentally producing a
                gigantic image.
            interpolation (int, optional): resize interpolation.
                Defaults to cv2.INTER_AREA.
            copy (bool, optional): whether to return a new image or not.

        Returns:
            Optional[Image]: a new Image if copy is True, None otherwise.

        Raises:
            ValueError: if factor is not in (0, 5].
        """
        if factor == 1:
            return None

        # A factor of 0 would produce an empty image, so it is rejected too
        # (the previous `factor < 0` check let 0 through).
        if factor <= 0:
            raise ValueError(
                f"The resize factor value {factor} must be strictly positive"
            )

        max_scale_pct = 5
        if factor > max_scale_pct:
            raise ValueError(f"The resize factor value {factor} is probably too big")

        width = int(self.base.width * factor)
        height = int(self.base.height * factor)
        dim = (width, height)

        return self.resize_fixed(dim=dim, interpolation=interpolation, copy=copy)

    def blur(
        self,
        kernel: tuple = (5, 5),
        iterations: int = 1,
        method: BlurMethods = "average",
        sigmax: float = 0,
    ) -> None:
        """Blur the image

        Args:
            kernel (tuple, optional): blur kernel size. Defaults to (5, 5).
            iterations (int, optional): number of iterations. Defaults to 1.
            method (str, optional): blur method.
                Must be in ["average", "median", "gaussian", "bilateral"].
                Defaults to "average".
            sigmax (float, optional): sigmaX value for the gaussian blur.
                Defaults to 0.

        Raises:
            ValueError: if method is not a valid blur method.
        """
        valid_methods = get_args(BlurMethods)
        if method not in valid_methods:
            # Show the accepted values, not the Literal type object.
            raise ValueError(
                f"Invalid blur method {method}. Must be in {valid_methods}"
            )

        for _ in range(iterations):
            if method == "average":
                self.base.asarray = cv2.blur(src=self.base.asarray, ksize=kernel)
            elif method == "median":
                # medianBlur expects a single aperture size, hence kernel[0].
                self.base.asarray = cv2.medianBlur(
                    src=self.base.asarray, ksize=kernel[0]
                )
            elif method == "gaussian":
                self.base.asarray = cv2.GaussianBlur(
                    src=self.base.asarray, ksize=kernel, sigmaX=sigmax
                )
            elif method == "bilateral":
                self.base.asarray = cv2.bilateralFilter(
                    src=self.base.asarray, d=kernel[0], sigmaColor=75, sigmaSpace=75
                )

    def dilate(
        self,
        kernel: tuple = (5, 5),
        iterations: int = 1,
        dilate_black_pixels: bool = True,
    ) -> None:
        """Dilate the image by making the black pixels expand in the image.
        The dilatation can be parametrize thanks to the kernel and iterations
        arguments.

        Args:
            kernel (tuple, optional): kernel to dilate. Defaults to (5, 5).
            iterations (int, optional): number of dilatation iterations. Defaults to 1.
            dilate_black_pixels (bool, optional): whether to dilate black pixels or not
        """
        if iterations == 0:
            return None

        if dilate_black_pixels:
            # Dilating black pixels == dilating the inverted image, then
            # inverting the result back.
            self.base.asarray = 255 - np.asarray(
                cv2.dilate(
                    self.base.rev().asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )
        else:  # dilate white pixels by default
            self.base.asarray = np.asarray(
                cv2.dilate(
                    self.base.asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )

        return None

    def erode(
        self,
        kernel: tuple = (5, 5),
        iterations: int = 1,
        erode_black_pixels: bool = True,
    ) -> None:
        """Erode the image by making the black pixels shrink in the image.
        The anti-dilatation can be parametrize thanks to the kernel and iterations
        arguments.

        Args:
            kernel (tuple, optional): kernel to erode. Defaults to (5, 5).
            iterations (int, optional): number of iterations. Defaults to 1.
            erode_black_pixels (bool, optional): whether to erode black pixels or not
        """
        # Nothing to do for zero iterations. The previous guard was a no-op
        # `pass`, so execution fell through to a pointless erode call.
        if iterations == 0:
            return None

        if erode_black_pixels:
            # Eroding black pixels == eroding the inverted image, then
            # inverting the result back.
            self.base.asarray = 255 - np.asarray(
                cv2.erode(
                    self.base.rev().asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )
        else:
            self.base.asarray = np.asarray(
                cv2.erode(
                    self.base.asarray,
                    kernel=np.ones(kernel, np.uint8),
                    iterations=iterations,
                ),
                dtype=np.uint8,
            )

        return None

    def add_border(self, size: int, fill_value: int = 0) -> None:
        """Add a constant border of the given thickness around the image.

        Args:
            size (int): border thickness in pixels, applied on all four sides.
            fill_value (int, optional): border color. Defaults to 0 (black).
        """
        size = int(size)
        self.base.asarray = cv2.copyMakeBorder(
            src=self.base.asarray,
            top=size,
            bottom=size,
            left=size,
            right=size,
            borderType=cv2.BORDER_CONSTANT,
            value=fill_value,
        )  # type: ignore[call-overload]

add_border(size, fill_value=0)

Add a border to the image.

Parameters:

Name Type Description Default
size int

border thickness in pixels, applied on all four sides.

required
fill_value int

border color. Defaults to 0.

0
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def add_border(self, size: int, fill_value: int = 0) -> None:
    """Add a constant border of the given thickness around the image.

    Args:
        size (int): border thickness in pixels, applied on all four sides.
        fill_value (int, optional): border color. Defaults to 0 (black).
    """
    size = int(size)
    self.base.asarray = cv2.copyMakeBorder(
        src=self.base.asarray,
        top=size,
        bottom=size,
        left=size,
        right=size,
        borderType=cv2.BORDER_CONSTANT,
        value=fill_value,
    )  # type: ignore[call-overload]

blur(kernel=(5, 5), iterations=1, method='average', sigmax=0)

Blur the image

Parameters:

Name Type Description Default
kernel tuple

blur kernel size. Defaults to (5, 5).

(5, 5)
iterations int

number of iterations. Defaults to 1.

1
method str

blur method. Must be in ["average", "median", "gaussian", "bilateral"]. Defaults to "average".

'average'
sigmax float

sigmaX value for the gaussian blur. Defaults to 0.

0
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def blur(
    self,
    kernel: tuple = (5, 5),
    iterations: int = 1,
    method: BlurMethods = "average",
    sigmax: float = 0,
) -> None:
    """Blur the image

    Args:
        kernel (tuple, optional): blur kernel size. Defaults to (5, 5).
        iterations (int, optional): number of iterations. Defaults to 1.
        method (str, optional): blur method.
            Must be in ["average", "median", "gaussian", "bilateral"].
            Defaults to "average".
        sigmax (float, optional): sigmaX value for the gaussian blur.
            Defaults to 0.

    Raises:
        ValueError: if method is not a valid blur method.
    """
    valid_methods = get_args(BlurMethods)
    if method not in valid_methods:
        # Show the accepted values, not the Literal type object.
        raise ValueError(
            f"Invalid blur method {method}. Must be in {valid_methods}"
        )

    for _ in range(iterations):
        if method == "average":
            self.base.asarray = cv2.blur(src=self.base.asarray, ksize=kernel)
        elif method == "median":
            # medianBlur expects a single aperture size, hence kernel[0].
            self.base.asarray = cv2.medianBlur(
                src=self.base.asarray, ksize=kernel[0]
            )
        elif method == "gaussian":
            self.base.asarray = cv2.GaussianBlur(
                src=self.base.asarray, ksize=kernel, sigmaX=sigmax
            )
        elif method == "bilateral":
            self.base.asarray = cv2.bilateralFilter(
                src=self.base.asarray, d=kernel[0], sigmaColor=75, sigmaSpace=75
            )

dilate(kernel=(5, 5), iterations=1, dilate_black_pixels=True)

Dilate the image by making the black pixels expand in the image. The dilatation can be parametrize thanks to the kernel and iterations arguments.

Parameters:

Name Type Description Default
kernel tuple

kernel to dilate. Defaults to (5, 5).

(5, 5)
iterations int

number of dilatation iterations. Defaults to 1.

1
dilate_black_pixels bool

whether to dilate black pixels or not

True
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def dilate(
    self,
    kernel: tuple = (5, 5),
    iterations: int = 1,
    dilate_black_pixels: bool = True,
) -> None:
    """Dilate the image by making the black pixels expand in the image.
    The dilatation can be parametrize thanks to the kernel and iterations
    arguments.

    Args:
        kernel (tuple, optional): kernel to dilate. Defaults to (5, 5).
        iterations (int, optional): number of dilatation iterations. Defaults to 1.
        dilate_black_pixels (bool, optional): whether to dilate black pixels or not
    """
    if iterations == 0:
        return None

    struct_elem = np.ones(kernel, np.uint8)

    if dilate_black_pixels:
        # Dilating black pixels == dilating the inverted image, then
        # inverting the result back.
        grown = cv2.dilate(
            self.base.rev().asarray,
            kernel=struct_elem,
            iterations=iterations,
        )
        self.base.asarray = 255 - np.asarray(grown, dtype=np.uint8)
    else:
        # Default behavior: dilate the white pixels directly.
        grown = cv2.dilate(
            self.base.asarray,
            kernel=struct_elem,
            iterations=iterations,
        )
        self.base.asarray = np.asarray(grown, dtype=np.uint8)

    return None

erode(kernel=(5, 5), iterations=1, erode_black_pixels=True)

Erode the image by making the black pixels shrink in the image. The anti-dilatation can be parametrize thanks to the kernel and iterations arguments.

Parameters:

Name Type Description Default
kernel tuple

kernel to erode. Defaults to (5, 5).

(5, 5)
iterations int

number of iterations. Defaults to 1.

1
erode_black_pixels bool

whether to erode black pixels or not

True
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def erode(
    self,
    kernel: tuple = (5, 5),
    iterations: int = 1,
    erode_black_pixels: bool = True,
) -> None:
    """Erode the image by making the black pixels shrink in the image.
    The anti-dilatation can be parametrize thanks to the kernel and iterations
    arguments.

    Args:
        kernel (tuple, optional): kernel to erode. Defaults to (5, 5).
        iterations (int, optional): number of iterations. Defaults to 1.
        erode_black_pixels (bool, optional): whether to erode black pixels or not
    """
    # Nothing to do for zero iterations. The previous guard was a no-op
    # `pass`, so execution fell through to a pointless erode call.
    if iterations == 0:
        return None

    if erode_black_pixels:
        # Eroding black pixels == eroding the inverted image, then
        # inverting the result back.
        self.base.asarray = 255 - np.asarray(
            cv2.erode(
                self.base.rev().asarray,
                kernel=np.ones(kernel, np.uint8),
                iterations=iterations,
            ),
            dtype=np.uint8,
        )
    else:
        self.base.asarray = np.asarray(
            cv2.erode(
                self.base.asarray,
                kernel=np.ones(kernel, np.uint8),
                iterations=iterations,
            ),
            dtype=np.uint8,
        )

    return None

resize(factor, interpolation=cv2.INTER_AREA, copy=False)

Resize the image to a new size using a scaling factor value that will be applied to all dimensions (width and height).

Applying this method can not result in a distorted image.

Parameters:

Name Type Description Default
factor float

factor in (0, 5] to resize the image. A value of 1 does not change the image. A value of 2 doubles the image size. A maximum value of 5 is set to avoid accidentally producing a gigantic image.

required
interpolation int

resize interpolation. Defaults to cv2.INTER_AREA.

INTER_AREA
copy bool

whether to return a new image or not.

False
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def resize(
    self, factor: float, interpolation: int = cv2.INTER_AREA, copy: bool = False
) -> Optional[Image]:
    """Resize the image to a new size using a scaling factor value that
    will be applied to all dimensions (width and height).

    Applying this method can not result in a distorted image.

    Args:
        factor (float): factor in (0, 5] to resize the image.
            A value of 1 does not change the image.
            A value of 2 doubles the image size.
            A maximum value of 5 is set to avoid accidentally producing a
            gigantic image.
        interpolation (int, optional): resize interpolation.
            Defaults to cv2.INTER_AREA.
        copy (bool, optional): whether to return a new image or not.

    Returns:
        Optional[Image]: a new Image if copy is True, None otherwise.

    Raises:
        ValueError: if factor is not in (0, 5].
    """
    if factor == 1:
        return None

    # A factor of 0 would produce an empty image, so it is rejected too
    # (the previous `factor < 0` check let 0 through).
    if factor <= 0:
        raise ValueError(
            f"The resize factor value {factor} must be strictly positive"
        )

    max_scale_pct = 5
    if factor > max_scale_pct:
        raise ValueError(f"The resize factor value {factor} is probably too big")

    width = int(self.base.width * factor)
    height = int(self.base.height * factor)
    dim = (width, height)

    return self.resize_fixed(dim=dim, interpolation=interpolation, copy=copy)

resize_fixed(dim, interpolation=cv2.INTER_AREA, copy=False)

Resize the image using a fixed dimension well defined. This function can result in a distorted image if the ratio between width and height is different in the original and the new image.

If the dim argument has a negative value in height or width, then a proportional ratio is applied based on the one of the two dimension given.

Parameters:

Name Type Description Default
dim tuple[int, int]

a tuple with two integers in the following order (width, height).

required
interpolation int

resize interpolation. Defaults to cv2.INTER_AREA.

INTER_AREA
copy bool

whether to return a new image or not.

False
Source code in otary/image/components/transformer/components/morphologyzer/morphologyzer.py
def resize_fixed(
    self,
    dim: tuple[int, int],
    interpolation: int = cv2.INTER_AREA,
    copy: bool = False,
) -> Optional[Image]:
    """Resize the image using a fixed dimension well defined.
    This function can result in a distorted image if the ratio between
    width and height is different in the original and the new image.

    If the dim argument has a non-positive value in height or width, then
    a proportional ratio is applied based on the one of the two dimension
    given.

    Args:
        dim (tuple[int, int]): a tuple with two integers in the following order
            (width, height).
        interpolation (int, optional): resize interpolation.
            Defaults to cv2.INTER_AREA.
        copy (bool, optional): whether to return a new image or not.

    Returns:
        Optional[Image]: a new Image if copy is True, None otherwise.

    Raises:
        ValueError: if neither dimension is strictly positive.
    """
    # At least one dimension must be strictly positive so the other can be
    # derived from it (previously (0, 0) slipped through the check).
    if dim[0] <= 0 and dim[1] <= 0:
        raise ValueError(
            f"The dim argument {dim} must have at least one positive value."
        )

    _dim = list(dim)

    # Derive the missing dimension while preserving the aspect ratio.
    if _dim[1] <= 0:
        _dim[1] = int(self.base.height * (_dim[0] / self.base.width))
    if _dim[0] <= 0:
        _dim[0] = int(self.base.width * (_dim[1] / self.base.height))

    result = cv2.resize(
        src=self.base.asarray, dsize=tuple(_dim), interpolation=interpolation
    )

    if copy:
        # pylint: disable=import-outside-toplevel
        from otary.image import Image

        return Image(image=result)

    self.base.asarray = result
    return None