Otary library, shape your images, image your shapes.
Welcome to Otary
Otary — elegant, readable, and powerful image and 2D geometry Python library.
Features
The main features of Otary are:
- Unification: Otary offers a cohesive solution for image and geometry manipulation, letting you work seamlessly without switching tools.
- Readability: Self-explanatory by design. Otary’s clean, readable code eliminates the need for comments, making it easy for beginners to learn and for experts to build efficiently.
- Performance: optimized for speed and efficiency, making it suitable for high-performance applications. It is built on top of NumPy and OpenCV, which are known for their speed and performance.
- Interactivity: designed to be interactive and user-friendly, ideal for Jupyter notebooks and live exploration.
- Flexibility: provides a flexible and extensible architecture, allowing developers to customize and extend its functionality as needed.
Example
Let me illustrate the usage of Otary with a simple example. Imagine you need to:
- read an image from a pdf file
- draw a rectangle on it, shift and rotate the rectangle
- crop a part of the image
- rotate the cropped image
- apply a threshold
- show the image
In order to compare the use of Otary versus other libraries, I will use the same example but with different libraries. Try it yourself on your favorite LLM (like ChatGPT) by copying the query:
Generate Python code to read an image from a PDF, draw a rectangle on it, shift and rotate the rectangle, crop a part of the image, rotate the cropped image, and apply a threshold on the image.
Using Otary, you can do it with a few lines of code:
import otary as ot

# Read one page of the PDF as an image; page_nb=0 presumably selects the first page — TODO confirm.
im = ot.Image.from_pdf("path/to/you/file.pdf", page_nb=0)

# Rectangle given by four corner points; `* 100` presumably scales the coordinates — TODO confirm.
rectangle = ot.Rectangle([[1, 1], [4, 1], [4, 4], [1, 4]]) * 100

# NOTE(review): the result of this chain is not assigned, so the drawing below
# only reflects it if shift/rotate update `rectangle` in place — confirm.
rectangle.shift([50, 50]).rotate(angle=30, is_degree=True)

# Draw, crop, rotate and threshold in a single chained expression.
im = (
    im.draw_polygons([rectangle])
    .crop(x0=50, y0=50, x1=450, y1=450)
    .rotate(angle=90, is_degree=True)
    .threshold_simple(thresh=200)
)

im.show()
#!/usr/bin/env python3
"""
Steps:
- Load first page of a PDF as an image
- Draw a rectangle
- Shift & rotate that rectangle (visualized as a rotated box)
- Crop a region of the image
- Rotate the cropped image
- Threshold the (rotated) crop
Dependencies:
pip install pdf2image Pillow opencv-python
# If pdf2image isn't available, install: pip install PyMuPDF
# Note: pdf2image requires Poppler on your system.
Edit the CONFIG section below to suit your needs.
"""
from pathlib import Path
import math
# Pillow & OpenCV
from PIL import Image, ImageDraw
import numpy as np
import cv2
# Detect which PDF rasterizer is importable. pdf2image is tried first and
# PyMuPDF is the fallback; `_loader` records the outcome ("pdf2image",
# "pymupdf", or None when neither import succeeds).
_loader = None
try:
    from pdf2image import convert_from_path
except Exception:
    try:
        import fitz  # PyMuPDF
    except Exception:
        _loader = None
    else:
        _loader = "pymupdf"
else:
    _loader = "pdf2image"
# --------------------------- CONFIG --------------------------- #
# PDF whose first page is processed.
PDF_PATH = "example.pdf"  # <- put your PDF path here

# Directory receiving the PNG written after each step (created at import time).
OUTPUT_DIR = Path("out_steps")
OUTPUT_DIR.mkdir(exist_ok=True)

# Axis-aligned rectangle drawn first: top-left corner and size, in pixels.
rect_x = 200
rect_y = 150
rect_w = 400
rect_h = 250

# Offset added to the rectangle center before it is redrawn, in pixels.
shift_dx = 120
shift_dy = -40

# Angle of the redrawn rectangle, in degrees.
# NOTE(review): main() forwards this value to cv2.boxPoints, and OpenCV
# documents the RotatedRect angle as clockwise in image coordinates —
# confirm the direction you expect.
rotate_deg = 25.0

# Crop region (top-left corner and size, in pixels). main() takes the crop
# from the page image that already carries the axis-aligned rectangle.
crop_x = 100
crop_y = 100
crop_w = 600
crop_h = 400

# Angle applied to the cropped image, in degrees.
crop_rotate_deg = -15.0

# Fixed binarization threshold; None selects Otsu's automatic threshold.
fixed_threshold_value = None  # e.g., set to 128 to force a fixed threshold
# -------------------------------------------------------------- #
def load_pdf_first_page_as_image(pdf_path: str, dpi: int = 300) -> Image.Image:
    """Return the first page of a PDF as a Pillow RGB image.

    The backend is the one recorded in the module-level ``_loader`` when the
    file was imported: ``pdf2image`` if available, otherwise PyMuPDF.

    Args:
        pdf_path: Path of the PDF file to rasterize.
        dpi: Rendering resolution in dots per inch.

    Returns:
        The rendered first page as an RGB ``PIL.Image.Image``.

    Raises:
        RuntimeError: If the PDF contains no pages.
        ImportError: If neither ``pdf2image`` nor PyMuPDF could be imported.
    """
    if _loader == "pdf2image":
        pil_pages = convert_from_path(pdf_path, dpi=dpi, first_page=1, last_page=1)
        if not pil_pages:
            raise RuntimeError("No pages found in PDF.")
        return pil_pages[0].convert("RGB")

    if _loader == "pymupdf":
        # Open the document as a context manager so it is closed on every
        # exit path, including the empty-PDF error and rendering failures.
        with fitz.open(pdf_path) as doc:
            if doc.page_count == 0:
                raise RuntimeError("No pages found in PDF.")
            page = doc.load_page(0)
            # PDF pages are measured in points (72 per inch), so dpi / 72
            # is the zoom factor that yields the requested resolution.
            zoom = dpi / 72.0
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
            # Build the Pillow image while the document is still open.
            return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    raise ImportError(
        "No PDF rasterizer available. Install either `pdf2image` (plus Poppler) or `PyMuPDF`."
    )
def pil_to_cv(img_pil: Image.Image) -> np.ndarray:
    """Convert a Pillow RGB image into a BGR array as used by OpenCV."""
    rgb_pixels = np.array(img_pil)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
def cv_to_pil(img_cv: np.ndarray) -> Image.Image:
    """Convert an OpenCV BGR array into a Pillow RGB image."""
    rgb_pixels = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_pixels)
def draw_axis_aligned_rectangle_pil(img_pil: Image.Image, x, y, w, h, width=4):
    """Outline an axis-aligned rectangle in red on a Pillow image.

    Args:
        img_pil: Image to draw on; the same object is returned.
        x, y: Top-left corner of the rectangle.
        w, h: Rectangle width and height.
        width: Outline thickness passed to Pillow.

    Returns:
        ``img_pil`` with the rectangle drawn on it.
    """
    corners = [x, y, x + w, y + h]
    ImageDraw.Draw(img_pil).rectangle(corners, outline=(255, 0, 0), width=width)
    return img_pil
def draw_rotated_rectangle_cv(img_cv: np.ndarray, center, size, angle_deg: float, thickness=3, color=(0, 255, 0)):
    """Draw the outline of a rotated rectangle on an OpenCV image.

    Args:
        img_cv: Image to draw on; the same array is returned.
        center: Rectangle center as ``(cx, cy)``.
        size: Rectangle size as ``(w, h)``.
        angle_deg: Rotation in degrees, forwarded unchanged to ``cv2.boxPoints``.
            NOTE(review): OpenCV documents the RotatedRect angle as clockwise
            in image coordinates — confirm against the caller's expectation.
        thickness: Line thickness of the outline.
        color: Line color (BGR when the image comes from ``pil_to_cv``).

    Returns:
        ``img_cv`` with the outline drawn on it.
    """
    corners = cv2.boxPoints((center, size, angle_deg))  # 4x2 float32 vertices
    outline = np.int32(corners)  # polylines needs integer pixel coordinates
    cv2.polylines(img_cv, [outline], isClosed=True, color=color, thickness=thickness)
    return img_cv
def rotate_image_keep_bounds(img_cv: np.ndarray, angle_deg: float) -> np.ndarray:
    """Rotate an image about its center on a canvas enlarged to fit the result.

    Args:
        img_cv: Image array whose first two dimensions are height and width.
        angle_deg: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image on the enlarged canvas; areas outside the source
        are filled by replicating border pixels.
    """
    height, width = img_cv.shape[:2]
    center_x, center_y = width // 2, height // 2
    matrix = cv2.getRotationMatrix2D((center_x, center_y), angle_deg, 1.0)

    # |cos| and |sin| of the rotation give the bounding box of the rotated image.
    abs_cos = abs(matrix[0, 0])
    abs_sin = abs(matrix[0, 1])
    out_w = int((height * abs_sin) + (width * abs_cos))
    out_h = int((height * abs_cos) + (width * abs_sin))

    # Shift the transform so the rotated content is centered on the new canvas.
    matrix[0, 2] += (out_w / 2) - center_x
    matrix[1, 2] += (out_h / 2) - center_y

    return cv2.warpAffine(
        img_cv,
        matrix,
        (out_w, out_h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
def threshold_image(img_cv_gray: np.ndarray, fixed_thresh: int | None = None) -> np.ndarray:
    """Binarize a grayscale image.

    Args:
        img_cv_gray: Grayscale image to threshold.
        fixed_thresh: Threshold level to use; when ``None`` the level is
            chosen automatically with Otsu's method.

    Returns:
        The thresholded image, with 255 as the maximum value.
    """
    if fixed_thresh is None:
        # With THRESH_OTSU the level passed to OpenCV is a placeholder.
        level = 0
        mode = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    else:
        level = int(fixed_thresh)
        mode = cv2.THRESH_BINARY
    # cv2.threshold returns (level_used, image); only the image is needed.
    return cv2.threshold(img_cv_gray, level, 255, mode)[1]
def main():
    """Run the whole demo, writing one PNG per step into ``OUTPUT_DIR``."""
    # Step 1: rasterize the first page of the PDF.
    page = load_pdf_first_page_as_image(PDF_PATH, dpi=300)
    page.save(OUTPUT_DIR / "01_loaded_page.png")

    # Step 2: outline the axis-aligned rectangle on a copy (Pillow).
    boxed_page = draw_axis_aligned_rectangle_pil(
        page.copy(), rect_x, rect_y, rect_w, rect_h, width=4
    )
    boxed_page.save(OUTPUT_DIR / "02_axis_aligned_rect.png")

    # Everything below works on the OpenCV version of that image.
    frame = pil_to_cv(boxed_page)

    # Step 3: move the rectangle center by the configured shift and draw the
    # rotated rectangle (green) on a separate copy.
    moved_center = (
        rect_x + rect_w / 2.0 + shift_dx,
        rect_y + rect_h / 2.0 + shift_dy,
    )
    overlay = draw_rotated_rectangle_cv(
        frame.copy(),
        center=moved_center,
        size=(rect_w, rect_h),
        angle_deg=rotate_deg,
        thickness=3,
        color=(0, 255, 0),
    )
    cv2.imwrite(str(OUTPUT_DIR / "03_shifted_rotated_rect.png"), overlay)

    # Step 4: crop an axis-aligned region, clamped to the image. The crop comes
    # from `frame`, which carries the first rectangle but not the rotated one.
    img_h, img_w = frame.shape[:2]
    left = max(0, min(img_w - 1, int(crop_x)))
    top = max(0, min(img_h - 1, int(crop_y)))
    right = max(0, min(img_w, int(crop_x + crop_w)))
    bottom = max(0, min(img_h, int(crop_y + crop_h)))
    region = frame[top:bottom, left:right].copy()
    cv2.imwrite(str(OUTPUT_DIR / "04_crop.png"), region)

    # Step 5: rotate the crop on an enlarged canvas.
    rotated_region = rotate_image_keep_bounds(region, crop_rotate_deg)
    cv2.imwrite(str(OUTPUT_DIR / "05_crop_rotated.png"), rotated_region)

    # Step 6: binarize the rotated crop.
    gray_region = cv2.cvtColor(rotated_region, cv2.COLOR_BGR2GRAY)
    binary_region = threshold_image(gray_region, fixed_threshold_value)
    cv2.imwrite(str(OUTPUT_DIR / "06_crop_threshold.png"), binary_region)

    print("Done. See outputs in:", OUTPUT_DIR.resolve())


if __name__ == "__main__":
    main()
ChatGPT proposes to re-invent the wheel and over-complicates everything.
Using Otary makes the code:
- Much more readable and hence maintainable
- Much more interactive
- Much simpler: dependency management is reduced to a single library, instead of juggling several libraries such as Pillow, OpenCV, Scikit-Image, PyMuPDF, etc.