import numpy as np
import cv2
from typing import Tuple, List
def to_gray(img: np.ndarray) -> np.ndarray:
    """Return a single-channel (2-D) version of *img*.

    Args:
        img: Image array. Arrays that are not 3-D are returned unchanged.
            Multi-channel input is converted with ``cv2.COLOR_RGB2GRAY``,
            i.e. it is assumed to be in RGB channel order
            (NOTE(review): confirm callers do not pass BGR from cv2.imread).

    Returns:
        The input itself when it is not 3-D, a 2-D view when the input has
        a single trailing channel, otherwise the converted grayscale image.
    """
    if img.ndim != 3:
        return img
    if img.shape[2] == 1:
        # cvtColor's RGB2GRAY conversion rejects 1-channel input; the data is
        # already grayscale, so just drop the singleton channel axis.
        return img[:, :, 0]
    return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
def clahe(gray, clip=3.0, tile=(8, 8)):
    """Equalize local contrast of *gray* with OpenCV's CLAHE operator.

    Args:
        gray: Image handed to ``CLAHE.apply``.
        clip: Forwarded as ``clipLimit`` to ``cv2.createCLAHE``.
        tile: Forwarded as ``tileGridSize`` to ``cv2.createCLAHE``.

    Returns:
        The result of applying the configured CLAHE object to *gray*.
    """
    equalizer = cv2.createCLAHE(clipLimit=clip, tileGridSize=tile)
    equalized = equalizer.apply(gray)
    return equalized
def denoise(gray, ksize=3):
    """Smooth *gray* with a square ``ksize`` x ``ksize`` Gaussian kernel.

    The sigma argument is passed as 0, which leaves the choice of standard
    deviation to OpenCV.
    """
    kernel_shape = (ksize, ksize)
    blurred = cv2.GaussianBlur(gray, kernel_shape, 0)
    return blurred
def adaptive_binarize(gray, block=31, C=10):
    """Binarize *gray* with a Gaussian-weighted adaptive threshold.

    Args:
        gray: Image passed to ``cv2.adaptiveThreshold``.
        block: Neighbourhood size passed as the block-size argument.
        C: Constant passed as the final argument of ``cv2.adaptiveThreshold``.

    Returns:
        The thresholded image; the maximum output value is 255
        (``THRESH_BINARY`` mode).
    """
    method = cv2.ADAPTIVE_THRESH_GAUSSIAN_C
    mode = cv2.THRESH_BINARY
    max_value = 255
    return cv2.adaptiveThreshold(gray, max_value, method, mode, block, C)
def deskew(binary: np.ndarray) -> Tuple[np.ndarray, float]:
    """Estimate the skew of dark-on-light content and rotate to undo it.

    Every pixel whose value differs from 255 is treated as foreground. The
    minimum-area rectangle around those pixels gives the skew estimate.

    Args:
        binary: Image whose background is 255 (assumed uint8 — TODO confirm
            against callers).

    Returns:
        ``(rotated, angle)`` where *angle* is the rotation in degrees that
        was applied about the image centre (positive = counter-clockwise, as
        defined by ``cv2.getRotationMatrix2D``), in the range (-45, 45].
        If the image has no foreground, the input object itself is returned
        together with ``0.0``.
    """
    # Invert so that foreground (text) pixels are the non-zero ones.
    inv = 255 - binary
    # np.where yields (row, col) pairs; the angle handling below relies on
    # this ordering being kept as-is.
    coords = np.column_stack(np.where(inv > 0))
    if coords.size == 0:
        return binary, 0.0

    # float32 is a point type minAreaRect accepts on every OpenCV version.
    raw = cv2.minAreaRect(coords.astype(np.float32))[-1]

    # minAreaRect reports its angle in [-90, 0) on older OpenCV releases and
    # in (0, 90] on newer ones. The two describe the same rectangle and differ
    # by a multiple of 90 degrees, so wrap modulo 90 into (-45, 45]. The sign
    # is flipped because the rotation must cancel the measured skew rather
    # than add to it.
    angle = 45.0 - (raw + 45.0) % 90.0

    h, w = binary.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    # NOTE(review): cubic interpolation introduces intermediate gray levels,
    # so the result is no longer strictly two-valued.
    rotated = cv2.warpAffine(
        binary,
        M,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return rotated, angle
def remove_lines(binary, scale=40):
    """Erase long horizontal and vertical runs of non-zero pixels.

    A morphological opening (erode, then dilate) with a thin rectangle keeps
    only non-zero runs at least as long as the rectangle. Whatever survives
    the horizontal or the vertical opening is cleared from the output.

    NOTE(review): erode/dilate treat bright pixels as foreground, so this
    expects light lines on a dark background. ``deskew`` and
    ``find_text_blocks`` in this file invert their input first, which
    suggests dark-on-light images elsewhere — confirm what callers pass here.

    Args:
        binary: Image to clean; it is not modified.
        scale: Divisor of the image width / height that sets the minimum run
            length (at least 1 pixel).

    Returns:
        A new image equal to *binary* with the detected line pixels zeroed.
    """

    def _opened(kernel_size):
        # Erode then dilate with the same rectangular structuring element.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        shrunk = cv2.erode(binary, kernel, iterations=1)
        return cv2.dilate(shrunk, kernel, iterations=1)

    n_cols = binary.shape[1]
    horizontal = _opened((max(1, n_cols // scale), 1))

    n_rows = binary.shape[0]
    vertical = _opened((1, max(1, n_rows // scale)))

    line_mask = cv2.bitwise_or(horizontal, vertical)
    keep_mask = cv2.bitwise_not(line_mask)
    return cv2.bitwise_and(binary, keep_mask)
def find_text_blocks(binary: np.ndarray, min_area: int = 800) -> List[Tuple[int, int, int, int]]:
    """Return bounding boxes of the outer contours found in the inverted image.

    Args:
        binary: Image that is inverted (``255 - binary``) before contour
            detection, so regions darker than 255 become the foreground.
        min_area: Minimum ``w * h`` of a bounding box for it to be kept.

    Returns:
        ``(x, y, w, h)`` tuples sorted by ``y`` and then by ``x``. The sort
        uses exact ``y`` values, so boxes on the same visual line whose tops
        differ by a pixel are ordered by that pixel, not left-to-right.
    """
    found = cv2.findContours(255 - binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contour list is second-to-last in both.
    contours = found[-2]

    rects = (cv2.boundingRect(c) for c in contours)
    boxes = [(x, y, w, h) for (x, y, w, h) in rects if w * h >= min_area]
    boxes.sort(key=lambda b: (b[1], b[0]))
    return boxes
def visualize_boxes(img_rgb, boxes, color=(0, 255, 0), thickness=2):
    """Draw rectangle outlines for *boxes* on a copy of *img_rgb*.

    Args:
        img_rgb: Image to annotate; it is copied and left untouched.
        boxes: Iterable of ``(x, y, w, h)`` tuples.
        color: Colour passed to ``cv2.rectangle``. Defaults to
            ``(0, 255, 0)``.
        thickness: Line thickness passed to ``cv2.rectangle``. Defaults to 2.

    Returns:
        The annotated copy of the image.
    """
    vis = img_rgb.copy()
    for x, y, w, h in boxes:
        # cv2.rectangle draws in place on ``vis``.
        cv2.rectangle(vis, (x, y), (x + w, y + h), color, thickness)
    return vis