# utils.py — image preprocessing and feature-matching helpers
# (upstream revision dc0c09a)
import cv2
import numpy as np
from PIL import Image
import base64
from io import BytesIO
# --- Histogram peak detection tuning ---
# Smoothing kernel length for the 256-bin histogram, minimum peak height as a
# fraction of the total pixel count, and minimum intensity spacing between
# accepted peaks (see analyze_color_distribution).
PEAK_SMOOTHING_KERNEL, PEAK_MIN_HEIGHT_PCT, PEAK_MIN_DISTANCE = 11, 0.005, 15
# Width (in bins) of the dark/bright edge regions of the histogram and the
# half-window for the local-maximum test. PEAK_EDGE_MIN_MASS_PCT is not
# referenced in this chunk — presumably used elsewhere; verify before removing.
PEAK_EDGE_REGION, PEAK_LOCAL_WINDOW, PEAK_EDGE_MIN_MASS_PCT = 50, 5, 0.002
# --- Canny edge analysis ---
# Canny hysteresis thresholds, plus the intensity cutoffs that classify edge
# pixels as dark or bright (see detect_edge_intensities).
CANNY_LOW, CANNY_HIGH, CANNY_DARK_THRESH, CANNY_BRIGHT_THRESH = 50, 150, 80, 180
# Minimum fraction of dark/bright edge pixels needed to flag that mode, and
# the intensity boundaries splitting peaks into dark / middle / bright.
CANNY_EDGE_RATIO_THRESHOLD, REGION_DARK_THRESHOLD, REGION_BRIGHT_THRESHOLD = 0.15, 60, 200
# --- XDoG parameters (see xdog_edge_detection) ---
XDOG_SIGMA, XDOG_K, XDOG_PHI, XDOG_TAU, XDOG_EPSILON = 0.5, 1.6, 200, 0.98, 0.01
# --- Template matching search parameters (not referenced in this chunk) ---
TEMPLATE_SCALE_RANGE, TEMPLATE_PYRAMID_LEVELS = (0.7, 1.3), 5
TEMPLATE_COARSE_STEPS, TEMPLATE_FINE_STEPS, TEMPLATE_TOP_CANDIDATES = 20, 10, 5
# FIX: Added min-height and box-sizing to prevent layout shifts
# Placeholder HTML shown while results are pending; the 550px height matches
# make_svg's container so the page layout does not jump when content arrives.
EMPTY = "<div style='min-height:550px;height:550px;padding:30px;color:#999;text-align:center;display:flex;align-items:center;justify-content:center;box-sizing:border-box;'>Waiting...</div>"
# Default pipeline configuration. Values are read through get_config(), so
# any key may be absent or overridden by a caller-supplied fallback.
CONFIG = {
    'detector_type': 'orb',
    'orb_features': 5000,
    'use_template_matching': True,
    'use_gms': True,
    'use_guided_matching': True,
    # Guided-search radius: percentage of the longer image side, or a fixed
    # pixel value, selected by 'guided_search_mode' (see get_guided_search_radius).
    'guided_search_mode': 'percent',
    'guided_search_radius_pct': 8.0,
    'guided_search_radius_px': 80,
    'bidirectional_matching': True,
    'min_guided_matches': 8,
    'gms_scale': True,
    'keypoint_mask_radius': 2,
    'ransac_iterations': 10000,
    'ransac_threshold': 5.0,
    'min_ransac_sample_distance': 30,
    'min_inliers': 4,
    'scale_min': 0.3,
    'scale_max': 3.0,
}


def get_config(key, default=None):
    """Return CONFIG[key] when the key exists, otherwise *default*."""
    if key in CONFIG:
        return CONFIG[key]
    return default
def check_gms_available():
    """Return True when OpenCV exposes the GMS matcher.

    cv2.xfeatures2d.matchGMS only exists in opencv-contrib builds, so the
    cheapest feature test is probing the attributes.
    """
    try:
        return hasattr(cv2, 'xfeatures2d') and hasattr(cv2.xfeatures2d, 'matchGMS')
    except Exception:
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt. Only ordinary exceptions (e.g. lazy-loaded cv2
        # submodules raising on attribute access) should be treated as
        # "GMS unavailable".
        return False


# Probed once at import time so hot paths can branch on a plain bool.
GMS_AVAILABLE = check_gms_available()
class Size:
    """Lightweight width/height pair used for passing image dimensions."""

    def __init__(self, width, height):
        self.width = width
        self.height = height
def detect_edge_intensities(gray):
    """Run Canny on *gray* and summarize the intensities of edge pixels.

    Returns (info, canny) where canny is the raw edge map and info reports
    whether a meaningful share of edge pixels is dark and/or bright, plus the
    median intensity of each group (None when the group has <= 20 samples).
    With fewer than 50 edge pixels total, only 'has_edges': False is reported.
    """
    canny = cv2.Canny(gray, CANNY_LOW, CANNY_HIGH)
    ys, xs = np.where(canny > 0)
    n_edges = len(ys)
    if n_edges < 50:
        return {'has_edges': False, 'dark_edges': False, 'bright_edges': False}, canny
    samples = gray[ys, xs]
    dark = samples[samples < CANNY_DARK_THRESH]
    bright = samples[samples > CANNY_BRIGHT_THRESH]
    info = {
        'has_edges': True,
        'dark_edges': len(dark) / n_edges > CANNY_EDGE_RATIO_THRESHOLD,
        'bright_edges': len(bright) / n_edges > CANNY_EDGE_RATIO_THRESHOLD,
        'dark_intensity': float(np.median(dark)) if len(dark) > 20 else None,
        'bright_intensity': float(np.median(bright)) if len(bright) > 20 else None,
    }
    return info, canny
def get_background_color(gray, canny_edges):
    """Estimate the background intensity as the median of non-edge pixels.

    Falls back to white (255) when there are fewer than 100 non-edge pixels,
    which is too small a sample for a reliable median.
    """
    samples = gray[canny_edges == 0]
    if samples.size < 100:
        return 255
    return int(np.median(samples))
def analyze_color_distribution(gray):
    """Classify a grayscale image by its histogram peak structure.

    Returns (is_simple, n_colors, reason, peaks, bg_color):
    - is_simple: True for empty/single-peak/bimodal images,
    - peaks: list of (intensity, mass, kind) with kind in {'dark','bright','middle'},
    - bg_color: estimated background intensity from non-edge pixels.
    """
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).flatten()
    total = hist.sum()
    if total == 0:
        return True, 1, "empty", [], 255
    edge_info, canny = detect_edge_intensities(gray)
    bg_color = get_background_color(gray, canny)
    # FIX: GaussianBlur's ksize is (width, height). The histogram is reshaped
    # to a 1-row, 256-column image, so smoothing must run along the width
    # (intensity) axis: (PEAK_SMOOTHING_KERNEL, 1). The previous (1, K) kernel
    # blurred vertically across the single row, which is a no-op.
    hist_smooth = cv2.GaussianBlur(hist.reshape(1, -1).astype(np.float32), (PEAK_SMOOTHING_KERNEL, 1), 0).flatten()
    peaks = []
    # Seed edge-region peaks from the Canny statistics: edge-pixel medians
    # locate line/highlight intensities even when the raw histogram peak is weak.
    if edge_info.get('dark_edges', False) and edge_info.get('dark_intensity') is not None:
        dark_mass = np.sum(hist[:PEAK_EDGE_REGION])
        peaks.append((int(edge_info['dark_intensity']), dark_mass, 'dark'))
    if edge_info.get('bright_edges', False) and edge_info.get('bright_intensity') is not None:
        bright_mass = np.sum(hist[256-PEAK_EDGE_REGION:])
        peaks.append((int(edge_info['bright_intensity']), bright_mass, 'bright'))
    min_height = total * PEAK_MIN_HEIGHT_PCT
    # Scan the mid-range for local maxima that clear the height threshold and
    # are not within PEAK_MIN_DISTANCE of an already-recorded peak.
    for i in range(PEAK_EDGE_REGION, 256 - PEAK_EDGE_REGION):
        if hist_smooth[i] < min_height:
            continue
        is_peak = all(
            hist_smooth[i] >= hist_smooth[max(0, i - j)] and hist_smooth[i] >= hist_smooth[min(255, i + j)]
            for j in range(1, PEAK_LOCAL_WINDOW + 1)
        )
        if is_peak and not any(abs(p[0] - i) < PEAK_MIN_DISTANCE for p in peaks):
            peaks.append((i, hist_smooth[i], 'middle'))
    peaks.sort(key=lambda x: x[0])
    n = len(peaks)
    if n <= 1:
        return True, max(1, n), "single_peak", peaks, bg_color
    has_dark = any(p[0] < REGION_DARK_THRESHOLD for p in peaks)
    has_bright = any(p[0] > REGION_BRIGHT_THRESHOLD for p in peaks)
    has_middle = any(REGION_DARK_THRESHOLD <= p[0] <= REGION_BRIGHT_THRESHOLD for p in peaks)
    # Dark + bright with nothing in between is classic black-on-white lineart.
    if has_dark and has_bright and not has_middle:
        return True, 2, "bimodal", peaks, bg_color
    return False, n, f"multicolor_{n}peaks", peaks, bg_color
def xdog_edge_detection(img, sigma=None, phi=None, tau=None):
    """Extract a binary edge map via XDoG (eXtended Difference of Gaussians).

    Parameters default to the module-level XDOG_* constants. NOTE(review):
    the `or`-style defaulting treats 0 the same as None, so a caller cannot
    pass an explicit zero for sigma/phi/tau — confirm this is acceptable.
    Returns a uint8 binary image produced by Otsu-thresholding the inverted
    XDoG response.
    """
    sigma = sigma or XDOG_SIGMA
    phi = phi or XDOG_PHI
    tau = tau or XDOG_TAU
    # Accept BGR or already-grayscale input; work in [0, 1] floats.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img.copy()
    gray = gray.astype(np.float64) / 255.0
    # Odd kernel sizes covering roughly 3 sigma on each side, minimum 3.
    ksize1 = max(3, int(np.ceil(sigma * 3)) * 2 + 1)
    ksize2 = max(3, int(np.ceil(sigma * XDOG_K * 3)) * 2 + 1)
    g1 = cv2.GaussianBlur(gray, (ksize1, ksize1), sigma)
    g2 = cv2.GaussianBlur(gray, (ksize2, ksize2), sigma * XDOG_K)
    # Difference of Gaussians with the wider blur down-weighted by tau.
    dog = g1 - tau * g2
    if dog.max() != dog.min():
        # NOTE(review): this divides by the range without subtracting the
        # minimum, so values are rescaled but not shifted into [0, 1]; looks
        # intentional as input to the tanh soft-threshold below — confirm
        # before "fixing".
        dog = dog / (dog.max() - dog.min() + XDOG_EPSILON)
    # Soft threshold: 1.0 where the response is non-negative, smooth tanh
    # falloff (steepness phi) below zero.
    xdog = np.where(dog >= 0, 1.0, 1.0 + np.tanh(phi * dog))
    # Invert so edges become bright, then binarize with Otsu.
    xdog = ((1.0 - xdog) * 255).astype(np.uint8)
    _, binary = cv2.threshold(xdog, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary
def preprocess_lineart(img):
    """Binarize a lineart image, picking Otsu or XDoG by color complexity.

    Returns (binary, method_name, bg_color); the binary map is oriented so
    that strokes (the minority class) are the white foreground.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()
    is_bimodal, n_colors, _reason, _peaks, bg_color = analyze_color_distribution(gray)
    if is_bimodal and n_colors <= 2:
        # Simple two-tone image: a global Otsu threshold is enough.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        if np.mean(binary) / 255.0 > 0.5:
            binary = cv2.bitwise_not(binary)
        return binary, "otsu", bg_color
    # Multi-tone image: fall back to XDoG edge extraction.
    binary = xdog_edge_detection(img)
    if np.sum(binary > 0) / binary.size > 0.5:
        binary = cv2.bitwise_not(binary)
    return binary, "xdog", bg_color
def preprocess_color(img):
    """Reduce a finished (color) image to an XDoG edge map for matching."""
    return xdog_edge_detection(img)
def hex_to_rgb(h):
    """Parse a color spec into an (r, g, b) tuple of ints.

    Accepts an existing tuple/list (first three components passed through),
    a hex string with or without '#' in 3- or 6-digit form, or None.
    Falls back to white (255, 255, 255) for anything unparseable.
    """
    if h is None:
        return (255, 255, 255)
    if isinstance(h, (tuple, list)):
        return tuple(int(x) for x in h[:3])
    h = str(h).strip().lstrip('#')
    if len(h) == 3:
        # Expand shorthand, e.g. 'f80' -> 'ff8800'.
        h = ''.join([c*2 for c in h])
    if len(h) != 6:
        return (255, 255, 255)
    try:
        return tuple(int(h[i:i+2], 16) for i in (0, 2, 4))
    except ValueError:
        # FIX: was a bare `except:`; only non-hex digits (int() raising
        # ValueError) should trigger the white fallback.
        return (255, 255, 255)
def flatten_rgba_to_rgb(rgba, bg):
    """Composite an image onto the solid background color *bg* (3-tuple).

    Grayscale input is promoted to RGB, 3-channel input is returned as a
    copy, and 4-channel input is alpha-blended over *bg*. Always returns a
    uint8 3-channel array.
    """
    if len(rgba.shape) == 2:
        return cv2.cvtColor(rgba, cv2.COLOR_GRAY2RGB)
    if rgba.shape[2] == 3:
        return rgba.copy()
    # Standard "over" compositing: out = fg * a + bg * (1 - a).
    alpha = rgba[:, :, 3:4].astype(np.float32) / 255.0
    foreground = rgba[:, :, :3].astype(np.float32)
    backdrop = np.array(bg, dtype=np.float32).reshape(1, 1, 3)
    blended = foreground * alpha + backdrop * (1.0 - alpha)
    return blended.astype(np.uint8)
def cv2_to_pil(img):
    """Convert a BGR OpenCV image to a PIL RGB Image; None passes through."""
    if img is None:
        return None
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
def img_to_b64(img):
    """Encode a BGR OpenCV image as a base64 PNG string (no data: prefix)."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    buf = BytesIO()
    Image.fromarray(rgb).save(buf, format="PNG")
    payload = buf.getvalue()
    return base64.b64encode(payload).decode()
def make_side_by_side(a, b, max_h=500, interp=cv2.INTER_LANCZOS4):
    """Scale two BGR images to a shared height and join them with a gray gap.

    Each image is scaled so its height equals min(max_h, taller input);
    the shorter result is bottom-padded with white so the columns align.
    """
    target_h = min(max_h, max(a.shape[0], b.shape[0]))

    def _scaled(img):
        # Uniform scale factor so the image height becomes target_h.
        factor = target_h / img.shape[0]
        size = (int(img.shape[1] * factor), int(img.shape[0] * factor))
        return cv2.resize(img, size, interpolation=interp)

    left, right = _scaled(a), _scaled(b)
    h = max(left.shape[0], right.shape[0])

    def _padded(img):
        # Bottom-pad with white rows up to the common height.
        missing = h - img.shape[0]
        if missing <= 0:
            return img
        filler = np.full((missing, img.shape[1], 3), 255, np.uint8)
        return np.vstack([img, filler])

    divider = np.full((h, 10, 3), 200, np.uint8)
    return np.hstack([_padded(left), divider, _padded(right)])
def make_svg(a, b, kp1, kp2, matches, max_m=200, interp=cv2.INTER_LANCZOS4):
    """Render two BGR images side by side as inline SVG with match lines.

    a/b: the two images; kp1/kp2: their keypoints (objects with .pt);
    matches: objects with .queryIdx (into kp1) and .trainIdx (into kp2).
    At most *max_m* matches are drawn. Returns an HTML string whose outer
    container has a fixed height so swapping content does not shift layout.
    """
    # FIX: Define consistent container height
    container_height = 550
    if not matches:
        # FIX: Return container with same fixed height as content containers
        return f"<div style='min-height:{container_height}px;height:{container_height}px;padding:30px;color:#999;text-align:center;display:flex;align-items:center;justify-content:center;box-sizing:border-box;'>No matches</div>"
    m, total = matches[:max_m], len(matches)
    # Scale both images to at most 500 px tall, preserving aspect ratio.
    target_h = min(500, max(a.shape[0], b.shape[0]))
    s1, s2 = target_h / a.shape[0], target_h / b.shape[0]
    h1, w1 = int(a.shape[0] * s1), int(a.shape[1] * s1)
    h2, w2 = int(b.shape[0] * s2), int(b.shape[1] * s2)
    gap, final_h, total_w = 10, max(h1, h2), w1 + 10 + w2
    a_r = cv2.resize(a, (w1, h1), interpolation=interp)
    b_r = cv2.resize(b, (w2, h2), interpolation=interp)
    # Fixed seed -> stable per-match colors across renders.
    np.random.seed(42)
    c = np.random.randint(50, 255, (len(m), 3))
    # One SVG <line> per match and endpoint <circle>s on each image; the
    # right image's x-coordinates are shifted by w1 + gap.
    lines = ''.join(f'<line x1="{kp1[x.queryIdx].pt[0]*s1:.1f}" y1="{kp1[x.queryIdx].pt[1]*s1:.1f}" x2="{kp2[x.trainIdx].pt[0]*s2 + w1 + gap:.1f}" y2="{kp2[x.trainIdx].pt[1]*s2:.1f}" stroke="rgb({c[i,0]},{c[i,1]},{c[i,2]})" stroke-width="1" opacity="0.7"/>' for i, x in enumerate(m))
    circles1 = ''.join(f'<circle cx="{kp1[x.queryIdx].pt[0]*s1:.1f}" cy="{kp1[x.queryIdx].pt[1]*s1:.1f}" r="3" fill="rgb({c[i,0]},{c[i,1]},{c[i,2]})"/>' for i, x in enumerate(m))
    circles2 = ''.join(f'<circle cx="{kp2[x.trainIdx].pt[0]*s2 + w1 + gap:.1f}" cy="{kp2[x.trainIdx].pt[1]*s2:.1f}" r="3" fill="rgb({c[i,0]},{c[i,1]},{c[i,2]})"/>' for i, x in enumerate(m))
    info = f"{len(m)} of {total} matches" if len(m) < total else f"{total} matches"
    # FIX: Wrap in container with fixed height and use preserveAspectRatio
    return f'''<div style="min-height:{container_height}px;height:{container_height}px;box-sizing:border-box;overflow:hidden;">
<div style="font-size:12px;color:#666;margin-bottom:4px;height:20px;">{info}</div>
<svg viewBox="0 0 {total_w} {final_h}" preserveAspectRatio="xMidYMid meet" style="width:100%;height:{min(final_h, container_height - 30)}px;max-height:{container_height - 30}px;background:#fafafa;display:block;">
<image href="data:image/png;base64,{img_to_b64(a_r)}" width="{w1}" height="{h1}"/>
<rect x="{w1}" y="0" width="{gap}" height="{final_h}" fill="#e0e0e0"/>
<image x="{w1 + gap}" href="data:image/png;base64,{img_to_b64(b_r)}" width="{w2}" height="{h2}"/>
{lines}{circles1}{circles2}
</svg></div>'''
def edge_filter(matches, kp2, edges, r):
    """Keep matches whose train keypoint lies within *r* px of an edge pixel.

    *edges* is a 2-D edge map indexed [y, x]; keypoint coordinates are
    truncated to integer pixel positions and the (2r+1)-square window around
    each is clipped to the image bounds.
    """
    h, w = edges.shape[0], edges.shape[1]
    kept = []
    for match in matches:
        px, py = kp2[match.trainIdx].pt
        x, y = int(px), int(py)
        window = edges[max(0, y - r):min(h, y + r + 1), max(0, x - r):min(w, x + r + 1)]
        if np.any(window):
            kept.append(match)
    return kept
def filter_keypoints_by_mask(keypoints, descriptors, mask, radius=2):
    """Drop keypoints whose radius-padded neighborhood is entirely outside *mask*.

    Returns (kept_keypoints, kept_descriptors). If filtering would discard
    every keypoint, the full input set is returned unchanged — a deliberate
    fallback so downstream matching still has features to work with.
    """
    if keypoints is None or descriptors is None or len(keypoints) == 0:
        return [], None
    h, w = mask.shape[:2]
    survivors = []
    for idx, kp in enumerate(keypoints):
        cx = int(round(kp.pt[0]))
        cy = int(round(kp.pt[1]))
        y_lo, y_hi = max(0, cy - radius), min(h, cy + radius + 1)
        x_lo, x_hi = max(0, cx - radius), min(w, cx + radius + 1)
        if y_hi > y_lo and x_hi > x_lo and np.any(mask[y_lo:y_hi, x_lo:x_hi] > 0):
            survivors.append(idx)
    if not survivors:
        return list(keypoints), descriptors
    return [keypoints[i] for i in survivors], descriptors[survivors]
def visualize_template_match(lineart, color, scale, tx, ty, score):
    """Overlay the scaled lineart onto *color* at offset (tx, ty) for debugging.

    Draws a 50/50 alpha blend of the scaled lineart inside the color image,
    a green bounding rectangle around the overlap, and a text banner with
    the match parameters. Returns a new BGR image; inputs are not modified.
    """
    h, w = color.shape[:2]
    nw, nh = int(lineart.shape[1] * scale), int(lineart.shape[0] * scale)
    if nw <= 0 or nh <= 0:
        # Degenerate scale: nothing to draw.
        return color.copy()
    scaled = cv2.resize(lineart, (nw, nh), interpolation=cv2.INTER_LANCZOS4)
    overlay = color.copy()
    txi, tyi = int(tx), int(ty)
    # Destination rectangle clipped to the color image bounds...
    dx1, dy1 = max(0, txi), max(0, tyi)
    dx2, dy2 = min(w, txi + nw), min(h, tyi + nh)
    # ...and the matching source offset when the template starts off-canvas
    # (negative tx/ty).
    sx1, sy1 = max(0, -txi), max(0, -tyi)
    if dy2 > dy1 and dx2 > dx1:
        h_copy, w_copy = dy2 - dy1, dx2 - dx1
        roi = overlay[dy1:dy2, dx1:dx2]
        scaled_roi = scaled[sy1:sy1+h_copy, sx1:sx1+w_copy]
        if len(scaled_roi.shape) == 2:
            # Promote grayscale lineart to 3 channels so addWeighted shapes match.
            scaled_roi = cv2.cvtColor(scaled_roi, cv2.COLOR_GRAY2BGR)
        overlay[dy1:dy2, dx1:dx2] = cv2.addWeighted(roi, 0.5, scaled_roi, 0.5, 0)
    cv2.rectangle(overlay, (dx1, dy1), (dx2-1, dy2-1), (0, 255, 0), 2)
    font = cv2.FONT_HERSHEY_SIMPLEX
    text = f"Template: scale={scale:.3f} tx={tx:.0f} ty={ty:.0f} score={score:.3f}"
    cv2.putText(overlay, text, (10, 30), font, 0.7, (0, 255, 0), 2)
    return overlay
def resize_image_match(finished_img, lineart_size):
    """Rescale *finished_img* (PIL Image) toward *lineart_size* = (w, h).

    When the finished image's longest side exceeds the lineart's, scale by
    the ratio of the longest sides (shrink to fit); otherwise scale by the
    ratio of the shortest sides. Aspect ratio is preserved either way.
    """
    fw, fh = finished_img.size
    lw, lh = lineart_size
    if max(fw, fh) > max(lw, lh):
        factor = max(lw, lh) / max(fw, fh)
    else:
        factor = min(lw, lh) / min(fw, fh)
    new_size = (int(fw * factor), int(fh * factor))
    return finished_img.resize(new_size, Image.LANCZOS)
def get_guided_search_radius(w, h, config):
    """Resolve the guided-match search radius in pixels for a w x h image.

    'percent' mode (the default) scales the radius with the longer image
    side; any other mode uses the fixed pixel radius from the config.
    """
    mode = config.get('guided_search_mode', 'percent')
    if mode != 'percent':
        return config.get('guided_search_radius_px', 80)
    pct = config.get('guided_search_radius_pct', 8.0)
    return max(w, h) * pct / 100.0