"""Utilities for matching a line-art image against a finished/colored image.

Includes histogram-based bimodality analysis, XDoG edge extraction,
keypoint/match filtering helpers, and HTML/SVG visualization builders.
"""

import cv2
import numpy as np
from PIL import Image
import base64
from io import BytesIO

# Histogram peak detection tuning.
PEAK_SMOOTHING_KERNEL, PEAK_MIN_HEIGHT_PCT, PEAK_MIN_DISTANCE = 11, 0.005, 15
PEAK_EDGE_REGION, PEAK_LOCAL_WINDOW, PEAK_EDGE_MIN_MASS_PCT = 50, 5, 0.002

# Canny edge / region intensity thresholds.
CANNY_LOW, CANNY_HIGH, CANNY_DARK_THRESH, CANNY_BRIGHT_THRESH = 50, 150, 80, 180
CANNY_EDGE_RATIO_THRESHOLD, REGION_DARK_THRESHOLD, REGION_BRIGHT_THRESHOLD = 0.15, 60, 200

# XDoG (eXtended Difference-of-Gaussians) parameters.
XDOG_SIGMA, XDOG_K, XDOG_PHI, XDOG_TAU, XDOG_EPSILON = 0.5, 1.6, 200, 0.98, 0.01

# Template-matching search parameters.
TEMPLATE_SCALE_RANGE, TEMPLATE_PYRAMID_LEVELS = (0.7, 1.3), 5
TEMPLATE_COARSE_STEPS, TEMPLATE_FINE_STEPS, TEMPLATE_TOP_CANDIDATES = 20, 10, 5

# Placeholder shown while results are pending.
# FIX: Added min-height and box-sizing to prevent layout shifts.
# NOTE(review): the HTML markup here was reconstructed — the tags were lost in
# the corrupted source. Verify against the consuming front-end.
EMPTY = (
    '<div style="min-height:550px; box-sizing:border-box; '
    'display:flex; align-items:center; justify-content:center;">'
    'Waiting...'
    '</div>'
)

# Global pipeline configuration; read via get_config().
CONFIG = {
    'detector_type': 'orb',
    'orb_features': 5000,
    'use_template_matching': True,
    'use_gms': True,
    'use_guided_matching': True,
    'guided_search_mode': 'percent',
    'guided_search_radius_pct': 8.0,
    'guided_search_radius_px': 80,
    'bidirectional_matching': True,
    'min_guided_matches': 8,
    'gms_scale': True,
    'keypoint_mask_radius': 2,
    'ransac_iterations': 10000,
    'ransac_threshold': 5.0,
    'min_ransac_sample_distance': 30,
    'min_inliers': 4,
    'scale_min': 0.3,
    'scale_max': 3.0,
}


def get_config(key, default=None):
    """Return CONFIG[key], or `default` when the key is absent."""
    return CONFIG.get(key, default)


def check_gms_available():
    """Return True if OpenCV was built with xfeatures2d.matchGMS (opencv-contrib)."""
    try:
        return hasattr(cv2, 'xfeatures2d') and hasattr(cv2.xfeatures2d, 'matchGMS')
    except Exception:  # pragma: no cover - defensive against odd cv2 builds
        return False


GMS_AVAILABLE = check_gms_available()


class Size:
    """Simple width/height value pair."""

    def __init__(self, width, height):
        self.width, self.height = width, height


def detect_edge_intensities(gray):
    """Classify Canny edge pixels of a grayscale image as dark and/or bright.

    Returns (info_dict, canny_edges). info_dict reports whether edges exist,
    whether a significant fraction are dark (< CANNY_DARK_THRESH) or bright
    (> CANNY_BRIGHT_THRESH), and the median intensity of each group when there
    are enough samples (> 20 pixels) to be meaningful.
    """
    canny = cv2.Canny(gray, CANNY_LOW, CANNY_HIGH)
    edge_count = np.sum(canny > 0)
    # Too few edge pixels: treat the image as effectively edge-free.
    if edge_count < 50:
        return {'has_edges': False, 'dark_edges': False, 'bright_edges': False}, canny
    edge_y, edge_x = np.where(canny > 0)
    edge_intensities = gray[edge_y, edge_x]
    dark_count = np.sum(edge_intensities < CANNY_DARK_THRESH)
    bright_count = np.sum(edge_intensities > CANNY_BRIGHT_THRESH)
    dark_ratio, bright_ratio = dark_count / edge_count, bright_count / edge_count
    return {
        'has_edges': True,
        'dark_edges': dark_ratio > CANNY_EDGE_RATIO_THRESHOLD,
        'bright_edges': bright_ratio > CANNY_EDGE_RATIO_THRESHOLD,
        'dark_intensity': float(np.median(edge_intensities[edge_intensities < CANNY_DARK_THRESH])) if dark_count > 20 else None,
        'bright_intensity': float(np.median(edge_intensities[edge_intensities > CANNY_BRIGHT_THRESH])) if bright_count > 20 else None,
    }, canny


def get_background_color(gray, canny_edges):
    """Estimate the background intensity as the median of non-edge pixels.

    Falls back to white (255) when there is no usable non-edge area.
    """
    non_edge_mask = canny_edges == 0
    if not np.any(non_edge_mask):
        return 255
    non_edge_pixels = gray[non_edge_mask]
    return int(np.median(non_edge_pixels)) if len(non_edge_pixels) >= 100 else 255


def analyze_color_distribution(gray):
    """Analyze the grayscale histogram to decide whether the image is bimodal.

    Returns (is_bimodal, n_peaks, reason, peaks, bg_color) where `peaks` is a
    list of (intensity, mass, kind) tuples with kind in {'dark','bright','middle'}.
    """
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).flatten()
    total = hist.sum()
    if total == 0:
        return True, 1, "empty", [], 255
    edge_info, canny = detect_edge_intensities(gray)
    bg_color = get_background_color(gray, canny)
    # Smooth along the intensity axis only (1 x K kernel).
    hist_smooth = cv2.GaussianBlur(
        hist.reshape(1, -1).astype(np.float32), (1, PEAK_SMOOTHING_KERNEL), 0
    ).flatten()
    peaks = []
    # Seed peaks from edge statistics: edge lines anchor dark/bright modes.
    if edge_info.get('dark_edges', False) and edge_info.get('dark_intensity') is not None:
        dark_mass = np.sum(hist[:PEAK_EDGE_REGION])
        peaks.append((int(edge_info['dark_intensity']), dark_mass, 'dark'))
    if edge_info.get('bright_edges', False) and edge_info.get('bright_intensity') is not None:
        bright_mass = np.sum(hist[256 - PEAK_EDGE_REGION:])
        peaks.append((int(edge_info['bright_intensity']), bright_mass, 'bright'))
    # Scan the middle of the histogram for additional local maxima.
    min_height = total * PEAK_MIN_HEIGHT_PCT
    for i in range(PEAK_EDGE_REGION, 256 - PEAK_EDGE_REGION):
        if hist_smooth[i] < min_height:
            continue
        is_peak = all(
            hist_smooth[i] >= hist_smooth[max(0, i - j)]
            and hist_smooth[i] >= hist_smooth[min(255, i + j)]
            for j in range(1, PEAK_LOCAL_WINDOW + 1)
        )
        if is_peak and not any(abs(p[0] - i) < PEAK_MIN_DISTANCE for p in peaks):
            peaks.append((i, hist_smooth[i], 'middle'))
    peaks.sort(key=lambda x: x[0])
    n = len(peaks)
    if n <= 1:
        return True, max(1, n), "single_peak", peaks, bg_color
    has_dark = any(p[0] < REGION_DARK_THRESHOLD for p in peaks)
    has_bright = any(p[0] > REGION_BRIGHT_THRESHOLD for p in peaks)
    has_middle = any(REGION_DARK_THRESHOLD <= p[0] <= REGION_BRIGHT_THRESHOLD for p in peaks)
    # Dark + bright modes with nothing in between => classic bimodal line art.
    if has_dark and has_bright and not has_middle:
        return True, 2, "bimodal", peaks, bg_color
    return False, n, f"multicolor_{n}peaks", peaks, bg_color


def xdog_edge_detection(img, sigma=None, phi=None, tau=None):
    """Extract edges with XDoG and binarize the result via Otsu.

    Parameters default to the module-level XDOG_* constants when None.
    (Uses `is None` checks so an explicit 0 value is respected, unlike the
    previous `x or DEFAULT` form which silently overrode falsy arguments.)
    Returns a uint8 binary image.
    """
    sigma = XDOG_SIGMA if sigma is None else sigma
    phi = XDOG_PHI if phi is None else phi
    tau = XDOG_TAU if tau is None else tau
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img.copy()
    gray = gray.astype(np.float64) / 255.0
    # Kernel sizes: odd, >= 3, covering ~3 sigma.
    ksize1 = max(3, int(np.ceil(sigma * 3)) * 2 + 1)
    ksize2 = max(3, int(np.ceil(sigma * XDOG_K * 3)) * 2 + 1)
    g1 = cv2.GaussianBlur(gray, (ksize1, ksize1), sigma)
    g2 = cv2.GaussianBlur(gray, (ksize2, ksize2), sigma * XDOG_K)
    dog = g1 - tau * g2
    if dog.max() != dog.min():
        dog = dog / (dog.max() - dog.min() + XDOG_EPSILON)
    # Soft thresholding: keep positives at 1, smoothly ramp negatives via tanh.
    xdog = np.where(dog >= 0, 1.0, 1.0 + np.tanh(phi * dog))
    xdog = ((1.0 - xdog) * 255).astype(np.uint8)
    _, binary = cv2.threshold(xdog, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary


def preprocess_lineart(img):
    """Binarize a line-art image, choosing Otsu for bimodal input, XDoG otherwise.

    Returns (binary, method_name, bg_color). The binary image is normalized so
    that edges are the minority (bright-on-dark inverted when > 50% white).
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img.copy()
    is_bimodal, n_colors, reason, peaks, bg_color = analyze_color_distribution(gray)
    if is_bimodal and n_colors <= 2:
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        if np.mean(binary) / 255.0 > 0.5:
            binary = cv2.bitwise_not(binary)
        return binary, "otsu", bg_color
    binary = xdog_edge_detection(img)
    if np.sum(binary > 0) / binary.size > 0.5:
        binary = cv2.bitwise_not(binary)
    return binary, "xdog", bg_color


def preprocess_color(img):
    """Extract an edge map from a colored image via XDoG."""
    return xdog_edge_detection(img)


def hex_to_rgb(h):
    """Parse a hex color ('#abc', 'aabbcc', tuple passthrough) into an RGB tuple.

    Any unparseable input falls back to white (255, 255, 255).
    """
    if h is None:
        return (255, 255, 255)
    if isinstance(h, (tuple, list)):
        return tuple(int(x) for x in h[:3])
    h = str(h).strip().lstrip('#')
    if len(h) == 3:
        h = ''.join([c * 2 for c in h])  # expand shorthand #abc -> #aabbcc
    if len(h) != 6:
        return (255, 255, 255)
    try:
        return tuple(int(h[i:i + 2], 16) for i in (0, 2, 4))
    except ValueError:
        return (255, 255, 255)


def flatten_rgba_to_rgb(rgba, bg):
    """Alpha-composite an RGBA (or gray/RGB) array onto a solid background color."""
    if len(rgba.shape) == 2:
        return cv2.cvtColor(rgba, cv2.COLOR_GRAY2RGB)
    if rgba.shape[2] == 3:
        return rgba.copy()
    alpha = rgba[:, :, 3:4].astype(np.float32) / 255.0
    rgb = rgba[:, :, :3].astype(np.float32)
    bg_array = np.array(bg, dtype=np.float32).reshape(1, 1, 3)
    return (rgb * alpha + bg_array * (1.0 - alpha)).astype(np.uint8)


def cv2_to_pil(img):
    """Convert a BGR OpenCV image to a PIL RGB Image (None-safe)."""
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) if img is not None else None


def img_to_b64(img):
    """Encode a BGR OpenCV image as a base64 PNG string."""
    buf = BytesIO()
    Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode()


def make_side_by_side(a, b, max_h=500, interp=cv2.INTER_LANCZOS4):
    """Place two BGR images side by side at a common height with a gray divider."""
    target_h = min(max_h, max(a.shape[0], b.shape[0]))
    s1, s2 = target_h / a.shape[0], target_h / b.shape[0]
    r1 = cv2.resize(a, (int(a.shape[1] * s1), int(a.shape[0] * s1)), interpolation=interp)
    r2 = cv2.resize(b, (int(b.shape[1] * s2), int(b.shape[0] * s2)), interpolation=interp)
    h = max(r1.shape[0], r2.shape[0])
    # Pad the shorter image with white rows so heights match exactly.
    if r1.shape[0] < h:
        r1 = np.vstack([r1, np.ones((h - r1.shape[0], r1.shape[1], 3), np.uint8) * 255])
    if r2.shape[0] < h:
        r2 = np.vstack([r2, np.ones((h - r2.shape[0], r2.shape[1], 3), np.uint8) * 255])
    return np.hstack([r1, np.ones((h, 10, 3), np.uint8) * 200, r2])


def make_svg(a, b, kp1, kp2, matches, max_m=200, interp=cv2.INTER_LANCZOS4):
    """Build an HTML/SVG visualization of feature matches between two images.

    NOTE(review): all markup in this function was reconstructed — the original
    tags were stripped from the corrupted source, leaving only the f-string
    expressions. Verify the element layout against the consuming front-end.
    """
    # FIX: Define consistent container height so result panes don't jump.
    container_height = 550
    if not matches:
        # FIX: Return container with same fixed height as content containers.
        return (
            f'<div style="height:{container_height}px; box-sizing:border-box; '
            f'display:flex; align-items:center; justify-content:center;">'
            f'No matches</div>'
        )
    m, total = matches[:max_m], len(matches)
    target_h = min(500, max(a.shape[0], b.shape[0]))
    s1, s2 = target_h / a.shape[0], target_h / b.shape[0]
    h1, w1 = int(a.shape[0] * s1), int(a.shape[1] * s1)
    h2, w2 = int(b.shape[0] * s2), int(b.shape[1] * s2)
    gap, final_h, total_w = 10, max(h1, h2), w1 + 10 + w2
    a_r = cv2.resize(a, (w1, h1), interpolation=interp)
    b_r = cv2.resize(b, (w2, h2), interpolation=interp)
    # Deterministic per-match colors.
    np.random.seed(42)
    c = np.random.randint(50, 255, (len(m), 3))
    lines = ''.join(
        f'<line x1="{kp1[x.queryIdx].pt[0] * s1:.1f}" y1="{kp1[x.queryIdx].pt[1] * s1:.1f}" '
        f'x2="{kp2[x.trainIdx].pt[0] * s2 + w1 + gap:.1f}" y2="{kp2[x.trainIdx].pt[1] * s2:.1f}" '
        f'stroke="rgb({c[i][0]},{c[i][1]},{c[i][2]})" stroke-width="1" opacity="0.6"/>'
        for i, x in enumerate(m)
    )
    circles1 = ''.join(
        f'<circle cx="{kp1[x.queryIdx].pt[0] * s1:.1f}" cy="{kp1[x.queryIdx].pt[1] * s1:.1f}" '
        f'r="3" fill="rgb({c[i][0]},{c[i][1]},{c[i][2]})"/>'
        for i, x in enumerate(m)
    )
    circles2 = ''.join(
        f'<circle cx="{kp2[x.trainIdx].pt[0] * s2 + w1 + gap:.1f}" cy="{kp2[x.trainIdx].pt[1] * s2:.1f}" '
        f'r="3" fill="rgb({c[i][0]},{c[i][1]},{c[i][2]})"/>'
        for i, x in enumerate(m)
    )
    info = f"{len(m)} of {total} matches" if len(m) < total else f"{total} matches"
    # FIX: Wrap in container with fixed height and use preserveAspectRatio.
    return f'''<div style="height:{container_height}px; box-sizing:border-box;">
<div>{info}</div>
<svg width="100%" height="{final_h}" viewBox="0 0 {total_w} {final_h}" preserveAspectRatio="xMidYMid meet">
<image x="0" y="0" width="{w1}" height="{h1}" href="data:image/png;base64,{img_to_b64(a_r)}"/>
<image x="{w1 + gap}" y="0" width="{w2}" height="{h2}" href="data:image/png;base64,{img_to_b64(b_r)}"/>
{lines}{circles1}{circles2}
</svg>
</div>'''


def edge_filter(matches, kp2, edges, r):
    """Keep only matches whose train keypoint lies within `r` px of an edge pixel."""
    h, w, out = edges.shape[0], edges.shape[1], []
    for m in matches:
        y, x = int(kp2[m.trainIdx].pt[1]), int(kp2[m.trainIdx].pt[0])
        if np.any(edges[max(0, y - r):min(h, y + r + 1), max(0, x - r):min(w, x + r + 1)]):
            out.append(m)
    return out


def filter_keypoints_by_mask(keypoints, descriptors, mask, radius=2):
    """Filter keypoints (and their descriptor rows) to those near nonzero mask pixels.

    If the mask would reject everything, the original set is returned unchanged
    — a deliberate fallback so downstream matching never receives zero points.
    """
    if keypoints is None or descriptors is None or len(keypoints) == 0:
        return [], None
    h, w = mask.shape[:2]
    keep_idx = []
    for i, kp in enumerate(keypoints):
        x, y = int(round(kp.pt[0])), int(round(kp.pt[1]))
        x1, y1 = max(0, x - radius), max(0, y - radius)
        x2, y2 = min(w, x + radius + 1), min(h, y + radius + 1)
        if x2 > x1 and y2 > y1 and np.any(mask[y1:y2, x1:x2] > 0):
            keep_idx.append(i)
    if not keep_idx:
        return list(keypoints), descriptors
    return [keypoints[i] for i in keep_idx], descriptors[keep_idx]


def visualize_template_match(lineart, color, scale, tx, ty, score):
    """Overlay the scaled line-art onto the color image at (tx, ty) with annotations."""
    h, w = color.shape[:2]
    nw, nh = int(lineart.shape[1] * scale), int(lineart.shape[0] * scale)
    if nw <= 0 or nh <= 0:
        return color.copy()
    scaled = cv2.resize(lineart, (nw, nh), interpolation=cv2.INTER_LANCZOS4)
    overlay = color.copy()
    txi, tyi = int(tx), int(ty)
    # Clip the paste rectangle to the destination bounds.
    dx1, dy1 = max(0, txi), max(0, tyi)
    dx2, dy2 = min(w, txi + nw), min(h, tyi + nh)
    sx1, sy1 = max(0, -txi), max(0, -tyi)
    if dy2 > dy1 and dx2 > dx1:
        h_copy, w_copy = dy2 - dy1, dx2 - dx1
        roi = overlay[dy1:dy2, dx1:dx2]
        scaled_roi = scaled[sy1:sy1 + h_copy, sx1:sx1 + w_copy]
        if len(scaled_roi.shape) == 2:
            scaled_roi = cv2.cvtColor(scaled_roi, cv2.COLOR_GRAY2BGR)
        overlay[dy1:dy2, dx1:dx2] = cv2.addWeighted(roi, 0.5, scaled_roi, 0.5, 0)
        cv2.rectangle(overlay, (dx1, dy1), (dx2 - 1, dy2 - 1), (0, 255, 0), 2)
    font = cv2.FONT_HERSHEY_SIMPLEX
    text = f"Template: scale={scale:.3f} tx={tx:.0f} ty={ty:.0f} score={score:.3f}"
    cv2.putText(overlay, text, (10, 30), font, 0.7, (0, 255, 0), 2)
    return overlay


def resize_image_match(finished_img, lineart_size):
    """Resize a finished PIL image so its dimensions are comparable to the line-art.

    When the finished image's longest side exceeds the line-art's, scale by the
    longest-side ratio; otherwise scale by the shortest-side ratio.
    """
    finished_width, finished_height = finished_img.size
    lineart_width, lineart_height = lineart_size
    finished_longest = max(finished_width, finished_height)
    finished_shortest = min(finished_width, finished_height)
    lineart_longest = max(lineart_width, lineart_height)
    lineart_shortest = min(lineart_width, lineart_height)
    if finished_longest > lineart_longest:
        scale = lineart_longest / finished_longest
    else:
        scale = lineart_shortest / finished_shortest
    new_width = int(finished_width * scale)
    new_height = int(finished_height * scale)
    return finished_img.resize((new_width, new_height), Image.LANCZOS)


def get_guided_search_radius(w, h, config):
    """Return the guided-search radius in pixels, either percent-based or fixed."""
    if config.get('guided_search_mode', 'percent') == 'percent':
        return max(w, h) * config.get('guided_search_radius_pct', 8.0) / 100.0
    return config.get('guided_search_radius_px', 80)