# utils.py — image preprocessing and feature-matching helpers
# (upstream revision dc0c09a)
import cv2
import numpy as np
from PIL import Image
import base64
from io import BytesIO
# --- Histogram peak detection tuning ---
# Smoothing kernel length for the 256-bin histogram, minimum peak height as a
# fraction of the total pixel count, and minimum intensity spacing between
# accepted peaks (see analyze_color_distribution).
PEAK_SMOOTHING_KERNEL, PEAK_MIN_HEIGHT_PCT, PEAK_MIN_DISTANCE = 11, 0.005, 15
# Width (in bins) of the dark/bright edge regions of the histogram and the
# half-window for the local-maximum test. PEAK_EDGE_MIN_MASS_PCT is not
# referenced in this chunk — presumably used elsewhere; verify before removing.
PEAK_EDGE_REGION, PEAK_LOCAL_WINDOW, PEAK_EDGE_MIN_MASS_PCT = 50, 5, 0.002
# --- Canny edge analysis ---
# Canny hysteresis thresholds, plus the intensity cutoffs that classify edge
# pixels as dark or bright (see detect_edge_intensities).
CANNY_LOW, CANNY_HIGH, CANNY_DARK_THRESH, CANNY_BRIGHT_THRESH = 50, 150, 80, 180
# Minimum fraction of dark/bright edge pixels needed to flag that mode, and
# the intensity boundaries splitting peaks into dark / middle / bright.
CANNY_EDGE_RATIO_THRESHOLD, REGION_DARK_THRESHOLD, REGION_BRIGHT_THRESHOLD = 0.15, 60, 200
# --- XDoG parameters (see xdog_edge_detection) ---
XDOG_SIGMA, XDOG_K, XDOG_PHI, XDOG_TAU, XDOG_EPSILON = 0.5, 1.6, 200, 0.98, 0.01
# --- Template matching search parameters (not referenced in this chunk) ---
TEMPLATE_SCALE_RANGE, TEMPLATE_PYRAMID_LEVELS = (0.7, 1.3), 5
TEMPLATE_COARSE_STEPS, TEMPLATE_FINE_STEPS, TEMPLATE_TOP_CANDIDATES = 20, 10, 5
# FIX: Added min-height and box-sizing to prevent layout shifts
# Placeholder HTML shown while results are pending; the 550px height matches
# make_svg's container so the page layout does not jump when content arrives.
EMPTY = "<div style='min-height:550px;height:550px;padding:30px;color:#999;text-align:center;display:flex;align-items:center;justify-content:center;box-sizing:border-box;'>Waiting...</div>"
# Default pipeline configuration. Values are read through get_config(), so
# any key may be absent or overridden by a caller-supplied fallback.
CONFIG = {
    'detector_type': 'orb',
    'orb_features': 5000,
    'use_template_matching': True,
    'use_gms': True,
    'use_guided_matching': True,
    # Guided-search radius: percentage of the longer image side, or a fixed
    # pixel value, selected by 'guided_search_mode' (see get_guided_search_radius).
    'guided_search_mode': 'percent',
    'guided_search_radius_pct': 8.0,
    'guided_search_radius_px': 80,
    'bidirectional_matching': True,
    'min_guided_matches': 8,
    'gms_scale': True,
    'keypoint_mask_radius': 2,
    'ransac_iterations': 10000,
    'ransac_threshold': 5.0,
    'min_ransac_sample_distance': 30,
    'min_inliers': 4,
    'scale_min': 0.3,
    'scale_max': 3.0,
}


def get_config(key, default=None):
    """Return CONFIG[key] when the key exists, otherwise *default*."""
    if key in CONFIG:
        return CONFIG[key]
    return default
def check_gms_available():
    """Return True when OpenCV exposes the GMS matcher.

    cv2.xfeatures2d.matchGMS only exists in opencv-contrib builds, so the
    cheapest feature test is probing the attributes.
    """
    try:
        return hasattr(cv2, 'xfeatures2d') and hasattr(cv2.xfeatures2d, 'matchGMS')
    except Exception:
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt. Only ordinary exceptions (e.g. lazy-loaded cv2
        # submodules raising on attribute access) should be treated as
        # "GMS unavailable".
        return False


# Probed once at import time so hot paths can branch on a plain bool.
GMS_AVAILABLE = check_gms_available()
class Size:
    """Lightweight width/height pair used for passing image dimensions."""

    def __init__(self, width, height):
        self.width = width
        self.height = height
def detect_edge_intensities(gray):
    """Run Canny on *gray* and summarize the intensities of edge pixels.

    Returns (info, canny) where canny is the raw edge map and info reports
    whether a meaningful share of edge pixels is dark and/or bright, plus the
    median intensity of each group (None when the group has <= 20 samples).
    With fewer than 50 edge pixels total, only 'has_edges': False is reported.
    """
    canny = cv2.Canny(gray, CANNY_LOW, CANNY_HIGH)
    ys, xs = np.where(canny > 0)
    n_edges = len(ys)
    if n_edges < 50:
        return {'has_edges': False, 'dark_edges': False, 'bright_edges': False}, canny
    samples = gray[ys, xs]
    dark = samples[samples < CANNY_DARK_THRESH]
    bright = samples[samples > CANNY_BRIGHT_THRESH]
    info = {
        'has_edges': True,
        'dark_edges': len(dark) / n_edges > CANNY_EDGE_RATIO_THRESHOLD,
        'bright_edges': len(bright) / n_edges > CANNY_EDGE_RATIO_THRESHOLD,
        'dark_intensity': float(np.median(dark)) if len(dark) > 20 else None,
        'bright_intensity': float(np.median(bright)) if len(bright) > 20 else None,
    }
    return info, canny
def get_background_color(gray, canny_edges):
    """Estimate the background intensity as the median of non-edge pixels.

    Falls back to white (255) when there are fewer than 100 non-edge pixels,
    which is too small a sample for a reliable median.
    """
    samples = gray[canny_edges == 0]
    if samples.size < 100:
        return 255
    return int(np.median(samples))
def analyze_color_distribution(gray):
    """Classify a grayscale image by its histogram peak structure.

    Returns (is_simple, n_colors, reason, peaks, bg_color):
    - is_simple: True for empty/single-peak/bimodal images,
    - peaks: list of (intensity, mass, kind) with kind in {'dark','bright','middle'},
    - bg_color: estimated background intensity from non-edge pixels.
    """
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256]).flatten()
    total = hist.sum()
    if total == 0:
        return True, 1, "empty", [], 255
    edge_info, canny = detect_edge_intensities(gray)
    bg_color = get_background_color(gray, canny)
    # FIX: GaussianBlur's ksize is (width, height). The histogram is reshaped
    # to a 1-row, 256-column image, so smoothing must run along the width
    # (intensity) axis: (PEAK_SMOOTHING_KERNEL, 1). The previous (1, K) kernel
    # blurred vertically across the single row, which is a no-op.
    hist_smooth = cv2.GaussianBlur(hist.reshape(1, -1).astype(np.float32), (PEAK_SMOOTHING_KERNEL, 1), 0).flatten()
    peaks = []
    # Seed edge-region peaks from the Canny statistics: edge-pixel medians
    # locate line/highlight intensities even when the raw histogram peak is weak.
    if edge_info.get('dark_edges', False) and edge_info.get('dark_intensity') is not None:
        dark_mass = np.sum(hist[:PEAK_EDGE_REGION])
        peaks.append((int(edge_info['dark_intensity']), dark_mass, 'dark'))
    if edge_info.get('bright_edges', False) and edge_info.get('bright_intensity') is not None:
        bright_mass = np.sum(hist[256-PEAK_EDGE_REGION:])
        peaks.append((int(edge_info['bright_intensity']), bright_mass, 'bright'))
    min_height = total * PEAK_MIN_HEIGHT_PCT
    # Scan the mid-range for local maxima that clear the height threshold and
    # are not within PEAK_MIN_DISTANCE of an already-recorded peak.
    for i in range(PEAK_EDGE_REGION, 256 - PEAK_EDGE_REGION):
        if hist_smooth[i] < min_height:
            continue
        is_peak = all(
            hist_smooth[i] >= hist_smooth[max(0, i - j)] and hist_smooth[i] >= hist_smooth[min(255, i + j)]
            for j in range(1, PEAK_LOCAL_WINDOW + 1)
        )
        if is_peak and not any(abs(p[0] - i) < PEAK_MIN_DISTANCE for p in peaks):
            peaks.append((i, hist_smooth[i], 'middle'))
    peaks.sort(key=lambda x: x[0])
    n = len(peaks)
    if n <= 1:
        return True, max(1, n), "single_peak", peaks, bg_color
    has_dark = any(p[0] < REGION_DARK_THRESHOLD for p in peaks)
    has_bright = any(p[0] > REGION_BRIGHT_THRESHOLD for p in peaks)
    has_middle = any(REGION_DARK_THRESHOLD <= p[0] <= REGION_BRIGHT_THRESHOLD for p in peaks)
    # Dark + bright with nothing in between is classic black-on-white lineart.
    if has_dark and has_bright and not has_middle:
        return True, 2, "bimodal", peaks, bg_color
    return False, n, f"multicolor_{n}peaks", peaks, bg_color
def xdog_edge_detection(img, sigma=None, phi=None, tau=None):
    """Extract a binary edge map via XDoG (eXtended Difference of Gaussians).

    Parameters default to the module-level XDOG_* constants. NOTE(review):
    the `or`-style defaulting treats 0 the same as None, so a caller cannot
    pass an explicit zero for sigma/phi/tau — confirm this is acceptable.
    Returns a uint8 binary image produced by Otsu-thresholding the inverted
    XDoG response.
    """
    sigma = sigma or XDOG_SIGMA
    phi = phi or XDOG_PHI
    tau = tau or XDOG_TAU
    # Accept BGR or already-grayscale input; work in [0, 1] floats.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img.copy()
    gray = gray.astype(np.float64) / 255.0
    # Odd kernel sizes covering roughly 3 sigma on each side, minimum 3.
    ksize1 = max(3, int(np.ceil(sigma * 3)) * 2 + 1)
    ksize2 = max(3, int(np.ceil(sigma * XDOG_K * 3)) * 2 + 1)
    g1 = cv2.GaussianBlur(gray, (ksize1, ksize1), sigma)
    g2 = cv2.GaussianBlur(gray, (ksize2, ksize2), sigma * XDOG_K)
    # Difference of Gaussians with the wider blur down-weighted by tau.
    dog = g1 - tau * g2
    if dog.max() != dog.min():
        # NOTE(review): this divides by the range without subtracting the
        # minimum, so values are rescaled but not shifted into [0, 1]; looks
        # intentional as input to the tanh soft-threshold below — confirm
        # before "fixing".
        dog = dog / (dog.max() - dog.min() + XDOG_EPSILON)
    # Soft threshold: 1.0 where the response is non-negative, smooth tanh
    # falloff (steepness phi) below zero.
    xdog = np.where(dog >= 0, 1.0, 1.0 + np.tanh(phi * dog))
    # Invert so edges become bright, then binarize with Otsu.
    xdog = ((1.0 - xdog) * 255).astype(np.uint8)
    _, binary = cv2.threshold(xdog, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary
def preprocess_lineart(img):
    """Binarize a lineart image, picking Otsu or XDoG by color complexity.

    Returns (binary, method_name, bg_color); the binary map is oriented so
    that strokes (the minority class) are the white foreground.
    """
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray = img.copy()
    is_bimodal, n_colors, _reason, _peaks, bg_color = analyze_color_distribution(gray)
    if is_bimodal and n_colors <= 2:
        # Simple two-tone image: a global Otsu threshold is enough.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        if np.mean(binary) / 255.0 > 0.5:
            binary = cv2.bitwise_not(binary)
        return binary, "otsu", bg_color
    # Multi-tone image: fall back to XDoG edge extraction.
    binary = xdog_edge_detection(img)
    if np.sum(binary > 0) / binary.size > 0.5:
        binary = cv2.bitwise_not(binary)
    return binary, "xdog", bg_color
def preprocess_color(img):
    """Reduce a finished (color) image to an XDoG edge map for matching."""
    return xdog_edge_detection(img)
def hex_to_rgb(h):
    """Parse a color spec into an (r, g, b) tuple of ints.

    Accepts an existing tuple/list (first three components passed through),
    a hex string with or without '#' in 3- or 6-digit form, or None.
    Falls back to white (255, 255, 255) for anything unparseable.
    """
    if h is None:
        return (255, 255, 255)
    if isinstance(h, (tuple, list)):
        return tuple(int(x) for x in h[:3])
    h = str(h).strip().lstrip('#')
    if len(h) == 3:
        # Expand shorthand, e.g. 'f80' -> 'ff8800'.
        h = ''.join([c*2 for c in h])
    if len(h) != 6:
        return (255, 255, 255)
    try:
        return tuple(int(h[i:i+2], 16) for i in (0, 2, 4))
    except ValueError:
        # FIX: was a bare `except:`; only non-hex digits (int() raising
        # ValueError) should trigger the white fallback.
        return (255, 255, 255)
def flatten_rgba_to_rgb(rgba, bg):
    """Composite an image onto the solid background color *bg* (3-tuple).

    Grayscale input is promoted to RGB, 3-channel input is returned as a
    copy, and 4-channel input is alpha-blended over *bg*. Always returns a
    uint8 3-channel array.
    """
    if len(rgba.shape) == 2:
        return cv2.cvtColor(rgba, cv2.COLOR_GRAY2RGB)
    if rgba.shape[2] == 3:
        return rgba.copy()
    # Standard "over" compositing: out = fg * a + bg * (1 - a).
    alpha = rgba[:, :, 3:4].astype(np.float32) / 255.0
    foreground = rgba[:, :, :3].astype(np.float32)
    backdrop = np.array(bg, dtype=np.float32).reshape(1, 1, 3)
    blended = foreground * alpha + backdrop * (1.0 - alpha)
    return blended.astype(np.uint8)
def cv2_to_pil(img):
    """Convert a BGR OpenCV image to a PIL RGB Image; None passes through."""
    if img is None:
        return None
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
def img_to_b64(img):
    """Encode a BGR OpenCV image as a base64 PNG string (no data: prefix)."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    buf = BytesIO()
    Image.fromarray(rgb).save(buf, format="PNG")
    payload = buf.getvalue()
    return base64.b64encode(payload).decode()
def make_side_by_side(a, b, max_h=500, interp=cv2.INTER_LANCZOS4):
    """Scale two BGR images to a shared height and join them with a gray gap.

    Each image is scaled so its height equals min(max_h, taller input);
    the shorter result is bottom-padded with white so the columns align.
    """
    target_h = min(max_h, max(a.shape[0], b.shape[0]))

    def _scaled(img):
        # Uniform scale factor so the image height becomes target_h.
        factor = target_h / img.shape[0]
        size = (int(img.shape[1] * factor), int(img.shape[0] * factor))
        return cv2.resize(img, size, interpolation=interp)

    left, right = _scaled(a), _scaled(b)
    h = max(left.shape[0], right.shape[0])

    def _padded(img):
        # Bottom-pad with white rows up to the common height.
        missing = h - img.shape[0]
        if missing <= 0:
            return img
        filler = np.full((missing, img.shape[1], 3), 255, np.uint8)
        return np.vstack([img, filler])

    divider = np.full((h, 10, 3), 200, np.uint8)
    return np.hstack([_padded(left), divider, _padded(right)])
def make_svg(a, b, kp1, kp2, matches, max_m=200, interp=cv2.INTER_LANCZOS4):
    """Render two BGR images side by side as inline SVG with match lines.

    a/b: the two images; kp1/kp2: their keypoints (objects with .pt);
    matches: objects with .queryIdx (into kp1) and .trainIdx (into kp2).
    At most *max_m* matches are drawn. Returns an HTML string whose outer
    container has a fixed height so swapping content does not shift layout.
    """
    # FIX: Define consistent container height
    container_height = 550
    if not matches:
        # FIX: Return container with same fixed height as content containers
        return f"<div style='min-height:{container_height}px;height:{container_height}px;padding:30px;color:#999;text-align:center;display:flex;align-items:center;justify-content:center;box-sizing:border-box;'>No matches</div>"
    m, total = matches[:max_m], len(matches)
    # Scale both images to at most 500 px tall, preserving aspect ratio.
    target_h = min(500, max(a.shape[0], b.shape[0]))
    s1, s2 = target_h / a.shape[0], target_h / b.shape[0]
    h1, w1 = int(a.shape[0] * s1), int(a.shape[1] * s1)
    h2, w2 = int(b.shape[0] * s2), int(b.shape[1] * s2)
    gap, final_h, total_w = 10, max(h1, h2), w1 + 10 + w2
    a_r = cv2.resize(a, (w1, h1), interpolation=interp)
    b_r = cv2.resize(b, (w2, h2), interpolation=interp)
    # Fixed seed -> stable per-match colors across renders.
    np.random.seed(42)
    c = np.random.randint(50, 255, (len(m), 3))
    # One SVG <line> per match and endpoint <circle>s on each image; the
    # right image's x-coordinates are shifted by w1 + gap.
    lines = ''.join(f'<line x1="{kp1[x.queryIdx].pt[0]*s1:.1f}" y1="{kp1[x.queryIdx].pt[1]*s1:.1f}" x2="{kp2[x.trainIdx].pt[0]*s2 + w1 + gap:.1f}" y2="{kp2[x.trainIdx].pt[1]*s2:.1f}" stroke="rgb({c[i,0]},{c[i,1]},{c[i,2]})" stroke-width="1" opacity="0.7"/>' for i, x in enumerate(m))
    circles1 = ''.join(f'<circle cx="{kp1[x.queryIdx].pt[0]*s1:.1f}" cy="{kp1[x.queryIdx].pt[1]*s1:.1f}" r="3" fill="rgb({c[i,0]},{c[i,1]},{c[i,2]})"/>' for i, x in enumerate(m))
    circles2 = ''.join(f'<circle cx="{kp2[x.trainIdx].pt[0]*s2 + w1 + gap:.1f}" cy="{kp2[x.trainIdx].pt[1]*s2:.1f}" r="3" fill="rgb({c[i,0]},{c[i,1]},{c[i,2]})"/>' for i, x in enumerate(m))
    info = f"{len(m)} of {total} matches" if len(m) < total else f"{total} matches"
    # FIX: Wrap in container with fixed height and use preserveAspectRatio
    return f'''<div style="min-height:{container_height}px;height:{container_height}px;box-sizing:border-box;overflow:hidden;">
<div style="font-size:12px;color:#666;margin-bottom:4px;height:20px;">{info}</div>
<svg viewBox="0 0 {total_w} {final_h}" preserveAspectRatio="xMidYMid meet" style="width:100%;height:{min(final_h, container_height - 30)}px;max-height:{container_height - 30}px;background:#fafafa;display:block;">
<image href="data:image/png;base64,{img_to_b64(a_r)}" width="{w1}" height="{h1}"/>
<rect x="{w1}" y="0" width="{gap}" height="{final_h}" fill="#e0e0e0"/>
<image x="{w1 + gap}" href="data:image/png;base64,{img_to_b64(b_r)}" width="{w2}" height="{h2}"/>
{lines}{circles1}{circles2}
</svg></div>'''
def edge_filter(matches, kp2, edges, r):
    """Keep matches whose train keypoint lies within *r* px of an edge pixel.

    *edges* is a 2-D edge map indexed [y, x]; keypoint coordinates are
    truncated to integer pixel positions and the (2r+1)-square window around
    each is clipped to the image bounds.
    """
    h, w = edges.shape[0], edges.shape[1]
    kept = []
    for match in matches:
        px, py = kp2[match.trainIdx].pt
        x, y = int(px), int(py)
        window = edges[max(0, y - r):min(h, y + r + 1), max(0, x - r):min(w, x + r + 1)]
        if np.any(window):
            kept.append(match)
    return kept
def filter_keypoints_by_mask(keypoints, descriptors, mask, radius=2):
    """Drop keypoints whose radius-padded neighborhood is entirely outside *mask*.

    Returns (kept_keypoints, kept_descriptors). If filtering would discard
    every keypoint, the full input set is returned unchanged — a deliberate
    fallback so downstream matching still has features to work with.
    """
    if keypoints is None or descriptors is None or len(keypoints) == 0:
        return [], None
    h, w = mask.shape[:2]
    survivors = []
    for idx, kp in enumerate(keypoints):
        cx = int(round(kp.pt[0]))
        cy = int(round(kp.pt[1]))
        y_lo, y_hi = max(0, cy - radius), min(h, cy + radius + 1)
        x_lo, x_hi = max(0, cx - radius), min(w, cx + radius + 1)
        if y_hi > y_lo and x_hi > x_lo and np.any(mask[y_lo:y_hi, x_lo:x_hi] > 0):
            survivors.append(idx)
    if not survivors:
        return list(keypoints), descriptors
    return [keypoints[i] for i in survivors], descriptors[survivors]
def visualize_template_match(lineart, color, scale, tx, ty, score):
    """Overlay the scaled lineart onto *color* at offset (tx, ty) for debugging.

    Draws a 50/50 alpha blend of the scaled lineart inside the color image,
    a green bounding rectangle around the overlap, and a text banner with
    the match parameters. Returns a new BGR image; inputs are not modified.
    """
    h, w = color.shape[:2]
    nw, nh = int(lineart.shape[1] * scale), int(lineart.shape[0] * scale)
    if nw <= 0 or nh <= 0:
        # Degenerate scale: nothing to draw.
        return color.copy()
    scaled = cv2.resize(lineart, (nw, nh), interpolation=cv2.INTER_LANCZOS4)
    overlay = color.copy()
    txi, tyi = int(tx), int(ty)
    # Destination rectangle clipped to the color image bounds...
    dx1, dy1 = max(0, txi), max(0, tyi)
    dx2, dy2 = min(w, txi + nw), min(h, tyi + nh)
    # ...and the matching source offset when the template starts off-canvas
    # (negative tx/ty).
    sx1, sy1 = max(0, -txi), max(0, -tyi)
    if dy2 > dy1 and dx2 > dx1:
        h_copy, w_copy = dy2 - dy1, dx2 - dx1
        roi = overlay[dy1:dy2, dx1:dx2]
        scaled_roi = scaled[sy1:sy1+h_copy, sx1:sx1+w_copy]
        if len(scaled_roi.shape) == 2:
            # Promote grayscale lineart to 3 channels so addWeighted shapes match.
            scaled_roi = cv2.cvtColor(scaled_roi, cv2.COLOR_GRAY2BGR)
        overlay[dy1:dy2, dx1:dx2] = cv2.addWeighted(roi, 0.5, scaled_roi, 0.5, 0)
    cv2.rectangle(overlay, (dx1, dy1), (dx2-1, dy2-1), (0, 255, 0), 2)
    font = cv2.FONT_HERSHEY_SIMPLEX
    text = f"Template: scale={scale:.3f} tx={tx:.0f} ty={ty:.0f} score={score:.3f}"
    cv2.putText(overlay, text, (10, 30), font, 0.7, (0, 255, 0), 2)
    return overlay
def resize_image_match(finished_img, lineart_size):
    """Rescale *finished_img* (PIL Image) toward *lineart_size* = (w, h).

    When the finished image's longest side exceeds the lineart's, scale by
    the ratio of the longest sides (shrink to fit); otherwise scale by the
    ratio of the shortest sides. Aspect ratio is preserved either way.
    """
    fw, fh = finished_img.size
    lw, lh = lineart_size
    if max(fw, fh) > max(lw, lh):
        factor = max(lw, lh) / max(fw, fh)
    else:
        factor = min(lw, lh) / min(fw, fh)
    new_size = (int(fw * factor), int(fh * factor))
    return finished_img.resize(new_size, Image.LANCZOS)
def get_guided_search_radius(w, h, config):
    """Resolve the guided-match search radius in pixels for a w x h image.

    'percent' mode (the default) scales the radius with the longer image
    side; any other mode uses the fixed pixel radius from the config.
    """
    mode = config.get('guided_search_mode', 'percent')
    if mode != 'percent':
        return config.get('guided_search_radius_px', 80)
    pct = config.get('guided_search_radius_pct', 8.0)
    return max(w, h) * pct / 100.0