# ContourFuse / construct_correct_neurons.py
# CompressedGemma's picture
# Construct source neuron
# 944a442 verified
#!/usr/bin/env python3
"""Construct neurons to match two Jacobian boundaries at x1 ≈ -0.665558 and x2 ≈ 0.594541."""
import torch
import math
import os
from safetensors.torch import save_file
# Target boundaries and slopes of the piecewise-linear function to reproduce.
boundary_x1 = -0.665558
boundary_x2 = 0.594541
left_slope = 11.0
mid_slope = 1.0
right_slope = 0.5

# Offset of the right-hand segment: y = 0.5 * x - 0.2489895
right_intercept = -0.2489895


def right_eq(x):
    """Target value on the right segment (x > boundary_x2)."""
    return right_slope * x + right_intercept


# Middle equation (continuous with the right segment at boundary_x2):
c_mid = right_eq(boundary_x2) - mid_slope * boundary_x2


def mid_eq(x):
    """Target value on the middle segment (boundary_x1 < x < boundary_x2)."""
    return mid_slope * x + c_mid


# Left equation (continuous with the middle segment at boundary_x1):
c_left = mid_eq(boundary_x1) - left_slope * boundary_x1


def left_eq(x):
    """Target value on the left segment (x < boundary_x1)."""
    return left_slope * x + c_left


print("Constructing neurons to match:")
print(f" Boundary 1 at x = {boundary_x1}")
print(f" Boundary 2 at x = {boundary_x2}")
print(f" Left slope = {left_slope:4.1f}, range: x < {boundary_x1:.6f}")
print(f" Middle slope = {mid_slope:4.1f}, range: {boundary_x1:.6f} < x < {boundary_x2:.6f}")
print(f" Right slope = {right_slope:4.1f}, range: x > {boundary_x2:.6f}")

# Use native PyTorch tensors to avoid redundant casting overhead.
# Layout: hidden = relu(x @ W1.T + b1), y = hidden @ W2.T + b2, 8 hidden units.
W1 = torch.zeros((8, 1), dtype=torch.float32)
b1 = torch.zeros(8, dtype=torch.float32)
W2 = torch.zeros((1, 8), dtype=torch.float32)
b2 = torch.zeros(1, dtype=torch.float32)

# Neurons 0 and 3: slope carrier, split as x = relu(x) - relu(-x).
# Together they contribute right_slope * x for EVERY x with zero bias.
# A single "always on" neuron with a large positive bias would switch off
# once x drops below -bias (introducing an extra, unwanted slope change) and
# would force the output bias to cancel a large constant in float32.
W1[0, 0] = 1.0
W2[0, 0] = right_slope
W1[3, 0] = -1.0
W2[0, 3] = -right_slope

# Neuron 1: active left of boundary_x1 (adds left_slope - mid_slope = 10.0 to slope).
# relu(boundary_x1 - x) has slope -1 in x, hence the negated output weight.
W1[1, 0] = -1.0
b1[1] = boundary_x1
W2[0, 1] = -(left_slope - mid_slope)

# Neuron 2: active left of boundary_x2 (adds mid_slope - right_slope = 0.5 to slope).
W1[2, 0] = -1.0
b1[2] = boundary_x2
W2[0, 2] = -(mid_slope - right_slope)

# Neurons 4-7: inactive (zero weights).

# Output bias: at boundary_x2 neurons 1 and 2 output exactly zero, so only the
# carrier pair contributes; choose b2 so the network hits target_y there.
target_y = right_eq(boundary_x2)
carrier_out = right_slope * boundary_x2
b2[0] = target_y - carrier_out

print("\nConstructed neuron weights:")
print(f"W1:\n{W1.numpy()}")
print(f"b1: {b1.numpy()}")
print(f"W2:\n{W2.numpy()}")
print(f"b2: {b2.numpy()}")
# Verify the construction natively
def mlp_forward(x: float) -> float:
    """Evaluate the constructed one-hidden-layer ReLU network at a scalar input.

    Reads the module-level tensors ``W1``, ``b1``, ``W2`` and ``b2`` and
    computes ``relu(x @ W1.T + b1) @ W2.T + b2`` in float32.

    Args:
        x: Scalar input value.

    Returns:
        The network output as a Python float.
    """
    # Shape (1, 1): a batch of one sample with one input feature.
    x_t = torch.tensor([[x]], dtype=torch.float32)
    h = x_t @ W1.T + b1
    h = torch.relu(h)
    y = h @ W2.T + b2
    return y.item()
def _require_close(label, actual, expected):
    """Raise AssertionError unless ``actual`` matches ``expected``.

    Every check shares the same tolerances (rel_tol=1e-4, abs_tol=1e-5).
    An explicit ``raise`` is used rather than an ``assert`` statement so the
    verification still runs when Python is started with ``-O``.
    """
    if not math.isclose(actual, expected, rel_tol=1e-4, abs_tol=1e-5):
        raise AssertionError(f"{label} mismatch! Expected {expected}, got {actual}")


def _secant_slope(x_lo, x_hi):
    """Return the finite-difference slope of ``mlp_forward`` between two inputs."""
    return (mlp_forward(x_hi) - mlp_forward(x_lo)) / (x_hi - x_lo)


print("\n" + "=" * 60)
print("Automated Verification:")
print("=" * 60)

# Test at Boundary 1: the network must agree with the middle segment there.
y_b1 = mlp_forward(boundary_x1)
expected_y_b1 = mid_eq(boundary_x1)
_require_close("Boundary 1", y_b1, expected_y_b1)
print(f"✓ Boundary 1 (x = {boundary_x1}) matched")

# Test at Boundary 2: the network must agree with the right segment there.
y_b2 = mlp_forward(boundary_x2)
expected_y_b2 = right_eq(boundary_x2)
_require_close("Boundary 2", y_b2, expected_y_b2)
print(f"✓ Boundary 2 (x = {boundary_x2}) matched")

# Test left slope: both sample points lie strictly left of boundary_x1.
x_left1, x_left2 = boundary_x1 - 0.10, boundary_x1 - 0.05
slope_left = _secant_slope(x_left1, x_left2)
_require_close("Left slope", slope_left, left_slope)
print(f"✓ Left slope matched: {slope_left:.4f}")

# Test middle slope: sample from the midpoint between the two boundaries.
x_mid1 = (boundary_x1 + boundary_x2) / 2
x_mid2 = x_mid1 + 0.05
slope_mid = _secant_slope(x_mid1, x_mid2)
_require_close("Middle slope", slope_mid, mid_slope)
print(f"✓ Middle slope matched: {slope_mid:.4f}")

# Test right slope: both sample points lie strictly right of boundary_x2.
x_right1, x_right2 = boundary_x2 + 0.05, boundary_x2 + 0.10
slope_right = _secant_slope(x_right1, x_right2)
_require_close("Right slope", slope_right, right_slope)
print(f"✓ Right slope matched: {slope_right:.4f}")
# Make sure the output directory exists before writing into it.
os.makedirs("test_mlp_hf", exist_ok=True)

# Collect the constructed tensors under their layer1.* / layer2.* keys,
# then write them out in a single safetensors file.
state_dict = dict(
    (
        ("layer1.weight", W1),
        ("layer1.bias", b1),
        ("layer2.weight", W2),
        ("layer2.bias", b2),
    )
)
save_file(state_dict, "test_mlp_hf/model.safetensors")
print("\nSuccessfully saved constructed neuron to test_mlp_hf/model.safetensors")