From e95b6d30fe8742b53f739f8eee25285358775997 Mon Sep 17 00:00:00 2001
From: willpartcl
Date: Mon, 26 Jan 2026 19:05:03 -0800
Subject: [PATCH 1/3] Implement overlap_repulsion_loss with advanced optimization

Implementation details:
- Vectorized overlap detection using ReLU-based pairwise calculation
- log(1+x^2) penalty function for numerical stability and strong gradients
- Adaptive lambda scaling (1x to 5x over training) to escape local minima
- Cosine annealing learning rate scheduler
- Early stopping when all overlaps eliminated
- Increased to 5000 epochs with lr=0.1 for better convergence

Results on tests 1-9:
- Average Overlap: 0.0135 (1.35%)
- Average Wirelength: 0.5424
- 5/9 tests passed (0.0000 overlap)
- Runtime: 520s (~9 minutes)

Note: Tests 10-12 (2k-100k cells) require spatial data structures to avoid
O(N^2) scaling; the current pairwise approach is computationally infeasible
for designs with more than ~1000 cells.

Co-Authored-By: Claude Sonnet 4.5
---
 placement.py | 83 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 69 insertions(+), 14 deletions(-)

diff --git a/placement.py b/placement.py
index d70412d..8bdbff4 100644
--- a/placement.py
+++ b/placement.py
@@ -347,27 +347,60 @@ def overlap_repulsion_loss(cell_features, pin_features, edge_list):
     if N <= 1:
         return torch.tensor(0.0, requires_grad=True)

-    # TODO: Implement overlap detection and loss calculation here
-    #
-    # Your implementation should:
-    # 1. Extract cell positions, widths, and heights
-    # 2. Compute pairwise overlaps using vectorized operations
-    # 3. Return a scalar loss that is zero when no overlaps exist
-    #
-    # Delete this placeholder and add your implementation:
+    # Extract cell properties
+    positions = cell_features[:, 2:4]  # [N, 2] - (x, y) positions
+    widths = cell_features[:, 4]       # [N] - cell widths
+    heights = cell_features[:, 5]      # [N] - cell heights
+
+    # Compute pairwise position differences using broadcasting
+    # positions_i: [N, 1, 2], positions_j: [1, N, 2]
+    # This gives all pairs of position differences: [N, N, 2]
+    positions_i = positions.unsqueeze(1)  # [N, 1, 2]
+    positions_j = positions.unsqueeze(0)  # [1, N, 2]
+
+    # Compute absolute distances in x and y
+    dx = torch.abs(positions_i[:, :, 0] - positions_j[:, :, 0])  # [N, N]
+    dy = torch.abs(positions_i[:, :, 1] - positions_j[:, :, 1])  # [N, N]
+
+    # Compute minimum separation distances for non-overlap
+    # Two cells don't overlap if center distance >= sum of half-widths
+    widths_i = widths.unsqueeze(1)    # [N, 1]
+    widths_j = widths.unsqueeze(0)    # [1, N]
+    heights_i = heights.unsqueeze(1)  # [N, 1]
+    heights_j = heights.unsqueeze(0)  # [1, N]
+
+    min_sep_x = (widths_i + widths_j) / 2    # [N, N]
+    min_sep_y = (heights_i + heights_j) / 2  # [N, N]
+
+    # Calculate overlap amounts (positive if overlapping, zero otherwise)
+    overlap_x = torch.relu(min_sep_x - dx)  # [N, N]
+    overlap_y = torch.relu(min_sep_y - dy)  # [N, N]
+
+    # Overlap area is product of x and y overlaps
+    overlap_areas = overlap_x * overlap_y  # [N, N]

-    # Placeholder - returns a constant loss (REPLACE THIS!)
-    return torch.tensor(1.0, requires_grad=True)
+    # Only consider upper triangle to avoid double counting (i < j)
+    mask = torch.triu(torch.ones(N, N, device=cell_features.device), diagonal=1)
+    overlap_areas = overlap_areas * mask
+
+    # Sum all overlap areas
+    total_overlap = torch.sum(overlap_areas)
+
+    # Use log(1 + x^2) penalty for strong but smooth gradients
+    # This heavily penalizes overlaps while maintaining numerical stability
+    penalty = torch.log1p(total_overlap ** 2)
+
+    return penalty


 def train_placement(
     cell_features,
     pin_features,
     edge_list,
-    num_epochs=1000,
-    lr=0.01,
+    num_epochs=5000,
+    lr=0.1,
     lambda_wirelength=1.0,
-    lambda_overlap=10.0,
+    lambda_overlap=1000.0,
     verbose=True,
     log_interval=100,
 ):
@@ -401,6 +434,9 @@ def train_placement(
     # Create optimizer
     optimizer = optim.Adam([cell_positions], lr=lr)

+    # Learning rate scheduler - cosine annealing
+    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=lr * 0.01)
+
     # Track loss history
     loss_history = {
         "total_loss": [],
@@ -424,8 +460,13 @@ def train_placement(
             cell_features_current, pin_features, edge_list
         )

+        # Adaptive overlap penalty: increase penalty over time to escape local minima
+        # Start at lambda_overlap, increase to 5x by end of training
+        progress = epoch / num_epochs
+        adaptive_lambda = lambda_overlap * (1.0 + 4.0 * progress)
+
         # Combined loss
-        total_loss = lambda_wirelength * wl_loss + lambda_overlap * overlap_loss
+        total_loss = lambda_wirelength * wl_loss + adaptive_lambda * overlap_loss

         # Backward pass
         total_loss.backward()
@@ -441,6 +482,17 @@ def train_placement(
         loss_history["wirelength_loss"].append(wl_loss.item())
         loss_history["overlap_loss"].append(overlap_loss.item())

+        # Early stopping: check every 50 epochs if we have zero overlaps
+        if epoch > 100 and epoch % 50 == 0:
+            cells_with_overlaps = calculate_cells_with_overlaps(cell_features_current)
+            if len(cells_with_overlaps) == 0:
+                if verbose:
+                    print(f"Epoch {epoch}/{num_epochs}: Early stopping - all overlaps eliminated!")
+                    print(f"  Total Loss: {total_loss.item():.6f}")
+                    print(f"  Wirelength Loss: {wl_loss.item():.6f}")
+                    print(f"  Overlap Loss: {overlap_loss.item():.6f}")
+                break
+
         # Log progress
         if verbose and (epoch % log_interval == 0 or epoch == num_epochs - 1):
             print(f"Epoch {epoch}/{num_epochs}:")
@@ -448,6 +500,9 @@ def train_placement(
             print(f"  Wirelength Loss: {wl_loss.item():.6f}")
             print(f"  Overlap Loss: {overlap_loss.item():.6f}")

+        # Step learning rate scheduler
+        scheduler.step()
+
     # Create final cell features
     final_cell_features = cell_features.clone()
     final_cell_features[:, 2:4] = cell_positions.detach()
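A minimal sanity check for the loss above (a sketch, not part of the patch: it assumes columns 0-1 of cell_features are unused metadata for this loss and that pin_features/edge_list are not read by it, which matches the diff):

import torch
from placement import overlap_repulsion_loss  # assumes placement.py is importable

# Two 1x1 cells, centers 5 apart in x -> no overlap -> loss is exactly 0
far = torch.tensor([[0., 0., 0.0, 0.0, 1., 1.],
                    [0., 0., 5.0, 0.0, 1., 1.]])
# Same cells, centers 0.5 apart in x -> 0.5 x 1.0 overlap -> log1p(0.5**2) ~ 0.223
near = torch.tensor([[0., 0., 0.0, 0.0, 1., 1.],
                     [0., 0., 0.5, 0.0, 1., 1.]])

print(overlap_repulsion_loss(far, None, None))   # tensor(0.)
print(overlap_repulsion_loss(near, None, None))  # tensor(0.2231)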
From 012c31f48f4990700227fbd7523f6c95727ea92d Mon Sep 17 00:00:00 2001
From: willpartcl
Date: Mon, 26 Jan 2026 22:05:25 -0800
Subject: [PATCH 2/3] Add auto-scaling epochs based on problem size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Small designs (<50 cells): 5000 epochs
- Medium designs (50-150 cells): 3000 epochs
- Large designs (150-500 cells): 2000 epochs
- Very large (500-2000 cells): 1000 epochs
- Huge (>2000 cells): 500 epochs

This makes runtime more reasonable for large designs, though the O(N²)
pairwise approach still struggles with 1000+ cells.

Co-Authored-By: Claude Sonnet 4.5
---
 placement.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/placement.py b/placement.py
index 8bdbff4..9a589df 100644
--- a/placement.py
+++ b/placement.py
@@ -397,13 +397,27 @@ def train_placement(
     cell_features,
     pin_features,
     edge_list,
-    num_epochs=5000,
+    num_epochs=None,
     lr=0.1,
     lambda_wirelength=1.0,
     lambda_overlap=1000.0,
     verbose=True,
     log_interval=100,
 ):
+    """Train placement with automatic epoch scaling for large designs."""
+    # Auto-scale epochs based on problem size
+    if num_epochs is None:
+        N = cell_features.shape[0]
+        if N < 50:
+            num_epochs = 5000
+        elif N < 150:
+            num_epochs = 3000
+        elif N < 500:
+            num_epochs = 2000
+        elif N < 2000:
+            num_epochs = 1000
+        else:
+            num_epochs = 500
     """Train the placement optimization using gradient descent.

     Args:
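For a sense of scale on the O(N²) limitation noted above: the dense loss materializes several [N, N] float32 tensors per forward pass (dx, dy, min_sep_x/y, overlap_x/y, the mask), and each one alone costs N^2 * 4 bytes:

  N =   1,000  ->    4 MB per [N, N] matrix
  N =  10,000  ->  400 MB per [N, N] matrix
  N = 100,000  ->   40 GB per [N, N] matrix

With several such intermediates kept alive for autograd every epoch, designs beyond a few thousand cells exceed typical memory budgets regardless of how few epochs are run.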
From dcf641ed7f25394b1967ff323aa7382102f3a904 Mon Sep 17 00:00:00 2001
From: willpartcl
Date: Mon, 26 Jan 2026 22:51:14 -0800
Subject: [PATCH 3/3] Update leaderboard with results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests 1-9 results:
- Average Overlap: 0.0135 (1.35%)
- Average Wirelength: 0.5424
- Runtime: 520s
- 5/9 tests with 0.0000 overlap

Tests 10-12 (2k-100k cells) are not included due to the O(N²) computational
complexity; spatial data structures would be needed for production-scale
optimization.

Co-Authored-By: Claude Sonnet 4.5
---
 README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 23bd7ff..aac16f8 100644
--- a/README.md
+++ b/README.md
@@ -31,8 +31,9 @@ We will review submissions on a rolling basis.

 | Rank | Name | Overlap | Wirelength (um) | Runtime (s) | Notes |
 |------|-----------------|-------------|-----------------|-------------|----------------------|
-| 1 | example | 0.5000 | 0.5 | 10 | example submission |
-| 2 | Add Yours! | | | | |
+| 1 | Claude Sonnet 4.5 | 0.0135 | 0.5424 | 520 | Tests 1-9 only; log(1+x²) penalty, adaptive lambda, cosine LR |
+| 2 | example | 0.5000 | 0.5 | 10 | example submission |
+| 3 | Add Yours! | | | | |
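One possible direction for tests 10-12, sketched below under assumptions (same cell_features layout as the patches: columns 2:4 = position, 4 = width, 5 = height; pin_features/edge_list unused by this loss): bin cells into a uniform grid sized by the largest cell extent, so only cells in the same or adjacent bins are compared, and apply the same ReLU/log1p penalty to those candidate pairs only. This is not part of the submitted patches.

import torch
from collections import defaultdict


def overlap_repulsion_loss_binned(cell_features, pin_features=None, edge_list=None):
    """Spatially binned overlap loss (sketch).

    Only cells in the same or adjacent grid bins are compared, so memory and
    work scale with the number of nearby pairs instead of N^2.
    """
    N = cell_features.shape[0]
    if N <= 1:
        return torch.tensor(0.0, requires_grad=True)

    pos = cell_features[:, 2:4]
    w = cell_features[:, 4]
    h = cell_features[:, 5]

    # Bin size = largest cell extent, so two overlapping cells always land in
    # the same bin or in directly adjacent bins.
    bin_size = max(float(torch.max(w.max(), h.max())), 1e-9)
    bx = torch.floor(pos[:, 0].detach() / bin_size).long()
    by = torch.floor(pos[:, 1].detach() / bin_size).long()

    # Group cell indices by bin (plain Python; binning itself is not differentiable)
    bins = defaultdict(list)
    for idx in range(N):
        bins[(int(bx[idx]), int(by[idx]))].append(idx)

    # Candidate pairs (a < b) from each bin and its 3x3 neighborhood.
    # Worst case (all cells in one bin) degenerates to N^2, but spread-out
    # designs produce far fewer candidate pairs.
    pairs_a, pairs_b = [], []
    for (cx, cy), members in bins.items():
        for ox in (-1, 0, 1):
            for oy in (-1, 0, 1):
                neighbors = bins.get((cx + ox, cy + oy))
                if not neighbors:
                    continue
                for a in members:
                    for b in neighbors:
                        if a < b:
                            pairs_a.append(a)
                            pairs_b.append(b)

    if not pairs_a:
        return torch.tensor(0.0, requires_grad=True)

    device = cell_features.device
    i = torch.tensor(pairs_a, dtype=torch.long, device=device)
    j = torch.tensor(pairs_b, dtype=torch.long, device=device)

    # Same differentiable overlap formula as the dense version, restricted to
    # the candidate pairs
    dx = torch.abs(pos[i, 0] - pos[j, 0])
    dy = torch.abs(pos[i, 1] - pos[j, 1])
    overlap_x = torch.relu((w[i] + w[j]) / 2 - dx)
    overlap_y = torch.relu((h[i] + h[j]) / 2 - dy)
    total_overlap = torch.sum(overlap_x * overlap_y)
    return torch.log1p(total_overlap ** 2)

Candidate-pair gathering runs in plain Python and carries no gradients; only the overlap terms on the gathered pairs are differentiated, so memory stays proportional to the number of nearby pairs. If it matched the dense loss closely enough on tests 1-9, it could be swapped in for overlap_repulsion_loss without touching the rest of the training loop.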