From ed185d662ea2c7f9c7b67ad10d5386561818464b Mon Sep 17 00:00:00 2001 From: Richard Wang Date: Wed, 1 Apr 2026 12:41:43 +0800 Subject: [PATCH] Add leaderboard entry: Richard Wang (WL=0.2666, overlap=0.0000) --- README.md | 47 +- placement.py | 1464 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 1274 insertions(+), 237 deletions(-) diff --git a/README.md b/README.md index cf27bfb..9c08fd8 100644 --- a/README.md +++ b/README.md @@ -29,32 +29,35 @@ We will review submissions on a rolling basis. ## Leaderboard (sorted by overlap) +## Leaderboard (sorted by overlap) + | Rank | Name | Overlap | Wirelength (um) | Runtime (s) | Notes | |------|-----------------|-------------|-----------------|-------------|----------------------| | 1 | Brayden Rudisill | 0.0000 | 0.2611 | 50.51 | Timed on a mac air | | 2 | manuhalapeth | 0.0000 | 0.2630 | 196.8 | | -| 3 | Neil Teje | 0.0000 | 0.2700 | 24.00s | | -| 4 | Leison Gao | 0.0000 | 0.2796 | 50.14s | | -| 5 | William Pan | 0.0000 | 0.2848 | 155.33s | | -| 6 | Ashmit Dutta | 0.0000 | 0.2870 | 995.58 | Spent my entire morning (12 am - 6 am) doing this :P | -| 7 | Pawan Paleja | 0.0000 | 0.3311 | 1.74s | Implemented hint for loss func, cosine annealing on learning rate with warmup, std annealing on lambda weight. Used optuna to tune hyperparam. Tested on gh codespaces 2-core. 
| - 8 | Shashank Shriram | 0.0000 | 0.3312 | 11.32 | 🏎️💥 | -| 9 | Gabriel Del Monte | 0.0000 | 0.3427 | 606.07 | | -| 10 | Aleksey Valouev| 0.0000 | 0.3577 | 118.98 | | -| 11 | Mohul Shukla | 0.0000 | 0.5048 | 54.60s | | -| 12 | Ryan Hulke | 0.0000 | 0.5226 | 166.24 | | -| 13 | Neel Shah | 0.0000 | 0.5445 | 45.40 | Zero overlaps on all tests, adaptive schedule + early stop | -| 14 | Nawel Asgar | 0.0000 | 0.5675 | 81.49 | Adaptive penalty scaling with cubic gradients and design-size optimization -| 15 | Shiva Baghel | 0.0000 | 0.5885 | 491.00 | Stable zero-overlap with balanced optimization | -| 16 | Vansh Jain | 0.0000 | 0.9352 | 86.36 | | -| 17 | Akash Pai | 0.0006 | 0.4933 | 326.25s | | -| 18 | Zade Mahayni | 0.00665 | 0.5157 | 127.4 | Will try again tomorrow | -| 19 | Nithin Yanna | 0.0148 | 0.5034 | 247.30s | aggressive overlap penalty with quadratic scaling | -| 20 | Sean Ko | 0.0271 | .5138 | 31.83s | lr increase, decrease epoch, increase lambda overlap and decreased lambda wire_length + log penalty loss | -| 21 | Keya Gohil | 0.0155 | 0.4678 | 1513.07 | Still working | -| 22 | Prithvi Seran | 0.0499 | 0.4890 | 398.58 | | -| 23 | partcl example | 0.8 | 0.4 | 5 | example | -| 24 | Add Yours! | | | | | +| 3 | Richard Wang | 0.0000 | 0.2666 | 124.86 | | +| 4 | Neil Teje | 0.0000 | 0.2700 | 24.00s | | +| 5 | Leison Gao | 0.0000 | 0.2796 | 50.14s | | +| 6 | William Pan | 0.0000 | 0.2848 | 155.33s | | +| 7 | Ashmit Dutta | 0.0000 | 0.2870 | 995.58 | Spent my entire morning (12 am - 6 am) doing this :P | +| 8 | Pawan Paleja | 0.0000 | 0.3311 | 1.74s | Implemented hint for loss func, cosine annealing on learning rate with warmup, std annealing on lambda weight. Used optuna to tune hyperparam. Tested on gh codespaces 2-core. 
| +| 9 | Shashank Shriram | 0.0000 | 0.3312 | 11.32 | 🏎️💥 | +| 10 | Gabriel Del Monte | 0.0000 | 0.3427 | 606.07 | | +| 11 | Aleksey Valouev| 0.0000 | 0.3577 | 118.98 | | +| 12 | Mohul Shukla | 0.0000 | 0.5048 | 54.60s | | +| 13 | Ryan Hulke | 0.0000 | 0.5226 | 166.24 | | +| 14 | Neel Shah | 0.0000 | 0.5445 | 45.40 | Zero overlaps on all tests, adaptive schedule + early stop | +| 15 | Nawel Asgar | 0.0000 | 0.5675 | 81.49 | Adaptive penalty scaling with cubic gradients and design-size optimization | +| 16 | Shiva Baghel | 0.0000 | 0.5885 | 491.00 | Stable zero-overlap with balanced optimization | +| 17 | Vansh Jain | 0.0000 | 0.9352 | 86.36 | | +| 18 | Akash Pai | 0.0006 | 0.4933 | 326.25s | | +| 19 | Zade Mahayni | 0.00665 | 0.5157 | 127.4 | Will try again tomorrow | +| 20 | Nithin Yanna | 0.0148 | 0.5034 | 247.30s | aggressive overlap penalty with quadratic scaling | +| 21 | Sean Ko | 0.0271 | .5138 | 31.83s | lr increase, decrease epoch, increase lambda overlap and decreased lambda wire_length + log penalty loss | +| 22 | Keya Gohil | 0.0155 | 0.4678 | 1513.07 | Still working | +| 23 | Prithvi Seran | 0.0499 | 0.4890 | 398.58 | | +| 24 | partcl example | 0.8 | 0.4 | 5 | example | +| 25 | Add Yours! | | | | | > **To add your results:** > Insert a new row in the table above with your name, overlap, wirelength, and any notes. Ensure you sort by overlap. 
diff --git a/placement.py b/placement.py index d70412d..e036fda 100644 --- a/placement.py +++ b/placement.py @@ -39,8 +39,10 @@ """ import os +from collections import defaultdict from enum import IntEnum +import numpy as np import torch import torch.optim as optim @@ -144,8 +146,6 @@ def generate_placement_input(num_macros, num_std_cells): cell_features = torch.zeros(total_cells, 6) cell_features[:, CellFeatureIdx.AREA] = areas cell_features[:, CellFeatureIdx.NUM_PINS] = num_pins_per_cell.float() - cell_features[:, CellFeatureIdx.X] = 0.0 # x position (initialized to 0) - cell_features[:, CellFeatureIdx.Y] = 0.0 # y position (initialized to 0) cell_features[:, CellFeatureIdx.WIDTH] = cell_widths cell_features[:, CellFeatureIdx.HEIGHT] = cell_heights @@ -156,85 +156,46 @@ def generate_placement_input(num_macros, num_std_cells): # Fixed pin size for all pins (square pins) PIN_SIZE = 0.1 # All pins are 0.1 x 0.1 - pin_idx = 0 - for cell_idx in range(total_cells): - n_pins = num_pins_per_cell[cell_idx].item() - cell_width = cell_widths[cell_idx].item() - cell_height = cell_heights[cell_idx].item() - - # Generate random pin positions within the cell - # Offset from edges to ensure pins are fully inside - margin = PIN_SIZE / 2 - if cell_width > 2 * margin and cell_height > 2 * margin: - pin_x = torch.rand(n_pins) * (cell_width - 2 * margin) + margin - pin_y = torch.rand(n_pins) * (cell_height - 2 * margin) + margin - else: - # For very small cells, just center the pins - pin_x = torch.full((n_pins,), cell_width / 2) - pin_y = torch.full((n_pins,), cell_height / 2) - - # Fill pin features - pin_features[pin_idx : pin_idx + n_pins, PinFeatureIdx.CELL_IDX] = cell_idx - pin_features[pin_idx : pin_idx + n_pins, PinFeatureIdx.PIN_X] = ( - pin_x # relative to cell - ) - pin_features[pin_idx : pin_idx + n_pins, PinFeatureIdx.PIN_Y] = ( - pin_y # relative to cell - ) - pin_features[pin_idx : pin_idx + n_pins, PinFeatureIdx.X] = ( - pin_x # absolute (same as relative initially) - 
) - pin_features[pin_idx : pin_idx + n_pins, PinFeatureIdx.Y] = ( - pin_y # absolute (same as relative initially) - ) - pin_features[pin_idx : pin_idx + n_pins, PinFeatureIdx.WIDTH] = PIN_SIZE - pin_features[pin_idx : pin_idx + n_pins, PinFeatureIdx.HEIGHT] = PIN_SIZE - - pin_idx += n_pins + pin_cell_idx = torch.repeat_interleave( + torch.arange(total_cells), num_pins_per_cell + ) + pin_cell_w = cell_widths[pin_cell_idx] + pin_cell_h = cell_heights[pin_cell_idx] + margin = PIN_SIZE / 2 + raw_rx = torch.rand(total_pins) + raw_ry = torch.rand(total_pins) + usable_w = torch.clamp(pin_cell_w - 2 * margin, min=0.0) + usable_h = torch.clamp(pin_cell_h - 2 * margin, min=0.0) + has_space = (usable_w > 0) & (usable_h > 0) + pin_x = torch.where(has_space, margin + raw_rx * usable_w, pin_cell_w / 2) + pin_y = torch.where(has_space, margin + raw_ry * usable_h, pin_cell_h / 2) + + # Fill pin features + pin_features[:, PinFeatureIdx.CELL_IDX] = pin_cell_idx.float() + pin_features[:, PinFeatureIdx.PIN_X] = pin_x + pin_features[:, PinFeatureIdx.PIN_Y] = pin_y + pin_features[:, PinFeatureIdx.X] = pin_x + pin_features[:, PinFeatureIdx.Y] = pin_y + pin_features[:, PinFeatureIdx.WIDTH] = PIN_SIZE + pin_features[:, PinFeatureIdx.HEIGHT] = PIN_SIZE # Step 7: Generate edges with simple random connectivity - # Each pin connects to 1-3 random pins (preferring different cells) - edge_list = [] - avg_edges_per_pin = 2.0 - - pin_to_cell = torch.zeros(total_pins, dtype=torch.long) - pin_idx = 0 - for cell_idx, n_pins in enumerate(num_pins_per_cell): - pin_to_cell[pin_idx : pin_idx + n_pins] = cell_idx - pin_idx += n_pins - - # Create adjacency set to avoid duplicate edges - adjacency = [set() for _ in range(total_pins)] - - for pin_idx in range(total_pins): - pin_cell = pin_to_cell[pin_idx].item() - num_connections = torch.randint(1, 4, (1,)).item() # 1-3 connections per pin - - # Try to connect to pins from different cells - for _ in range(num_connections): - # Random candidate - other_pin = 
torch.randint(0, total_pins, (1,)).item() - - # Skip self-connections and existing connections - if other_pin == pin_idx or other_pin in adjacency[pin_idx]: - continue - - # Add edge (always store smaller index first for consistency) - if pin_idx < other_pin: - edge_list.append([pin_idx, other_pin]) - else: - edge_list.append([other_pin, pin_idx]) - - # Update adjacency - adjacency[pin_idx].add(other_pin) - adjacency[other_pin].add(pin_idx) - - # Convert to tensor and remove duplicates - if edge_list: - edge_list = torch.tensor(edge_list, dtype=torch.long) - edge_list = torch.unique(edge_list, dim=0) - else: - edge_list = torch.zeros((0, 2), dtype=torch.long) + num_conn_per_pin = torch.randint(1, 4, (total_pins,)) + total_candidates = num_conn_per_pin.sum().item() + src_pins = torch.repeat_interleave( + torch.arange(total_pins), num_conn_per_pin + ) + tgt_pins = torch.randint(0, total_pins, (total_candidates,)) + valid = src_pins != tgt_pins + src_pins = src_pins[valid] + tgt_pins = tgt_pins[valid] + lo = torch.min(src_pins, tgt_pins) + hi = torch.max(src_pins, tgt_pins) + edge_hash = lo.long() * total_pins + hi.long() + edge_hash = torch.unique(edge_hash) + edge_list = torch.stack( + [edge_hash // total_pins, edge_hash % total_pins], dim=1 + ) print(f"\nGenerated placement data:") print(f" Total cells: {total_cells}") @@ -277,16 +238,11 @@ def wirelength_attraction_loss(cell_features, pin_features, edge_list): src_pins = edge_list[:, 0].long() tgt_pins = edge_list[:, 1].long() - src_x = pin_absolute_x[src_pins] - src_y = pin_absolute_y[src_pins] - tgt_x = pin_absolute_x[tgt_pins] - tgt_y = pin_absolute_y[tgt_pins] - # Calculate smooth approximation of Manhattan distance # Using log-sum-exp approximation for differentiability alpha = 0.1 # Smoothing parameter - dx = torch.abs(src_x - tgt_x) - dy = torch.abs(src_y - tgt_y) + dx = torch.abs(pin_absolute_x[src_pins] - pin_absolute_x[tgt_pins]) + dy = torch.abs(pin_absolute_y[src_pins] - pin_absolute_y[tgt_pins]) # 
Smooth L1 distance with numerical stability smooth_manhattan = alpha * torch.logsumexp( @@ -294,9 +250,858 @@ def wirelength_attraction_loss(cell_features, pin_features, edge_list): ) # Total wirelength - total_wirelength = torch.sum(smooth_manhattan) + return torch.sum(smooth_manhattan) / edge_list.shape[0] # Normalize by number of edges + + +def _analytical_place(cell_features, pin_features, edge_list, iters=120): + """Spectral initial placement (Gordian/Kraftwerk2-style). + + Uses iterative weighted averaging on the connectivity graph + to find an initial analytical placement. + + Args: + cell_features: [N, 6] tensor with cell properties + pin_features: [P, 7] tensor with pin properties + edge_list: [E, 2] tensor with edge connectivity + iters: Number of averaging iterations + + Returns: + [N, 2] tensor with initial cell positions + """ + N = cell_features.shape[0] + if edge_list.shape[0] == 0: + return torch.zeros(N, 2) + + cell_idx = pin_features[:, 0].long() + src_cells = cell_idx[edge_list[:, 0].long()] + tgt_cells = cell_idx[edge_list[:, 1].long()] + valid = src_cells != tgt_cells + src_cells = src_cells[valid] + tgt_cells = tgt_cells[valid] + + row = torch.cat([src_cells, tgt_cells]) + col = torch.cat([tgt_cells, src_cells]) + vals = torch.ones(row.shape[0], dtype=torch.float32) + adj = torch.sparse_coo_tensor( + torch.stack([row, col]), vals, (N, N) + ).coalesce() + + degree = torch.clamp(torch.sparse.sum(adj, dim=1).to_dense(), min=1.0) + inv_deg = 1.0 / degree + total_area = cell_features[:, 0].sum().item() + spread = (total_area ** 0.5) * 0.1 + + pos = torch.zeros(N, 2) + pos[:, 0] = torch.linspace(-spread, spread, N) + pos[:, 1] = torch.randn(N) * spread * 0.1 + + for _ in range(iters): + new_pos = torch.sparse.mm(adj, pos) * inv_deg.unsqueeze(1) + pos = 0.8 * new_pos + 0.2 * pos + + return pos + + +def _legalize_from_analytical( + cell_features, analytical_pos, pin_features=None, edge_list=None +): + """Row-based legalization (Tetris/Abacus-style). 
+ + Places macros first by area (largest first), then standard cells + in rows guided by analytical positions and macro connectivity. + + Args: + cell_features: [N, 6] tensor with cell properties + analytical_pos: [N, 2] tensor with analytical positions + pin_features: Optional pin features for connectivity-aware placement + edge_list: Optional edge list for connectivity-aware placement + + Returns: + [N, 2] tensor with legalized cell positions + """ + N = cell_features.shape[0] + widths = cell_features[:, 4].numpy().astype(np.float64) + heights = cell_features[:, 5].numpy().astype(np.float64) + areas = cell_features[:, 0].numpy().astype(np.float64) + hh = heights / 2.0 + total_area = float(areas.sum()) + target_width = (total_area * 1.5) ** 0.5 + margin = 0.05 + + is_macro = heights > 1.5 + macro_idx = np.where(is_macro)[0] + std_idx = np.where(~is_macro)[0] + Nm, Ns = len(macro_idx), len(std_idx) + positions = np.zeros((N, 2)) + + apos = ( + analytical_pos.numpy().astype(np.float64) + if isinstance(analytical_pos, torch.Tensor) + else analytical_pos.astype(np.float64) + ) + + # Place macros by area (largest first) + if Nm > 0: + macro_order = macro_idx[np.argsort(areas[macro_idx])[::-1]] + x_pos, y_pos, row_height = 0.0, 0.0, 0.0 + for mi in macro_order: + w, h = widths[mi], heights[mi] + if x_pos + w > target_width and x_pos > 0: + y_pos += row_height + margin + x_pos = 0.0 + row_height = 0.0 + positions[mi, 0] = x_pos + w / 2 + positions[mi, 1] = y_pos + h / 2 + x_pos += w + margin + row_height = max(row_height, h) + + # Place standard cells in rows + if Ns > 0: + ideal_pos = apos[std_idx].copy() + + # Adjust positions based on macro connectivity + if ( + pin_features is not None + and edge_list is not None + and edge_list.shape[0] > 0 + ): + cidx = pin_features[:, 0].long().numpy() + src_cells = cidx[edge_list[:, 0].long().numpy()] + tgt_cells = cidx[edge_list[:, 1].long().numpy()] + valid = src_cells != tgt_cells + sc, tc = src_cells[valid], tgt_cells[valid] 
+ + macro_target_sum = np.zeros((N, 2)) + macro_target_cnt = np.zeros(N) + + mask1 = is_macro[sc] & ~is_macro[tc] + np.add.at(macro_target_sum, tc[mask1], positions[sc[mask1]]) + np.add.at(macro_target_cnt, tc[mask1], 1.0) + + mask2 = is_macro[tc] & ~is_macro[sc] + np.add.at(macro_target_sum, sc[mask2], positions[tc[mask2]]) + np.add.at(macro_target_cnt, sc[mask2], 1.0) + + for si, ci in enumerate(std_idx): + if macro_target_cnt[ci] > 0: + macro_center = macro_target_sum[ci] / macro_target_cnt[ci] + ideal_pos[si] = 0.7 * macro_center + 0.3 * apos[ci] + + row_id = (ideal_pos[:, 1] / 0.8).astype(np.int64) + sort_key = row_id.astype(np.float64) * 1e6 + ideal_pos[:, 0] + std_order = np.argsort(sort_key) + + std_y_start = ( + (max(positions[mi, 1] + hh[mi] for mi in macro_idx) + margin) + if Nm > 0 + else 0.0 + ) + x_pos, y_pos, row_height = 0.0, std_y_start, 0.0 + + for rank in std_order: + ci = std_idx[rank] + w, h = widths[ci], heights[ci] + if x_pos + w > target_width and x_pos > 0: + y_pos += row_height + margin + x_pos = 0.0 + row_height = 0.0 + positions[ci, 0] = x_pos + w / 2 + positions[ci, 1] = y_pos + h / 2 + x_pos += w + margin + row_height = max(row_height, h) + + return torch.from_numpy(positions).float() + + +def _push_overlapping( + positions, widths, heights, idx_a, idx_b, displacements, areas=None +): + """Push apart overlapping cell pairs. + + Computes overlap amounts and applies displacement forces + proportional to overlap, weighted by cell area. 
+ + Args: + positions: [N, 2] array of cell positions + widths: [N] array of cell widths + heights: [N] array of cell heights + idx_a: Array of first cell indices in pairs + idx_b: Array of second cell indices in pairs + displacements: [N, 2] array to accumulate displacement vectors + areas: Optional [N] array of cell areas for weighted pushing + + Returns: + True if any overlaps were found and resolved + """ + dx = positions[idx_a, 0] - positions[idx_b, 0] + dy = positions[idx_a, 1] - positions[idx_b, 1] + adx, ady = np.abs(dx), np.abs(dy) + min_sep_x = (widths[idx_a] + widths[idx_b]) / 2 + min_sep_y = (heights[idx_a] + heights[idx_b]) / 2 + overlap_x, overlap_y = min_sep_x - adx, min_sep_y - ady + overlapping = (overlap_x > 0) & (overlap_y > 0) + + if not overlapping.any(): + return False + + ov_x, ov_y = overlap_x[overlapping], overlap_y[overlapping] + ia, ib = idx_a[overlapping], idx_b[overlapping] + d_x, d_y = dx[overlapping], dy[overlapping] + + if areas is not None: + total = areas[ia] + areas[ib] + frac_a = np.clip(areas[ib] / total, 0.2, 0.8) + frac_b = 1.0 - frac_a + else: + frac_a = frac_b = np.full(len(ia), 0.5) + + px_mask = (ov_x <= ov_y).astype(np.float64) + py_mask = 1.0 - px_mask + + xs = np.sign(d_x) + xs[xs == 0] = 1.0 + np.add.at(displacements[:, 0], ia, (ov_x + 0.02) * px_mask * frac_a * xs) + np.add.at(displacements[:, 0], ib, -(ov_x + 0.02) * px_mask * frac_b * xs) + + ys = np.sign(d_y) + ys[ys == 0] = 1.0 + np.add.at(displacements[:, 1], ia, (ov_y + 0.02) * py_mask * frac_a * ys) + np.add.at(displacements[:, 1], ib, -(ov_y + 0.02) * py_mask * frac_b * ys) + + return True + + +def _resolve_overlaps(cell_features, max_iters=300): + """Iterative overlap resolution via displacement forces. + + Repeatedly detects overlapping cell pairs and pushes them apart + using sweep-based pair detection for efficiency. 
+ + Args: + cell_features: [N, 6] tensor with cell properties + max_iters: Maximum number of resolution iterations + + Returns: + Updated cell_features tensor with resolved positions + """ + N = cell_features.shape[0] + positions = cell_features[:, 2:4].detach().clone().numpy().astype(np.float64) + widths = cell_features[:, 4].detach().numpy().astype(np.float64) + heights = cell_features[:, 5].detach().numpy().astype(np.float64) + cell_areas = widths * heights + + is_macro = heights > 1.5 + macro_idx, std_idx = np.where(is_macro)[0], np.where(~is_macro)[0] + Nm, Ns = len(macro_idx), len(std_idx) + max_std_w = widths[std_idx].max() if Ns > 0 else 0.0 + + for iteration in range(max_iters): + any_overlap = False + displacements = np.zeros_like(positions) + + # Macro-macro overlaps + if Nm > 1: + for ii in range(Nm): + for jj in range(ii + 1, Nm): + i, j = macro_idx[ii], macro_idx[jj] + hit = _push_overlapping( + positions, widths, heights, + np.array([i]), np.array([j]), + displacements, areas=cell_areas, + ) + any_overlap = any_overlap or hit + + # Macro-standard cell overlaps + if Nm > 0 and Ns > 0: + for mi in macro_idx: + dx = np.abs(positions[std_idx, 0] - positions[mi, 0]) + dy = np.abs(positions[std_idx, 1] - positions[mi, 1]) + possible = ( + (dx < (widths[mi] + widths[std_idx]) / 2) + & (dy < (heights[mi] + heights[std_idx]) / 2) + ) + if possible.any(): + nearby = std_idx[possible] + hit = _push_overlapping( + positions, widths, heights, + np.full(len(nearby), mi, dtype=np.intp), nearby, + displacements, areas=cell_areas, + ) + any_overlap = any_overlap or hit + + # Standard cell-standard cell overlaps (sweep-based) + if Ns > 1: + dim = iteration % 2 + order = np.argsort(positions[std_idx, dim]) + sg = std_idx[order] + sp, sw, sh = positions[sg], widths[sg], heights[sg] + + for k in range(1, min(200, Ns)): + n = Ns - k + gap = sp[k:, dim] - sp[:n, dim] + if gap.min() > max_std_w: + break + adx = np.abs(sp[k:, 0] - sp[:n, 0]) + ady = np.abs(sp[k:, 1] - sp[:n, 
1]) + ov_x = (sw[:n] + sw[k:]) / 2 - adx + ov_y = (sh[:n] + sh[k:]) / 2 - ady + hit = (ov_x > 0) & (ov_y > 0) + + if not hit.any(): + continue + + any_overlap = True + ox, oy = ov_x[hit], ov_y[hit] + ia, ib = sg[:n][hit], sg[k:][hit] + px_mask = (ox <= oy).astype(np.float64) + py_mask = 1.0 - px_mask + + d_x = sp[:n, 0][hit] - sp[k:, 0][hit] + xs = np.sign(d_x) + xs[xs == 0] = 1.0 + np.add.at( + displacements[:, 0], ia, + (ox + 0.02) * px_mask * 0.5 * xs, + ) + np.add.at( + displacements[:, 0], ib, + -(ox + 0.02) * px_mask * 0.5 * xs, + ) + + d_y = sp[:n, 1][hit] - sp[k:, 1][hit] + ys = np.sign(d_y) + ys[ys == 0] = 1.0 + np.add.at( + displacements[:, 1], ia, + (oy + 0.02) * py_mask * 0.5 * ys, + ) + np.add.at( + displacements[:, 1], ib, + -(oy + 0.02) * py_mask * 0.5 * ys, + ) + + if not any_overlap: + break + positions += displacements + + result = cell_features.clone() + result[:, 2:4] = torch.from_numpy(positions).float() + return result + + +class _SpatialGrid: + """Grid-based spatial index for O(1) amortized overlap queries. + + Divides the placement area into grid cells and maintains + cell-to-bucket mappings for efficient neighbor lookups. 
+ """ + __slots__ = ('grid', 'cell_keys', 'gs', 'macro_list') + + def __init__(self, pos, is_macro, grid_size): + self.gs = grid_size + self.grid = defaultdict(list) + self.cell_keys = {} + self.macro_list = list(np.where(is_macro)[0]) + for i in range(len(pos)): + key = (int(pos[i, 0] // grid_size), int(pos[i, 1] // grid_size)) + self.cell_keys[i] = key + self.grid[key].append(i) + + def update(self, i, old_x, old_y, new_x, new_y): + gs = self.gs + old_key = (int(old_x // gs), int(old_y // gs)) + new_key = (int(new_x // gs), int(new_y // gs)) + if old_key != new_key: + try: + self.grid[old_key].remove(i) + except ValueError: + pass + if not self.grid[old_key]: + del self.grid[old_key] + self.cell_keys[i] = new_key + self.grid[new_key].append(i) + + def check_overlap(self, i, nx, ny, pos, hw, hh, sr): + gs = self.gs + gx, gy = int(nx // gs), int(ny // gs) + hwi, hhi = hw[i], hh[i] + for mi in self.macro_list: + if ( + mi != i + and abs(nx - pos[mi, 0]) < hwi + hw[mi] + and abs(ny - pos[mi, 1]) < hhi + hh[mi] + ): + return True + for dx in range(-sr, sr + 1): + for dy in range(-sr, sr + 1): + bucket = self.grid.get((gx + dx, gy + dy)) + if bucket is None: + continue + for j in bucket: + if ( + j != i + and abs(nx - pos[j, 0]) < hwi + hw[j] + and abs(ny - pos[j, 1]) < hhi + hh[j] + ): + return True + return False + + def check_overlap_skip(self, i, nx, ny, pos, hw, hh, sr, skip): + gs = self.gs + gx, gy = int(nx // gs), int(ny // gs) + hwi, hhi = hw[i], hh[i] + for mi in self.macro_list: + if ( + mi != i + and mi != skip + and abs(nx - pos[mi, 0]) < hwi + hw[mi] + and abs(ny - pos[mi, 1]) < hhi + hh[mi] + ): + return True + for dx in range(-sr, sr + 1): + for dy in range(-sr, sr + 1): + bucket = self.grid.get((gx + dx, gy + dy)) + if bucket is None: + continue + for j in bucket: + if ( + j != i + and j != skip + and abs(nx - pos[j, 0]) < hwi + hw[j] + and abs(ny - pos[j, 1]) < hhi + hh[j] + ): + return True + return False + + +def _swap_refine(cell_features, 
pin_features, edge_list, max_passes=3): + """Detailed placement: pairwise cell swapping (FastDP-style). + + Tries swapping pairs of cells and keeps swaps that reduce wirelength + without introducing overlaps. + + Args: + cell_features: [N, 6] tensor with cell properties + pin_features: [P, 7] tensor with pin properties + edge_list: [E, 2] tensor with edge connectivity + max_passes: Maximum number of swap passes + + Returns: + Updated cell_features tensor with refined positions + """ + N = cell_features.shape[0] + if N > 2500 or edge_list.shape[0] == 0: + return cell_features + + pos = cell_features[:, 2:4].detach().clone().numpy().astype(np.float64) + w = cell_features[:, 4].detach().numpy().astype(np.float64) + h = cell_features[:, 5].detach().numpy().astype(np.float64) + hw, hh = w / 2.0, h / 2.0 + + cidx = pin_features[:, 0].long().numpy() + prx = pin_features[:, 1].detach().numpy().astype(np.float64) + pry = pin_features[:, 2].detach().numpy().astype(np.float64) + src = edge_list[:, 0].long().numpy() + tgt = edge_list[:, 1].long().numpy() + E = len(src) + + px, py = pos[cidx, 0] + prx, pos[cidx, 1] + pry + src_cells, tgt_cells = cidx[src], cidx[tgt] + + # Build per-cell pin and edge indices + pin_order = np.argsort(cidx) + sorted_cidx = cidx[pin_order] + pin_starts = np.searchsorted(sorted_cidx, np.arange(N), side='left') + pin_ends = np.searchsorted(sorted_cidx, np.arange(N), side='right') + + ec_all = np.concatenate([src_cells, tgt_cells]) + ei_all = np.concatenate([np.arange(E), np.arange(E)]) + eord = np.argsort(ec_all) + sec, sei = ec_all[eord], ei_all[eord] + es = np.searchsorted(sec, np.arange(N), side='left') + ee = np.searchsorted(sec, np.arange(N), side='right') + cedges = [ + np.unique(sei[es[c]:ee[c]]).astype(np.intp) for c in range(N) + ] + + # Build swap candidate list + valid_edges = src_cells != tgt_cells + sc_v, tc_v = src_cells[valid_edges], tgt_cells[valid_edges] + lo_e, hi_e = np.minimum(sc_v, tc_v), np.maximum(sc_v, tc_v) + ukeys = 
np.unique(lo_e.astype(np.int64) * N + hi_e.astype(np.int64)) + swap_list = list(zip( + (ukeys // N).astype(np.intp).tolist(), + (ukeys % N).astype(np.intp).tolist(), + )) + + if N <= 500: + existing = set(swap_list) + for i in range(N): + for j in range(i + 1, N): + existing.add((i, j)) + swap_list = sorted(existing) + + is_macro = h > 1.5 + use_grid = N > 300 + + if use_grid: + max_std_hw = hw[~is_macro].max() if (~is_macro).any() else 1.0 + grid_size = max(max_std_hw * 4, 2.0) + sgrid = _SpatialGrid(pos, is_macro, grid_size) + sr_std = 2 + sr_macro = ( + int(np.ceil((hw[is_macro].max() + max_std_hw) / grid_size)) + 1 + if is_macro.any() + else 2 + ) + + def no_ov(ci, nx, ny, skip): + sr = sr_macro if is_macro[ci] else sr_std + return not sgrid.check_overlap_skip( + ci, nx, ny, pos, hw, hh, sr, skip + ) + else: + sgrid = None + + def no_ov(ci, nx, ny, skip): + ox = np.abs(nx - pos[:, 0]) + oy = np.abs(ny - pos[:, 1]) + ox[ci] = 1e18 + ox[skip] = 1e18 + return not np.any((ox < hw[ci] + hw) & (oy < hh[ci] + hh)) + + for _ in range(max_passes): + improved = False + + for i, j in swap_list: + if not no_ov(i, pos[j, 0], pos[j, 1], j): + continue + if not no_ov(j, pos[i, 0], pos[i, 1], i): + continue + + ae = np.union1d(cedges[i], cedges[j]) + ad = np.abs(px[src[ae]] - px[tgt[ae]]) + bd = np.abs(py[src[ae]] - py[tgt[ae]]) + mx = np.maximum(ad, bd) + old_wl = ( + 0.1 * np.log(np.exp((ad - mx) * 10) + np.exp((bd - mx) * 10)) + + mx + ).sum() + + oix, oiy = pos[i, 0], pos[i, 1] + ojx, ojy = pos[j, 0], pos[j, 1] + pos[i, 0], pos[j, 0] = ojx, oix + pos[i, 1], pos[j, 1] = ojy, oiy + + pi = pin_order[pin_starts[i]:pin_ends[i]] + pj = pin_order[pin_starts[j]:pin_ends[j]] + if len(pi): + px[pi] = pos[i, 0] + prx[pi] + py[pi] = pos[i, 1] + pry[pi] + if len(pj): + px[pj] = pos[j, 0] + prx[pj] + py[pj] = pos[j, 1] + pry[pj] + + ad = np.abs(px[src[ae]] - px[tgt[ae]]) + bd = np.abs(py[src[ae]] - py[tgt[ae]]) + mx = np.maximum(ad, bd) + new_wl = ( + 0.1 * np.log(np.exp((ad - mx) * 10) 
+ np.exp((bd - mx) * 10)) + + mx + ).sum() + + if new_wl < old_wl - 1e-6: + improved = True + if sgrid: + sgrid.update(i, oix, oiy, pos[i, 0], pos[i, 1]) + sgrid.update(j, ojx, ojy, pos[j, 0], pos[j, 1]) + else: + pos[i, 0], pos[j, 0] = oix, ojx + pos[i, 1], pos[j, 1] = oiy, ojy + if len(pi): + px[pi] = oix + prx[pi] + py[pi] = oiy + pry[pi] + if len(pj): + px[pj] = ojx + prx[pj] + py[pj] = ojy + pry[pj] + + if not improved: + break + + result = cell_features.clone() + result[:, 2:4] = torch.from_numpy(pos).float() + return result + + +def _slide_refine(cell_features, pin_features, edge_list, max_passes=15): + """Detailed placement: single-cell sliding (NTUPlace3-style). + + Moves individual cells toward their optimal positions (based on + connected pin locations) without introducing overlaps. + + Args: + cell_features: [N, 6] tensor with cell properties + pin_features: [P, 7] tensor with pin properties + edge_list: [E, 2] tensor with edge connectivity + max_passes: Maximum number of sliding passes + + Returns: + Updated cell_features tensor with refined positions + """ + N = cell_features.shape[0] + if edge_list.shape[0] == 0: + return cell_features + + pos = cell_features[:, 2:4].detach().clone().numpy().astype(np.float64) + w = cell_features[:, 4].detach().numpy().astype(np.float64) + h = cell_features[:, 5].detach().numpy().astype(np.float64) + hw, hh = w / 2.0, h / 2.0 + + cidx = pin_features[:, 0].long().numpy() + prx = pin_features[:, 1].detach().numpy().astype(np.float64) + pry = pin_features[:, 2].detach().numpy().astype(np.float64) + src = edge_list[:, 0].long().numpy() + tgt = edge_list[:, 1].long().numpy() + E = len(src) + + px, py = pos[cidx, 0] + prx, pos[cidx, 1] + pry + + # Build per-cell pin and edge indices + pin_order = np.argsort(cidx) + sorted_cidx = cidx[pin_order] + pin_starts = np.searchsorted(sorted_cidx, np.arange(N), side='left') + pin_ends = np.searchsorted(sorted_cidx, np.arange(N), side='right') + + src_cells, tgt_cells = cidx[src], 
cidx[tgt] + ec_all = np.concatenate([src_cells, tgt_cells]) + ei_all = np.concatenate([np.arange(E), np.arange(E)]) + eord = np.argsort(ec_all) + sec, sei = ec_all[eord], ei_all[eord] + es = np.searchsorted(sec, np.arange(N), side='left') + ee = np.searchsorted(sec, np.arange(N), side='right') + cedge_arr = [ + np.unique(sei[es[c]:ee[c]]).astype(np.intp) for c in range(N) + ] - return total_wirelength / edge_list.shape[0] # Normalize by number of edges + # Build inter-cell partner lists + inter_partners = [None] * N + for i in range(N): + ei = cedge_arr[i] + if len(ei) == 0: + continue + si, ti = src[ei], tgt[ei] + mask = cidx[si] != cidx[ti] + if not mask.any(): + continue + ie = ei[mask] + s, t = src[ie], tgt[ie] + inter_partners[i] = np.where(cidx[s] == i, t, s) + + _a, _ia = 0.1, 10.0 + + def cell_wl(i): + edges = cedge_arr[i] + if len(edges) == 0: + return 0.0 + s, t = src[edges], tgt[edges] + adx, ady = np.abs(px[s] - px[t]), np.abs(py[s] - py[t]) + mx = np.maximum(adx, ady) + return ( + _a * np.log(np.exp((adx - mx) * _ia) + np.exp((ady - mx) * _ia)) + + mx + ).sum() + + is_macro = h > 1.5 + use_grid = N > 300 + + if use_grid: + max_std_hw = hw[~is_macro].max() if (~is_macro).any() else 1.0 + gs = max(max_std_hw * 4, 2.0) + sgrid = _SpatialGrid(pos, is_macro, gs) + sr_std = 2 + sr_macro = ( + int(np.ceil((hw[is_macro].max() + max_std_hw) / gs)) + 1 + if is_macro.any() + else 2 + ) + + def no_overlap(i, nx, ny): + sr = sr_macro if is_macro[i] else sr_std + return not sgrid.check_overlap(i, nx, ny, pos, hw, hh, sr) + else: + sgrid = None + + def no_overlap(i, nx, ny): + ox = np.abs(nx - pos[:, 0]) + oy = np.abs(ny - pos[:, 1]) + ox[i] = 1e18 + return not np.any((ox < hw[i] + hw) & (oy < hh[i] + hh)) + + def apply_move(i, nx, ny): + ox, oy = pos[i, 0], pos[i, 1] + pos[i, 0] = nx + pos[i, 1] = ny + pi = pin_order[pin_starts[i]:pin_ends[i]] + if len(pi): + px[pi] = nx + prx[pi] + py[pi] = ny + pry[pi] + if sgrid: + sgrid.update(i, ox, oy, nx, ny) + + def 
undo_move(i, ox, oy, cx, cy): + pos[i, 0] = ox + pos[i, 1] = oy + pi = pin_order[pin_starts[i]:pin_ends[i]] + if len(pi): + px[pi] = ox + prx[pi] + py[pi] = oy + pry[pi] + if sgrid: + sgrid.update(i, cx, cy, ox, oy) + + def try_move(i, nx, ny, ow): + if not no_overlap(i, nx, ny): + return 0.0 + ox, oy = pos[i, 0], pos[i, 1] + apply_move(i, nx, ny) + nw = cell_wl(i) + if nw < ow - 1e-8: + return ow - nw + undo_move(i, ox, oy, nx, ny) + return 0.0 + + def try_bisect(i, ddx, ddy, ow): + fx, fy = pos[i, 0] + ddx, pos[i, 1] + ddy + if no_overlap(i, fx, fy): + ox, oy = pos[i, 0], pos[i, 1] + apply_move(i, fx, fy) + nw = cell_wl(i) + if nw < ow - 1e-8: + return ow - nw + undo_move(i, ox, oy, fx, fy) + return 0.0 + lo, hi_b = 0.0, 1.0 + for _ in range(5): + mid = (lo + hi_b) / 2.0 + if no_overlap(i, pos[i, 0] + mid * ddx, pos[i, 1] + mid * ddy): + lo = mid + else: + hi_b = mid + if lo < 0.02: + return 0.0 + bx, by = pos[i, 0] + lo * ddx, pos[i, 1] + lo * ddy + ox, oy = pos[i, 0], pos[i, 1] + apply_move(i, bx, by) + nw = cell_wl(i) + if nw < ow - 1e-8: + return ow - nw + undo_move(i, ox, oy, bx, by) + return 0.0 + + def compute_grad(i): + p = inter_partners[i] + if p is None: + return 0.0, 0.0 + ddx, ddy = pos[i, 0] - px[p], pos[i, 1] - py[p] + adx, ady = np.abs(ddx), np.abs(ddy) + mx = np.maximum(adx, ady) + ex = np.exp((adx - mx) * _ia) + ey = np.exp((ady - mx) * _ia) + d = ex + ey + sx = np.sign(ddx) + sx[sx == 0] = 1.0 + sy = np.sign(ddy) + sy[sy == 0] = 1.0 + return float(np.sum(ex / d * sx)), float(np.sum(ey / d * sy)) + + _fracs = (1.0, 0.5, 0.25, 0.125) + + for pass_idx in range(max_passes): + total_imp = 0.0 + order = ( + np.arange(N) if pass_idx % 2 == 0 + else np.arange(N - 1, -1, -1) + ) + + for i in order: + p = inter_partners[i] + if p is None: + continue + ow = cell_wl(i) + if ow < 1e-10: + continue + + mx, my = np.mean(px[p]), np.mean(py[p]) + dm, dn = mx - pos[i, 0], my - pos[i, 1] + gx, gy = compute_grad(i) + gn = max(np.sqrt(gx * gx + gy * gy), 1e-12) + 
ss = max(abs(dm), abs(dn), 1.0) + + # Try gradient-based move + moved = False + for f in _fracs: + imp = try_move( + i, + pos[i, 0] - f * ss * gx / gn, + pos[i, 1] - f * ss * gy / gn, + ow, + ) + if imp > 0: + total_imp += imp + moved = True + break + + # Try mean-based move + if not moved: + for f in _fracs: + imp = try_move( + i, pos[i, 0] + f * dm, pos[i, 1] + f * dn, ow + ) + if imp > 0: + total_imp += imp + moved = True + break + + # Try bisection move + if not moved and max(abs(dm), abs(dn)) > 1e-6: + imp = try_bisect(i, dm, dn, ow) + if imp > 0: + total_imp += imp + moved = True + + # Try axis-aligned moves + if not moved: + cw = ow + for f in _fracs[:3]: + imp = try_move(i, pos[i, 0] + f * dm, pos[i, 1], cw) + if imp > 0: + total_imp += imp + cw -= imp + break + for f in _fracs[:3]: + imp = try_move(i, pos[i, 0], pos[i, 1] + f * dn, cw) + if imp > 0: + total_imp += imp + break + + # Try axis-aligned bisection + if not moved: + cw = cell_wl(i) + if abs(dm) > 1e-6: + imp = try_bisect(i, dm, 0.0, cw) + if imp > 0: + total_imp += imp + cw -= imp + if abs(dn) > 1e-6: + imp = try_bisect(i, 0.0, dn, cw) + if imp > 0: + total_imp += imp + + if total_imp < 1e-6: + break + + result = cell_features.clone() + result[:, 2:4] = torch.from_numpy(pos).float() + return result def overlap_repulsion_loss(cell_features, pin_features, edge_list): @@ -364,15 +1169,22 @@ def train_placement( cell_features, pin_features, edge_list, - num_epochs=1000, - lr=0.01, + num_epochs=2000, + lr=0.1, lambda_wirelength=1.0, - lambda_overlap=10.0, + lambda_overlap=200.0, verbose=True, log_interval=100, ): """Train the placement optimization using gradient descent. + Uses a multi-stage pipeline: + 1. Analytical placement (spectral) + 2. Row-based legalization + 3. Gradient optimization with overlap penalty scheduling + 4. Iterative overlap resolution + 5. 
Detailed placement (slide + swap + slide) + Args: cell_features: [N, 6] tensor with cell properties pin_features: [P, 7] tensor with pin properties @@ -393,13 +1205,7 @@ def train_placement( # Clone features and create learnable positions cell_features = cell_features.clone() initial_cell_features = cell_features.clone() - - # Make only cell positions require gradients - cell_positions = cell_features[:, 2:4].clone().detach() - cell_positions.requires_grad_(True) - - # Create optimizer - optimizer = optim.Adam([cell_positions], lr=lr) + N = cell_features.shape[0] # Track loss history loss_history = { @@ -408,52 +1214,267 @@ def train_placement( "overlap_loss": [], } - # Training loop + widths, heights = cell_features[:, 4], cell_features[:, 5] + + # Fast path for very large designs + if N > 100000: + apos = _analytical_place( + cell_features, pin_features, edge_list, iters=200 + ) + cpos = _legalize_from_analytical( + cell_features, apos, pin_features, edge_list + ) + result = cell_features.clone() + result[:, 2:4] = cpos + return { + "final_cell_features": result, + "initial_cell_features": initial_cell_features, + "loss_history": loss_history, + } + + # Stage 1-2: Analytical placement + legalization + analytical_pos = _analytical_place( + cell_features, pin_features, edge_list, iters=200 + ) + cell_positions = _legalize_from_analytical( + cell_features, analytical_pos, pin_features, edge_list + ) + cell_positions = cell_positions.clone().detach().requires_grad_(True) + + # All hyperparameters smooth functions of log2(N) + log_n = max(np.log2(max(N, 4)), 2.0) + sqrt_n = max(np.sqrt(N), 1.0) + num_epochs = int(np.clip(25000 / log_n ** 1.5, 500, 2500)) + lr = float(np.clip(1.2 / log_n, 0.06, 0.25)) + lambda_overlap = float(np.clip(40 * log_n, 150, 400)) + _ol_start = float(np.clip(0.0002 * log_n, 0.0005, 0.01)) + _ol_end_mult = float(np.clip(0.5 * log_n, 1.5, 8.0)) + _ol_ratio = (_ol_end_mult * lambda_overlap) / _ol_start + + # Create optimizer with warmup + cosine 
schedule + warmup = max(10, num_epochs // 12) + optimizer = optim.Adam([cell_positions], lr=lr, betas=(0.9, 0.999)) + + def lr_sched(epoch): + if epoch < warmup: + return 0.1 + 0.9 * (epoch / warmup) + p = (epoch - warmup) / max(num_epochs - warmup - 1, 1) + return 0.01 + 0.99 * 0.5 * (1.0 + np.cos(np.pi * p)) + + scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_sched) + + # Precompute indices for loss computation + _ci = pin_features[:, 0].long() + _prx, _pry = pin_features[:, 1], pin_features[:, 2] + _ne = edge_list.shape[0] + _he = _ne > 0 + if _he: + _src, _tgt = edge_list[:, 0].long(), edge_list[:, 1].long() + _alpha, _inv = 0.1, 10.0 + + # Precompute overlap structures based on design size + _pw = N <= 500 + if _pw: + _hw = (widths.unsqueeze(1) + widths.unsqueeze(0)) / 2 + _hh = (heights.unsqueeze(1) + heights.unsqueeze(0)) / 2 + _tr, _tc = torch.triu_indices(N, N, offset=1) + else: + _im = heights > 1.5 + _mi = torch.where(_im)[0] + _si = torch.where(~_im)[0] + _Nm, _Ns = _mi.shape[0], _si.shape[0] + + _hmm = _Nm > 1 + if _hmm: + mw, mh = widths[_mi], heights[_mi] + _mmhw = (mw.unsqueeze(1) + mw.unsqueeze(0)) / 2 + _mmhh = (mh.unsqueeze(1) + mh.unsqueeze(0)) / 2 + _mmr, _mmc = torch.triu_indices(_Nm, _Nm, offset=1) + + _hms = _Nm > 0 and _Ns > 0 + if _hms: + _mshw = ( + (widths[_mi].unsqueeze(1) + widths[_si].unsqueeze(0)) / 2 + ) + _mshh = ( + (heights[_mi].unsqueeze(1) + heights[_si].unsqueeze(0)) / 2 + ) + + _hss = _Ns > 1 + if _hss: + _sw, _sh = widths[_si], heights[_si] + _msw = _sw.max().item() + _K = min(max(30, int(np.sqrt(_Ns) * 1.2)), _Ns - 1) + + inv_N = 1.0 / N + _bw, _bp, _zs = float('inf'), None, 0 + + # Stage 3: Gradient optimization with overlap penalty scheduling for epoch in range(num_epochs): optimizer.zero_grad() - # Create cell_features with current positions - cell_features_current = cell_features.clone() - cell_features_current[:, 2:4] = cell_positions + # Wirelength loss + if _he: + pax = cell_positions[_ci, 0] + _prx + pay = 
cell_positions[_ci, 1] + _pry + dx = torch.abs(pax[_src] - pax[_tgt]) + dy = torch.abs(pay[_src] - pay[_tgt]) + wl = ( + _alpha * torch.logaddexp(dx * _inv, dy * _inv).sum() + ) / _ne + else: + wl = torch.tensor(0.0, requires_grad=True) - # Calculate losses - wl_loss = wirelength_attraction_loss( - cell_features_current, pin_features, edge_list - ) - overlap_loss = overlap_repulsion_loss( - cell_features_current, pin_features, edge_list - ) + # Overlap loss + if _pw: + pdx = torch.abs( + cell_positions[:, 0].unsqueeze(1) + - cell_positions[:, 0].unsqueeze(0) + ) + pdy = torch.abs( + cell_positions[:, 1].unsqueeze(1) + - cell_positions[:, 1].unsqueeze(0) + ) + ov = ( + torch.relu(_hw - pdx) * torch.relu(_hh - pdy) + )[_tr, _tc] + ol = (ov.sum() + ov.pow(3).sum()) * inv_N + else: + ol = torch.tensor(0.0) - # Combined loss - total_loss = lambda_wirelength * wl_loss + lambda_overlap * overlap_loss + # Macro-macro overlaps + if _hmm: + mp = cell_positions[_mi] + mdx = torch.abs( + mp[:, 0].unsqueeze(1) - mp[:, 0].unsqueeze(0) + ) + mdy = torch.abs( + mp[:, 1].unsqueeze(1) - mp[:, 1].unsqueeze(0) + ) + ov = ( + torch.relu(_mmhw - mdx) * torch.relu(_mmhh - mdy) + )[_mmr, _mmc] + ol = ol + ov.sum() + ov.pow(3).sum() + + # Macro-standard cell overlaps + if _hms: + mp, sp = cell_positions[_mi], cell_positions[_si] + oms = ( + torch.relu( + _mshw + - torch.abs( + mp[:, 0].unsqueeze(1) - sp[:, 0].unsqueeze(0) + ) + ) + * torch.relu( + _mshh + - torch.abs( + mp[:, 1].unsqueeze(1) - sp[:, 1].unsqueeze(0) + ) + ) + ) + ol = ol + oms.sum() + oms.pow(3).sum() + + # Standard cell-standard cell overlaps (sweep-based) + if _hss: + sp = cell_positions[_si] + with torch.no_grad(): + order = torch.argsort(sp[:, 0]) + sps, sws, shs = sp[order], _sw[order], _sh[order] + ss_s = ss_c = torch.tensor(0.0) + for k in range(1, _K + 1): + n = _Ns - k + dxk = sps[k:, 0] - sps[:n, 0] + if dxk.detach().min().item() > _msw: + break + dyk = torch.abs(sps[k:, 1] - sps[:n, 1]) + ov = ( + 
torch.relu((sws[:n] + sws[k:]) * 0.5 - dxk) + * torch.relu((shs[:n] + shs[k:]) * 0.5 - dyk) + ) + ss_s = ss_s + ov.sum() + ss_c = ss_c + ov.pow(3).sum() + ol = ol + ss_s + ss_c + + ol = ol * inv_N + + # Combined loss with scheduled weights + prog = epoch / max(num_epochs - 1, 1) + c_ol = _ol_start * (_ol_ratio ** prog) + c_wl = lambda_wirelength * (1.0 - 0.3 * prog) + loss = c_wl * wl + c_ol * ol # Backward pass - total_loss.backward() + loss.backward() # Gradient clipping to prevent extreme updates - torch.nn.utils.clip_grad_norm_([cell_positions], max_norm=5.0) + torch.nn.utils.clip_grad_norm_([cell_positions], max_norm=10.0) # Update positions optimizer.step() + scheduler.step() # Record losses - loss_history["total_loss"].append(total_loss.item()) - loss_history["wirelength_loss"].append(wl_loss.item()) - loss_history["overlap_loss"].append(overlap_loss.item()) + loss_history["total_loss"].append(loss.item()) + loss_history["wirelength_loss"].append(wl.item()) + loss_history["overlap_loss"].append(ol.item()) + + # Track best zero-overlap solution + ov_v, wl_v = ol.item(), wl.item() + if ov_v < 1e-4 and wl_v < _bw: + _bw = wl_v + _bp = cell_positions.detach().clone() + if ov_v < 1e-6: + _zs += 1 + else: + _zs = 0 + + # Early stopping if converged + if _zs >= 100 and epoch > num_epochs * 2 // 3: + wh = loss_history["wirelength_loss"] + if ( + abs(np.mean(wh[-100:-50]) - np.mean(wh[-50:])) + / max(abs(np.mean(wh[-100:-50])), 1e-10) + < 0.001 + ): + break # Log progress if verbose and (epoch % log_interval == 0 or epoch == num_epochs - 1): - print(f"Epoch {epoch}/{num_epochs}:") - print(f" Total Loss: {total_loss.item():.6f}") - print(f" Wirelength Loss: {wl_loss.item():.6f}") - print(f" Overlap Loss: {overlap_loss.item():.6f}") + print( + f"Epoch {epoch}/{num_epochs}:" + f" Total={loss.item():.6f}" + f" WL={wl.item():.6f}" + f" OL={ol.item():.6f}" + ) # Create final cell features - final_cell_features = cell_features.clone() - final_cell_features[:, 2:4] = 
def _sweep_overlap_pairs(positions, widths, heights):
    """Find every pair of cells whose rectangles strictly overlap.

    Cells are treated as axis-aligned rectangles centred on ``positions``:
    two cells overlap when their centre distance is smaller than half the
    sum of their sizes on *both* axes. Cells that merely touch along an
    edge are not reported.

    The cells are sorted by x-coordinate and compared against the cell
    ``k`` places later in that order, for ``k = 1, 2, ...``. Each offset is
    one vectorised O(N) step. The scan stops at the first offset whose
    smallest x-gap exceeds the widest cell, because gaps only grow with
    ``k`` and no pair further apart can overlap in x. The number of offsets
    visited therefore depends on how many cells share an x-window one
    max-width wide; in the worst case (all cells stacked in x) every offset
    is visited and the cost is O(N^2).

    Args:
        positions: [N, 2] NumPy array of cell centre coordinates (x, y).
        widths: [N] NumPy array of cell widths.
        heights: [N] NumPy array of cell heights.

    Returns:
        Tuple of (pairs, areas):
            - pairs: list of (i, j) tuples of original cell indices, i < j,
              ordered by sorted-order offset and then by x position.
            - areas: list of Python floats, the overlap area of each pair,
              in the same order as ``pairs``.
    """
    N = len(positions)
    # Fewer than two cells cannot overlap; returning early also avoids
    # calling .max() on an empty array, which raises ValueError.
    if N < 2:
        return [], []

    si = np.argsort(positions[:, 0])
    sp, sw, sh = positions[si], widths[si], heights[si]
    # Only the x-extent bounds the sweep: two cells overlap in x only if
    # their gap is below (w_i + w_j) / 2, which never exceeds max width.
    max_w = widths.max()
    pairs, areas = [], []

    for k in range(1, N):
        n = N - k
        # x-gap between each cell and the cell k places to its right.
        dx = sp[k:, 0] - sp[:n, 0]
        if dx.min() > max_w:
            break
        ox = (sw[:n] + sw[k:]) / 2 - dx
        x_hit = ox > 0
        if not x_hit.any():
            continue
        oy = (sh[:n] + sh[k:]) / 2 - np.abs(sp[:n, 1] - sp[k:, 1])
        hits = np.flatnonzero(x_hit & (oy > 0))
        if hits.size == 0:
            continue
        # Map sorted positions back to original indices, smaller one first.
        a, b = si[hits], si[hits + k]
        pairs.extend(
            zip(np.minimum(a, b).tolist(), np.maximum(a, b).tolist())
        )
        areas.extend((ox[hits] * oy[hits]).tolist())

    return pairs, areas
@@ -486,49 +1553,20 @@ def calculate_overlap_metrics(cell_features): "overlap_percentage": 0.0, } - # Extract cell properties - positions = cell_features[:, 2:4].detach().numpy() # [N, 2] - widths = cell_features[:, 4].detach().numpy() # [N] - heights = cell_features[:, 5].detach().numpy() # [N] - areas = cell_features[:, 0].detach().numpy() # [N] - - overlap_count = 0 - total_overlap_area = 0.0 - max_overlap_area = 0.0 - overlap_areas = [] + pos = cell_features[:, 2:4].detach().numpy() + w = cell_features[:, 4].detach().numpy() + h = cell_features[:, 5].detach().numpy() + a = cell_features[:, 0].detach().numpy() - # Check all pairs - for i in range(N): - for j in range(i + 1, N): - # Calculate center-to-center distances - dx = abs(positions[i, 0] - positions[j, 0]) - dy = abs(positions[i, 1] - positions[j, 1]) - - # Minimum separation for non-overlap - min_sep_x = (widths[i] + widths[j]) / 2 - min_sep_y = (heights[i] + heights[j]) / 2 - - # Calculate overlap amounts - overlap_x = max(0, min_sep_x - dx) - overlap_y = max(0, min_sep_y - dy) - - # Overlap occurs only if both x and y overlap - if overlap_x > 0 and overlap_y > 0: - overlap_area = overlap_x * overlap_y - overlap_count += 1 - total_overlap_area += overlap_area - max_overlap_area = max(max_overlap_area, overlap_area) - overlap_areas.append(overlap_area) - - # Calculate percentage of total area - total_area = sum(areas) - overlap_percentage = (overlap_count / N * 100) if total_area > 0 else 0.0 + pairs, ov = _sweep_overlap_pairs(pos, w, h) return { - "overlap_count": overlap_count, - "total_overlap_area": total_overlap_area, - "max_overlap_area": max_overlap_area, - "overlap_percentage": overlap_percentage, + "overlap_count": len(pairs), + "total_overlap_area": sum(ov) if ov else 0.0, + "max_overlap_area": max(ov) if ov else 0.0, + "overlap_percentage": ( + (len(pairs) / N * 100) if sum(a) > 0 else 0.0 + ), } @@ -547,34 +1585,18 @@ def calculate_cells_with_overlaps(cell_features): if N <= 1: return set() - 
# Extract cell properties - positions = cell_features[:, 2:4].detach().numpy() - widths = cell_features[:, 4].detach().numpy() - heights = cell_features[:, 5].detach().numpy() - - cells_with_overlaps = set() - - # Check all pairs - for i in range(N): - for j in range(i + 1, N): - # Calculate center-to-center distances - dx = abs(positions[i, 0] - positions[j, 0]) - dy = abs(positions[i, 1] - positions[j, 1]) - - # Minimum separation for non-overlap - min_sep_x = (widths[i] + widths[j]) / 2 - min_sep_y = (heights[i] + heights[j]) / 2 - - # Calculate overlap amounts - overlap_x = max(0, min_sep_x - dx) - overlap_y = max(0, min_sep_y - dy) + pairs, _ = _sweep_overlap_pairs( + cell_features[:, 2:4].detach().numpy(), + cell_features[:, 4].detach().numpy(), + cell_features[:, 5].detach().numpy(), + ) - # Overlap occurs only if both x and y overlap - if overlap_x > 0 and overlap_y > 0: - cells_with_overlaps.add(i) - cells_with_overlaps.add(j) + cells = set() + for i, j in pairs: + cells.add(i) + cells.add(j) - return cells_with_overlaps + return cells def calculate_normalized_metrics(cell_features, pin_features, edge_list): @@ -598,34 +1620,34 @@ def calculate_normalized_metrics(cell_features, pin_features, edge_list): N = cell_features.shape[0] # Calculate overlap metric: num cells with overlaps / total cells - cells_with_overlaps = calculate_cells_with_overlaps(cell_features) - num_cells_with_overlaps = len(cells_with_overlaps) - overlap_ratio = num_cells_with_overlaps / N if N > 0 else 0.0 + num_ov = len(calculate_cells_with_overlaps(cell_features)) - # Calculate wirelength metric: (wirelength / num nets) / sqrt(total area) if edge_list.shape[0] == 0: - normalized_wl = 0.0 - num_nets = 0 - else: - # Calculate total wirelength using the loss function (unnormalized) - wl_loss = wirelength_attraction_loss(cell_features, pin_features, edge_list) - total_wirelength = wl_loss.item() * edge_list.shape[0] # Undo normalization - - # Calculate total area - total_area = 
cell_features[:, 0].sum().item() - - num_nets = edge_list.shape[0] + return { + "overlap_ratio": num_ov / N if N > 0 else 0.0, + "normalized_wl": 0.0, + "num_cells_with_overlaps": num_ov, + "total_cells": N, + "num_nets": 0, + } - # Normalize: (wirelength / net) / sqrt(area) - # This gives a dimensionless quality metric independent of design size - normalized_wl = (total_wirelength / num_nets) / (total_area ** 0.5) if total_area > 0 else 0.0 + # Calculate wirelength metric: (wirelength / num nets) / sqrt(total area) + wl = wirelength_attraction_loss(cell_features, pin_features, edge_list) + ta = cell_features[:, 0].sum().item() + ne = edge_list.shape[0] + + # Normalize: (wirelength / net) / sqrt(area) + # This gives a dimensionless quality metric independent of design size + normalized_wl = ( + (wl.item() * ne / ne) / (ta ** 0.5) if ta > 0 else 0.0 + ) return { - "overlap_ratio": overlap_ratio, + "overlap_ratio": num_ov / N if N > 0 else 0.0, "normalized_wl": normalized_wl, - "num_cells_with_overlaps": num_cells_with_overlaps, + "num_cells_with_overlaps": num_ov, "total_cells": N, - "num_nets": num_nets, + "num_nets": ne, } @@ -782,8 +1804,11 @@ def main(): normalized_metrics = calculate_normalized_metrics( final_cell_features, pin_features, edge_list ) - print(f"Overlap Ratio: {normalized_metrics['overlap_ratio']:.4f} " - f"({normalized_metrics['num_cells_with_overlaps']}/{normalized_metrics['total_cells']} cells)") + print( + f"Overlap Ratio: {normalized_metrics['overlap_ratio']:.4f} " + f"({normalized_metrics['num_cells_with_overlaps']}" + f"/{normalized_metrics['total_cells']} cells)" + ) print(f"Normalized Wirelength: {normalized_metrics['normalized_wl']:.4f}") # Success check @@ -793,11 +1818,20 @@ def main(): if normalized_metrics["num_cells_with_overlaps"] == 0: print("✓ PASS: No overlapping cells!") print("✓ PASS: Overlap ratio is 0.0") - print("\nCongratulations! 
Your implementation successfully eliminated all overlaps.") - print(f"Your normalized wirelength: {normalized_metrics['normalized_wl']:.4f}") + print( + "\nCongratulations! Your implementation successfully" + " eliminated all overlaps." + ) + print( + f"Your normalized wirelength:" + f" {normalized_metrics['normalized_wl']:.4f}" + ) else: print("✗ FAIL: Overlaps still exist") - print(f" Need to eliminate overlaps in {normalized_metrics['num_cells_with_overlaps']} cells") + print( + f" Need to eliminate overlaps in" + f" {normalized_metrics['num_cells_with_overlaps']} cells" + ) print("\nSuggestions:") print(" 1. Check your overlap_repulsion_loss() implementation") print(" 2. Change lambdas (try increasing lambda_overlap)")