diff --git a/.gitignore b/.gitignore index fdd0c6d..5e78125 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,7 @@ *.gif *.bmp +placement_experiments.py +README_AND_NOTES.md + **/__pycache__/** \ No newline at end of file diff --git a/README.md b/README.md index cf27bfb..e761714 100644 --- a/README.md +++ b/README.md @@ -31,30 +31,31 @@ We will review submissions on a rolling basis. | Rank | Name | Overlap | Wirelength (um) | Runtime (s) | Notes | |------|-----------------|-------------|-----------------|-------------|----------------------| -| 1 | Brayden Rudisill | 0.0000 | 0.2611 | 50.51 | Timed on a mac air | -| 2 | manuhalapeth | 0.0000 | 0.2630 | 196.8 | | -| 3 | Neil Teje | 0.0000 | 0.2700 | 24.00s | | -| 4 | Leison Gao | 0.0000 | 0.2796 | 50.14s | | -| 5 | William Pan | 0.0000 | 0.2848 | 155.33s | | -| 6 | Ashmit Dutta | 0.0000 | 0.2870 | 995.58 | Spent my entire morning (12 am - 6 am) doing this :P | -| 7 | Pawan Paleja | 0.0000 | 0.3311 | 1.74s | Implemented hint for loss func, cosine annealing on learning rate with warmup, std annealing on lambda weight. Used optuna to tune hyperparam. Tested on gh codespaces 2-core. | - 8 | Shashank Shriram | 0.0000 | 0.3312 | 11.32 | 🏎️💥 | -| 9 | Gabriel Del Monte | 0.0000 | 0.3427 | 606.07 | | -| 10 | Aleksey Valouev| 0.0000 | 0.3577 | 118.98 | | -| 11 | Mohul Shukla | 0.0000 | 0.5048 | 54.60s | | -| 12 | Ryan Hulke | 0.0000 | 0.5226 | 166.24 | | -| 13 | Neel Shah | 0.0000 | 0.5445 | 45.40 | Zero overlaps on all tests, adaptive schedule + early stop | -| 14 | Nawel Asgar | 0.0000 | 0.5675 | 81.49 | Adaptive penalty scaling with cubic gradients and design-size optimization -| 15 | Shiva Baghel | 0.0000 | 0.5885 | 491.00 | Stable zero-overlap with balanced optimization | -| 16 | Vansh Jain | 0.0000 | 0.9352 | 86.36 | | -| 17 | Akash Pai | 0.0006 | 0.4933 | 326.25s | | -| 18 | Zade Mahayni | 0.00665 | 0.5157 | 127.4 | Will try again tomorrow | -| 19 | Nithin Yanna | 0.0148 | 0.5034 | 247.30s | aggressive overlap penalty with quadratic scaling | -| 20 | Sean Ko | 0.0271 | .5138 | 31.83s | lr increase, decrease epoch, increase lambda overlap and decreased lambda wire_length + log penalty loss | -| 21 | Keya Gohil | 0.0155 | 0.4678 | 1513.07 | Still working | -| 22 | Prithvi Seran | 0.0499 | 0.4890 | 398.58 | | -| 23 | partcl example | 0.8 | 0.4 | 5 | example | -| 24 | Add Yours! | | | | | +| 1 | Sidhartha Parhi | 0.0000 | 0.2549 | 458.28 | Multi-stage alternating optimization and initial centering regularization to find the Pareto Front. | +| 2 | Brayden Rudisill | 0.0000 | 0.2611 | 50.51 | Timed on a mac air | +| 3 | manuhalapeth | 0.0000 | 0.2630 | 196.8 | | +| 4 | Neil Teje | 0.0000 | 0.2700 | 24.00s | | +| 5 | Leison Gao | 0.0000 | 0.2796 | 50.14s | | +| 6 | William Pan | 0.0000 | 0.2848 | 155.33s | | +| 7 | Ashmit Dutta | 0.0000 | 0.2870 | 995.58 | Spent my entire morning (12 am - 6 am) doing this :P | +| 8 | Pawan Paleja | 0.0000 | 0.3311 | 1.74s | Implemented hint for loss func, cosine annealing on learning rate with warmup, std annealing on lambda weight. Used optuna to tune hyperparam. Tested on gh codespaces 2-core. 
+| 9 | Shashank Shriram | 0.0000 | 0.3312 | 11.32 | 🏎️💥 |
+| 10 | Gabriel Del Monte | 0.0000 | 0.3427 | 606.07 | |
+| 11 | Aleksey Valouev | 0.0000 | 0.3577 | 118.98 | |
+| 12 | Mohul Shukla | 0.0000 | 0.5048 | 54.60s | |
+| 13 | Ryan Hulke | 0.0000 | 0.5226 | 166.24 | |
+| 14 | Neel Shah | 0.0000 | 0.5445 | 45.40 | Zero overlaps on all tests, adaptive schedule + early stop |
+| 15 | Nawel Asgar | 0.0000 | 0.5675 | 81.49 | Adaptive penalty scaling with cubic gradients and design-size optimization |
+| 16 | Shiva Baghel | 0.0000 | 0.5885 | 491.00 | Stable zero-overlap with balanced optimization |
+| 17 | Vansh Jain | 0.0000 | 0.9352 | 86.36 | |
+| 18 | Akash Pai | 0.0006 | 0.4933 | 326.25s | |
+| 19 | Zade Mahayni | 0.00665 | 0.5157 | 127.4 | Will try again tomorrow |
+| 20 | Nithin Yanna | 0.0148 | 0.5034 | 247.30s | aggressive overlap penalty with quadratic scaling |
+| 21 | Keya Gohil | 0.0155 | 0.4678 | 1513.07 | Still working |
+| 22 | Sean Ko | 0.0271 | 0.5138 | 31.83s | lr increase, decrease epoch, increase lambda overlap and decreased lambda wire_length + log penalty loss |
+| 23 | Prithvi Seran | 0.0499 | 0.4890 | 398.58 | |
+| 24 | partcl example | 0.8 | 0.4 | 5 | example |
+| 25 | Add Yours! | | | | |
 
 > **To add your results:**
 > Insert a new row in the table above with your name, overlap, wirelength, and any notes. Ensure you sort by overlap.
diff --git a/placement.py b/placement.py
index d70412d..b50e721 100644
--- a/placement.py
+++ b/placement.py
@@ -44,7 +44,6 @@
 import torch
 import torch.optim as optim
 
-
 # Feature index enums for cleaner code access
 class CellFeatureIdx(IntEnum):
     """Indices for cell feature tensor columns."""
@@ -83,6 +82,26 @@ class PinFeatureIdx(IntEnum):
 # Output directory
 OUTPUT_DIR = os.path.dirname(os.path.abspath(__file__))
 
+# Training Hyperparameters
+LR = 0.01
+PARETO_NUM_EPOCHS = 5000
+TRAINING_STAGES = {
+    "0": 32000,
+    "1": 8000,
+    "2": 20000,
+    "3": 10000,
+    "4": 10000,
+    "5": 30000,
+    "6": 50000,
+    "7": 15000
+}
+LAMBDA_WIRELENGTH = 100000000.0
+LAMBDA_OVERLAP = 65000000.0
+LAMBDA_PARETO = 1000.0  # 100.0
+LAMBDA_CENTERING = 1.0
+ALPHA_MANHATTAN = 0.1  # Smoothing parameter
+COS_LR_MIN_FCTR = 0.01
+
 # ======= SETUP =======
 
 def generate_placement_input(num_macros, num_std_cells):
@@ -262,8 +281,8 @@
     Returns:
         Scalar loss value
     """
-    if edge_list.shape[0] == 0:
-        return torch.tensor(0.0, requires_grad=True)
+    # if edge_list.shape[0] == 0:
+    #     return torch.tensor(0.0, requires_grad=True)
 
     # Update absolute pin positions based on cell positions
     cell_positions = cell_features[:, 2:4]  # [N, 2]
@@ -284,21 +303,36 @@
     # Calculate smooth approximation of Manhattan distance
     # Using log-sum-exp approximation for differentiability
-    alpha = 0.1  # Smoothing parameter
     dx = torch.abs(src_x - tgt_x)
     dy = torch.abs(src_y - tgt_y)
 
     # Smooth L1 distance with numerical stability
-    smooth_manhattan = alpha * torch.logsumexp(
-        torch.stack([dx / alpha, dy / alpha], dim=0), dim=0
+    smooth_manhattan = ALPHA_MANHATTAN * torch.logsumexp(
+        torch.stack([dx / ALPHA_MANHATTAN, dy / ALPHA_MANHATTAN], dim=0), dim=0
     )
 
     # Total wirelength
     total_wirelength = torch.sum(smooth_manhattan)
 
-    return total_wirelength / edge_list.shape[0]  # Normalize by number of edges
+    # Normalize by number of edges
+    total_wirelength = total_wirelength / edge_list.shape[0]
+
+    return total_wirelength
+
+
+wirelength_attraction_loss_jit = torch.compile(wirelength_attraction_loss)
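The `logsumexp` term above is what keeps the wirelength loss differentiable: `ALPHA_MANHATTAN * logsumexp(dx/α, dy/α)` is a smooth upper bound on `max(dx, dy)` that tightens as the smoothing constant shrinks. A minimal standalone sketch of that surrogate (illustrative only, not part of this patch; `smooth_distance`, `dx`, and `dy` are hypothetical names):

```python
import torch

ALPHA = 0.1  # same value as ALPHA_MANHATTAN in the patch

def smooth_distance(dx: torch.Tensor, dy: torch.Tensor, alpha: float = ALPHA) -> torch.Tensor:
    # alpha * logsumexp(dx/alpha, dy/alpha) is a smooth, differentiable
    # upper bound on max(dx, dy); it approaches the hard max as alpha -> 0.
    return alpha * torch.logsumexp(torch.stack([dx / alpha, dy / alpha], dim=0), dim=0)

dx = torch.tensor([3.0, 0.0])
dy = torch.tensor([4.0, 0.0])
print(smooth_distance(dx, dy))  # ~[4.0000, 0.0693] vs. hard max [4.0, 0.0]
```

The small constant offset at zero (about α·ln 2) is the price of smoothness; it carries no gradient, so it does not affect the optimization.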
+
+@torch.compile
+def centering_regularization(cell_features, pin_features, edge_list):
+    """Calculate regularization term to center the cells on the chip."""
+    x = cell_features[:, CellFeatureIdx.X]
+    y = cell_features[:, CellFeatureIdx.Y]
+    centering_reg = LAMBDA_CENTERING * (torch.sum((x**2 + y**2)**0.5) / x.shape[0])
+    return centering_reg
+
+@torch.compile
 def overlap_repulsion_loss(cell_features, pin_features, edge_list):
     """Calculate loss to prevent cell overlaps.
@@ -343,31 +377,43 @@
     Returns:
         Scalar loss value (should be 0 when no overlaps exist)
     """
-    N = cell_features.shape[0]
-    if N <= 1:
-        return torch.tensor(0.0, requires_grad=True)
+    def calculate_overlap_1d(x_or_y, w_or_h):
+        overlap = (w_or_h.unsqueeze(0) + w_or_h.unsqueeze(1)) / 2.
+        overlap = overlap - torch.abs(x_or_y.unsqueeze(0) - x_or_y.unsqueeze(1))
+        overlap = torch.relu(overlap)
+        overlap = torch.triu(overlap, diagonal=1)
+        return overlap
+
+    # N = cell_features.shape[0]
+    # if N <= 1:
+    #     return torch.tensor(0.0, requires_grad=True)
+
+    # areas = cell_features[:, CellFeatureIdx.AREA]  # unused
+    # num_pins = cell_features[:, CellFeatureIdx.NUM_PINS]  # unused
+    x = cell_features[:, CellFeatureIdx.X]
+    y = cell_features[:, CellFeatureIdx.Y]
+    w = cell_features[:, CellFeatureIdx.WIDTH]
+    h = cell_features[:, CellFeatureIdx.HEIGHT]
+
+    overlap_x = calculate_overlap_1d(x, w)
+    overlap_y = calculate_overlap_1d(y, h)
-    # TODO: Implement overlap detection and loss calculation here
-    #
-    # Your implementation should:
-    # 1. Extract cell positions, widths, and heights
-    # 2. Compute pairwise overlaps using vectorized operations
-    # 3. Return a scalar loss that is zero when no overlaps exist
-    #
-    # Delete this placeholder and add your implementation:
+    loss = overlap_x * overlap_y
+    # loss = torch.mean(loss)
+    num_non_zero = (loss != 0).sum().clamp(min=1)
+    loss = loss.sum() / num_non_zero
-    # Placeholder - returns a constant loss (REPLACE THIS!)
-    return torch.tensor(1.0, requires_grad=True)
+    return loss
 
 
 def train_placement(
     cell_features,
     pin_features,
     edge_list,
-    num_epochs=1000,
-    lr=0.01,
-    lambda_wirelength=1.0,
-    lambda_overlap=10.0,
+    num_epochs=None,
+    lr=LR,
+    lambda_wirelength=LAMBDA_WIRELENGTH,
+    lambda_overlap=LAMBDA_OVERLAP,
     verbose=True,
     log_interval=100,
 ):
@@ -390,6 +436,72 @@
         - initial_cell_features: Original cell positions (for comparison)
         - loss_history: Loss values over time
     """
+    N = cell_features.shape[0]
+
+    def pareto_alternating_optimization(training_stage, epoch_curr):
+        """Alternate objectives to find an optimal solution within the Pareto front."""
+        nonlocal scheduler
+        if (epoch_curr // PARETO_NUM_EPOCHS) % 2 == 0:
+            lambda_overlap_final = LAMBDA_PARETO if training_stage in {0, 2, 6} else lambda_overlap
+            lambda_wirelength_final = lambda_wirelength
+        else:
+            lambda_overlap_final = lambda_overlap
+            lambda_wirelength_final = LAMBDA_PARETO if training_stage in {0, 2, 6} else lambda_wirelength
+        if scheduler is not None:
+            scheduler = None
+        return lambda_overlap_final, lambda_wirelength_final
+
+    def overlap_optimization(training_stage):
+        """Prioritize overlap objective."""
+        nonlocal scheduler
+        lambda_overlap_final = lambda_overlap
+        lambda_wirelength_final = 0.
+ if scheduler is None and N >= 25 and training_stage in {5, 7}: + for pg in optimizer.param_groups: + pg['lr'] = lr + scheduler = optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=get_stage_epochs(N, str(training_stage), TRAINING_STAGES), eta_min=lr * COS_LR_MIN_FCTR + ) + return lambda_overlap_final, lambda_wirelength_final + + def get_stage_epochs(N, training_stage, training_stages): + num_epochs_stage = training_stages[training_stage] + training_stage = int(training_stage) + if training_stage == 1: + if N < 50: + num_epochs_stage = 1500 + elif N < 150: + num_epochs_stage = 4000 + elif N < 250: + num_epochs_stage = 6000 + elif training_stage in {3, 5, 7}: + if N < 150: + num_epochs_stage = 1000 + elif N < 250: + num_epochs_stage = 2000 + return num_epochs_stage + + def get_cumulative_epochs(N, training_stages): + epoch_stages = set() + num_epochs_tot = 0 + for training_stage in training_stages.keys(): + num_epochs_tot += get_stage_epochs(N, training_stage, training_stages) + epoch_stages.add(num_epochs_tot) + + return num_epochs_tot, epoch_stages + + # @torch.compile + # def freeze_adam_state(optimizer, params): + # for p in params: + # # p.grad.zero_() + # p.grad = None + # state = optimizer.state[p] + # if state: + # state['exp_avg'].zero_() + # state['exp_avg_sq'].zero_() + # if 'max_exp_avg_sq' in state: + # state['max_exp_avg_sq'].zero_() + # Clone features and create learnable positions cell_features = cell_features.clone() initial_cell_features = cell_features.clone() @@ -399,7 +511,25 @@ def train_placement( cell_positions.requires_grad_(True) # Create optimizer - optimizer = optim.Adam([cell_positions], lr=lr) + optimizer = optim.Adam([cell_positions], lr=lr) #, foreach=False, fused=False) + # optimizer = optim.SGD([cell_positions], lr=lr) + scheduler = None + # scheduler = optim.lr_scheduler.CosineAnnealingLR( + # optimizer, T_max=num_epochs, eta_min=lr*0.3 + # ) + + # Initialize training stage and epochs + training_stage = 0 + epoch_curr = 0 + epoch_stages = None + if num_epochs is None: + num_epochs, epoch_stages = get_cumulative_epochs(N, TRAINING_STAGES) + + # Initialize macro position freezing + freeze_macros = freeze_std_cells = False + areas = cell_features[:, CellFeatureIdx.AREA] + idx_macros = (areas >= MIN_MACRO_AREA) & (areas < MAX_MACRO_AREA) + idx_std_cells = torch.isin(areas, torch.tensor(STANDARD_CELL_AREAS)) # Track loss history loss_history = { @@ -416,30 +546,57 @@ def train_placement( cell_features_current = cell_features.clone() cell_features_current[:, 2:4] = cell_positions + # Update training stage and macro position freezing + if epoch in epoch_stages: + training_stage += 1 + if training_stage == 3 or training_stage >= 5: + freeze_macros = False + if training_stage >= 6: + freeze_std_cells = True + elif training_stage == 2 or training_stage == 4: + freeze_macros = True + + epoch_curr = 0 + + # Set training hyperparams and optimization method + if training_stage % 2 == 0: + lambda_overlap_final, lambda_wirelength_final = pareto_alternating_optimization(training_stage, epoch_curr) + else: + lambda_overlap_final, lambda_wirelength_final = overlap_optimization(training_stage) + # Calculate losses - wl_loss = wirelength_attraction_loss( - cell_features_current, pin_features, edge_list - ) - overlap_loss = overlap_repulsion_loss( - cell_features_current, pin_features, edge_list - ) + overlap_loss = overlap_repulsion_loss(cell_features_current, pin_features, edge_list) + wl_loss = wirelength_attraction_loss_jit(cell_features_current, pin_features, 
edge_list)
+        # Stages 0 and 1 also regularize toward the centre of the chip: this gives an easier
+        # optimization landscape and reduces wirelength on average.
+        if training_stage in {0, 1}:
+            wl_loss += centering_regularization(cell_features_current, pin_features, edge_list)
 
         # Combined loss
-        total_loss = lambda_wirelength * wl_loss + lambda_overlap * overlap_loss
+        total_loss = lambda_wirelength_final * wl_loss + lambda_overlap_final * overlap_loss
 
         # Backward pass
         total_loss.backward()
 
         # Gradient clipping to prevent extreme updates
-        torch.nn.utils.clip_grad_norm_([cell_positions], max_norm=5.0)
+        torch.nn.utils.clip_grad_norm_([cell_positions], max_norm=2.5)
+
+        # Freeze appropriate cell positions based on training stage
+        if freeze_macros:
+            cell_positions.grad[idx_macros] = 0.
+        elif freeze_std_cells:
+            cell_positions.grad[idx_std_cells] = 0.  # write into .grad directly; indexing the tensor first would only modify a copy
 
         # Update positions
         optimizer.step()
+        if scheduler is not None:
+            scheduler.step()
 
         # Record losses
-        loss_history["total_loss"].append(total_loss.item())
-        loss_history["wirelength_loss"].append(wl_loss.item())
-        loss_history["overlap_loss"].append(overlap_loss.item())
+        if verbose and (epoch % log_interval == 0 or epoch == num_epochs - 1):
+            loss_history["total_loss"].append(total_loss.item())
+            loss_history["wirelength_loss"].append(wl_loss.item())
+            loss_history["overlap_loss"].append(overlap_loss.item())
 
         # Log progress
         if verbose and (epoch % log_interval == 0 or epoch == num_epochs - 1):
@@ -448,6 +605,8 @@
             print(f" Wirelength Loss: {wl_loss.item():.6f}")
             print(f" Overlap Loss: {overlap_loss.item():.6f}")
 
+        epoch_curr += 1
+
     # Create final cell features
     final_cell_features = cell_features.clone()
     final_cell_features[:, 2:4] = cell_positions.detach()
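For completeness, the pairwise overlap term introduced in `overlap_repulsion_loss` can be sanity-checked by hand. The snippet below is a self-contained sketch that mirrors the patch's `calculate_overlap_1d` logic on two 2×2 cells whose centres are one unit apart in x; the `overlap_1d` helper and the example tensors are illustrative and not part of placement.py:

```python
import torch

def overlap_1d(centers: torch.Tensor, sizes: torch.Tensor) -> torch.Tensor:
    # Pairwise 1-D overlap between intervals centred at `centers` with widths `sizes`,
    # mirroring calculate_overlap_1d in the patch.
    pair = (sizes.unsqueeze(0) + sizes.unsqueeze(1)) / 2.0
    pair = pair - torch.abs(centers.unsqueeze(0) - centers.unsqueeze(1))
    pair = torch.relu(pair)               # separated pairs contribute nothing
    return torch.triu(pair, diagonal=1)   # count each pair once, ignore self-overlap

# Two 2x2 cells, centres 1 unit apart in x and aligned in y.
x = torch.tensor([0.0, 1.0]); w = torch.tensor([2.0, 2.0])
y = torch.tensor([0.0, 0.0]); h = torch.tensor([2.0, 2.0])

area = overlap_1d(x, w) * overlap_1d(y, h)          # overlap region is 1 x 2 = 2
loss = area.sum() / (area != 0).sum().clamp(min=1)  # mean over overlapping pairs only
print(loss)  # tensor(2.)
```

Averaging over only the overlapping pairs rather than all N² entries keeps the loss magnitude roughly independent of design size, which appears to be the reason the patch normalizes by `num_non_zero`.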