diff --git a/README.md b/README.md index 18aef43..c142a12 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,15 @@ The `config` folder contains example configuration files for different architect 4. **Tensor parallelism works automatically** because PyTorch Lightning handles the distributed setup and PyTorch's Distributed Tensor API shards your model across GPUs without requiring changes to your component code +## Development + +Format and lint the codebase: + +```bash +uv run ruff format . +uv run ruff check --fix . +``` + ## Testing Run the test suite to ensure everything is working correctly: diff --git a/crosslayer_transcoder/data/__init__.py b/crosslayer_transcoder/data/__init__.py index 52b22ec..81d524b 100644 --- a/crosslayer_transcoder/data/__init__.py +++ b/crosslayer_transcoder/data/__init__.py @@ -5,9 +5,6 @@ with efficient multiprocessing data generation. """ -import torch -import torch.multiprocessing as mp - from .activation_sources import ActivationComputer, DiskActivationSource from .data_generator import DataGeneratorProcess diff --git a/crosslayer_transcoder/data/activation_sources.py b/crosslayer_transcoder/data/activation_sources.py index 3a6e9e2..4ef3650 100644 --- a/crosslayer_transcoder/data/activation_sources.py +++ b/crosslayer_transcoder/data/activation_sources.py @@ -79,8 +79,7 @@ def _extract_activations(self, model: Any, tokens: torch.Tensor, mask: torch.Ten """ mlp_ins = [] mlp_outs = [] - with model.trace(tokens) as tracer: - + with model.trace(tokens): # Extract from all transformer layers for i in range(self.n_layers): # MLP input (after layer norm) @@ -147,7 +146,7 @@ def _setup_file(self) -> None: # self.tensor_handle = self.file_handle[self.accessor] self.position = 0 except Exception as e: - raise RuntimeError(f"Failed to open activation file {self.file_path}: {e}") + raise RuntimeError(f"Failed to open activation file {self.file_path}: {e}") from e def get_next_batch(self, batch_size: Optional[int] = None) -> torch.Tensor: """ diff --git a/crosslayer_transcoder/data/data_generator.py b/crosslayer_transcoder/data/data_generator.py index 5bd5e2f..8e0fc38 100644 --- a/crosslayer_transcoder/data/data_generator.py +++ b/crosslayer_transcoder/data/data_generator.py @@ -8,12 +8,9 @@ import os from typing import Optional -import nnsight import torch from datasets import load_dataset -from torch.utils.data import DataLoader -from crosslayer_transcoder.data import text_dataset from crosslayer_transcoder.data.activation_sources import ActivationComputer, DiskActivationSource from crosslayer_transcoder.data.deployment_policy import DeploymentPolicy @@ -205,7 +202,7 @@ def cleanup(self): for child in children: try: child.terminate() - except: + except Exception: pass except ImportError: pass # psutil not available diff --git a/crosslayer_transcoder/data/deployment_policy.py b/crosslayer_transcoder/data/deployment_policy.py index 3481343..99cb437 100644 --- a/crosslayer_transcoder/data/deployment_policy.py +++ b/crosslayer_transcoder/data/deployment_policy.py @@ -4,7 +4,6 @@ """ import logging -import re from abc import ABC, abstractmethod from enum import Enum from typing import Any, Dict, List, Optional, Tuple diff --git a/crosslayer_transcoder/data/generation_loop.py b/crosslayer_transcoder/data/generation_loop.py index 021ec92..f5c6225 100644 --- a/crosslayer_transcoder/data/generation_loop.py +++ b/crosslayer_transcoder/data/generation_loop.py @@ -13,7 +13,6 @@ from crosslayer_transcoder.data import text_dataset from crosslayer_transcoder.data.activation_sources import ActivationComputer, DiskActivationSource from crosslayer_transcoder.data.deployment_policy import ( - BaseDeploymentPolicy, DeploymentPolicy, create_deployment_policy, ) @@ -138,7 +137,7 @@ def generation_loop( # Generate new activations gen_start = time.time() activations = self._generate_activations() - gen_time = time.time() - gen_start + time.time() - gen_start # Take only as many activations as we have indices num_indices = len(indices_to_refresh) @@ -256,7 +255,7 @@ def refill_from_disk(self, batch_size: int = 40_000): self.shared_buffer.set_activations(indices_to_fill, samples) # Calculate refill rate for this batch - batch_time = time.time() - batch_start_time + time.time() - batch_start_time batch_refilled = len(indices_to_fill) total_refilled += batch_refilled diff --git a/crosslayer_transcoder/data/process_monitor.py b/crosslayer_transcoder/data/process_monitor.py index a84bd37..7738e5c 100644 --- a/crosslayer_transcoder/data/process_monitor.py +++ b/crosslayer_transcoder/data/process_monitor.py @@ -56,7 +56,7 @@ def update_dashboard(self, status: str, buffer_stats: Dict[str, Any], current_de # Format uptime with more stable display if uptime > 60: - uptime_str = f"{int(uptime/60)}:{int(uptime%60):02d}" + uptime_str = f"{int(uptime / 60)}:{int(uptime % 60):02d}" else: uptime_str = f"{int(uptime)}s" diff --git a/crosslayer_transcoder/data/shared_memory.py b/crosslayer_transcoder/data/shared_memory.py index f9405ec..58e617b 100644 --- a/crosslayer_transcoder/data/shared_memory.py +++ b/crosslayer_transcoder/data/shared_memory.py @@ -6,14 +6,11 @@ import atexit import logging import multiprocessing as mp -import threading import time from multiprocessing import shared_memory -from typing import Any, Dict, List, Tuple +from typing import Any, Dict -import numpy as np import torch -import torch.multiprocessing as torch_mp # No config imports needed in this module diff --git a/crosslayer_transcoder/main.py b/crosslayer_transcoder/main.py index 2afce89..d9e4c7f 100755 --- a/crosslayer_transcoder/main.py +++ b/crosslayer_transcoder/main.py @@ -17,9 +17,7 @@ class CrossLayerTranscoderCLI(LightningCLI): def add_arguments_to_parser(self, parser): """Add custom argument linking and configuration.""" # Link model and data parameters that should be consistent - parser.link_arguments( - "data.init_args.n_layers", "model.init_args.nonlinearity.init_args.n_layers" - ) + parser.link_arguments("data.init_args.n_layers", "model.init_args.nonlinearity.init_args.n_layers") parser.link_arguments("data.init_args.n_layers", "model.init_args.n_layers") parser.link_arguments( "model.init_args.d_features", @@ -34,7 +32,7 @@ def main(): os.environ.setdefault("WANDB_CACHE_DIR", f"{os.getcwd()}/wandb_cache") # Create CLI with subclass mode to support class_path configuration - cli = CrossLayerTranscoderCLI( + CrossLayerTranscoderCLI( model_class=L.LightningModule, # Use base class for subclass mode datamodule_class=L.LightningDataModule, # Use base class for subclass mode subclass_mode_model=True, # Enable subclass mode for model diff --git a/crosslayer_transcoder/metrics/replacement_model_accuracy.py b/crosslayer_transcoder/metrics/replacement_model_accuracy.py index 2e11c01..d869d47 100644 --- a/crosslayer_transcoder/metrics/replacement_model_accuracy.py +++ b/crosslayer_transcoder/metrics/replacement_model_accuracy.py @@ -102,7 +102,7 @@ def update(self, clt, max_batches=20): with torch.no_grad(): for i, (tokens, mask) in enumerate(self.loader): torch.cuda.empty_cache() - print(f"computing replacement model", i) + print("computing replacement model", i) tokens = self.handle_device(tokens) mask = self.handle_device(mask) if i >= max_batches: diff --git a/crosslayer_transcoder/model/clt.py b/crosslayer_transcoder/model/clt.py index cc58891..921f2ed 100644 --- a/crosslayer_transcoder/model/clt.py +++ b/crosslayer_transcoder/model/clt.py @@ -78,7 +78,9 @@ def decode( "from_layer to_layer -> batch_size to_layer d_acts", ) - def forward(self, acts: Float[torch.Tensor, "batch_size n_layers d_acts"]) -> Tuple[ + def forward( + self, acts: Float[torch.Tensor, "batch_size n_layers d_acts"] + ) -> Tuple[ Float[torch.Tensor, "batch_size n_layers d_features"], Float[torch.Tensor, "batch_size n_layers d_features"], Float[torch.Tensor, "batch_size n_layers d_acts"], @@ -158,12 +160,12 @@ def __init__(self, d_acts: int, d_features: int, n_layers: int): self.d_acts = d_acts self.d_features = d_features self.n_layers = n_layers - self.register_parameter(f"W", nn.Parameter(torch.empty((n_layers, d_features, d_acts)))) + self.register_parameter("W", nn.Parameter(torch.empty((n_layers, d_features, d_acts)))) self.reset_parameters() def reset_parameters(self): dec_uniform_thresh = 1 / ((self.d_acts * self.n_layers) ** 0.5) - self.get_parameter(f"W").data.uniform_(-dec_uniform_thresh, dec_uniform_thresh) + self.get_parameter("W").data.uniform_(-dec_uniform_thresh, dec_uniform_thresh) @torch.no_grad() def forward_layer( @@ -173,7 +175,7 @@ def forward_layer( features = features[:, :, layer, :] return einsum( features, - self.get_parameter(f"W")[layer], + self.get_parameter("W")[layer], "batch_size seq d_features, d_features d_acts -> batch_size seq d_acts", ) @@ -199,7 +201,7 @@ def __init__(self, d_acts: int, d_features: int, n_layers: int): self.n_layers = n_layers for i in range(n_layers): self.register_parameter(f"W_{i}", nn.Parameter(torch.empty((i + 1, d_features, d_acts)))) - self.register_parameter(f"b", nn.Parameter(torch.empty((n_layers, d_acts)))) + self.register_parameter("b", nn.Parameter(torch.empty((n_layers, d_acts)))) self.reset_parameters() def reset_parameters(self): @@ -273,7 +275,9 @@ def initialize_standardizers(self, batch: Float[torch.Tensor, "batch_size io n_l self.input_standardizer.initialize_from_batch(batch) self.output_standardizer.initialize_from_batch(batch) - def forward(self, acts: Float[torch.Tensor, "batch_size n_layers d_acts"]) -> Tuple[ + def forward( + self, acts: Float[torch.Tensor, "batch_size n_layers d_acts"] + ) -> Tuple[ Float[torch.Tensor, "batch_size n_layers d_features"], # pre_actvs Float[torch.Tensor, "batch_size n_layers d_features"], # features Float[torch.Tensor, "batch_size n_layers d_acts"], # recons_norm diff --git a/crosslayer_transcoder/model/clt_lightning.py b/crosslayer_transcoder/model/clt_lightning.py index 39a874a..17305ea 100644 --- a/crosslayer_transcoder/model/clt_lightning.py +++ b/crosslayer_transcoder/model/clt_lightning.py @@ -1,18 +1,12 @@ import gc -import os -import subprocess -import time from typing import Optional, Tuple import lightning as L -import psutil import torch -import torch.nn as nn -from einops import einsum +import wandb from jaxtyping import Float from torch.distributed.tensor.parallel import parallelize_module -import wandb from crosslayer_transcoder.metrics.dead_features import DeadFeatures from crosslayer_transcoder.metrics.replacement_model_accuracy import ( ReplacementModelAccuracy, @@ -22,8 +16,6 @@ CrossLayerTranscoder, Decoder, ) -from crosslayer_transcoder.model.jumprelu import JumpReLU -from crosslayer_transcoder.model.topk import BatchTopK class CrossLayerTranscoderModule(L.LightningModule): @@ -55,9 +47,7 @@ def __init__( ): super().__init__(*args, **kwargs) - self.save_hyperparameters( - ignore=["model", "replacement_model", "dead_features"] - ) + self.save_hyperparameters(ignore=["model", "replacement_model", "dead_features"]) # torch.cuda.memory._record_memory_history(max_entries=100_000) # Store pre-constructed modules @@ -143,9 +133,7 @@ def log_training_metrics(self, features, recons_norm, recons, mlp_out, batch_idx ss_err = (mlp_out_norm - recons_norm) ** 2 ss_err = ss_err.sum(dim=0) - ss_total = ((mlp_out_norm - mlp_out_norm.mean(dim=0, keepdim=True)) ** 2).sum( - dim=0 - ) + ss_total = ((mlp_out_norm - mlp_out_norm.mean(dim=0, keepdim=True)) ** 2).sum(dim=0) fvu = (ss_err / ss_total).mean() # (n_layers, d_model) self.log("metrics/fraction_of_variance_unexplained", fvu) fvu_per_layer = (ss_err / ss_total).mean(dim=-1) @@ -189,8 +177,7 @@ def log_training_metrics(self, features, recons_norm, recons, mlp_out, batch_idx self.log("metrics/recons_standardized_std", recons_norm.std()) self.log( "metrics/L0_avg_per_layer", - torch.count_nonzero(active_features) - / (features.shape[0] * features.shape[1]), + torch.count_nonzero(active_features) / (features.shape[0] * features.shape[1]), ) # Magnitude of feature activations - memory efficient version @@ -216,9 +203,7 @@ def log_training_metrics(self, features, recons_norm, recons, mlp_out, batch_idx # Log L0 table per layer if batch_idx % 500 == 1: - l0_per_layer = ( - torch.count_nonzero(active_features, dim=(0, 2)) / features.shape[0] - ) + l0_per_layer = torch.count_nonzero(active_features, dim=(0, 2)) / features.shape[0] if self.logger and isinstance(self.logger.experiment, wandb.wandb_run.Run): table = wandb.Table( @@ -226,11 +211,7 @@ def log_training_metrics(self, features, recons_norm, recons, mlp_out, batch_idx columns=["layer", "L0"], ) self.logger.experiment.log( - { - "layers/L0_per_layer": wandb.plot.bar( - table, "layer", "L0", title="L0 per Layer" - ) - }, + {"layers/L0_per_layer": wandb.plot.bar(table, "layer", "L0", title="L0 per Layer")}, step=self.global_step, ) @@ -259,15 +240,11 @@ def log_training_metrics(self, features, recons_norm, recons, mlp_out, batch_idx ): for layer in range(dead_log_freqs.shape[0]): self.logger.experiment.log( - { - f"layers/log_feature_density/layer_{layer}": dead_log_freqs[ - layer - ] - }, + {f"layers/log_feature_density/layer_{layer}": dead_log_freqs[layer]}, step=self.global_step, ) self.logger.experiment.log( - {f"training/log_feature_density": dead_log_freqs.flatten()}, + {"training/log_feature_density": dead_log_freqs.flatten()}, step=self.global_step, ) self.log("training/log_feature_density_mean", dead_log_freqs.mean()) @@ -433,9 +410,7 @@ def training_step(self, batch, batch_idx): if isinstance(self.model.decoder, CrosslayerDecoder): dec_norms = torch.zeros_like(features[:1]) for l in range(self.model.decoder.n_layers): - W = self.model.decoder.get_parameter( - f"W_{l}" - ) # (from_layer, d_features, d_acts) + W = self.model.decoder.get_parameter(f"W_{l}") # (from_layer, d_features, d_acts) dec_norms[:, : l + 1] = dec_norms[:, : l + 1] + (W**2).sum(dim=-1) dec_norms = dec_norms.sqrt() @@ -443,17 +418,11 @@ def training_step(self, batch, batch_idx): dec_norms = torch.sqrt((self.model.decoder.W**2).sum(dim=-1)) weighted_features = features * dec_norms * self.c - self.log( - "model/weighted_features_mean", weighted_features.detach().mean().cpu() - ) + self.log("model/weighted_features_mean", weighted_features.detach().mean().cpu()) if self.use_tanh: - weighted_features = torch.tanh( - weighted_features - ) # (batch_size, n_layers, d_features) - sparsity = ( - self.current_sparsity_penalty() * weighted_features.sum(dim=[1, 2]).mean() - ) + weighted_features = torch.tanh(weighted_features) # (batch_size, n_layers, d_features) + sparsity = self.current_sparsity_penalty() * weighted_features.sum(dim=[1, 2]).mean() self.log("training/sparsity_loss", sparsity) # Compute Pre-activation Loss diff --git a/crosslayer_transcoder/model/parallel.py b/crosslayer_transcoder/model/parallel.py index 0dc9777..f2f8ac1 100644 --- a/crosslayer_transcoder/model/parallel.py +++ b/crosslayer_transcoder/model/parallel.py @@ -1,43 +1,25 @@ # mypy: allow-untyped-defs # Copyright (c) Meta Platforms, Inc. and affiliates -from abc import ABC, abstractmethod -from collections.abc import Sequence -from dataclasses import dataclass -from functools import cached_property, partial -from typing import Any, Optional, Tuple, Union, cast +from functools import partial +from typing import Optional, cast -import lightning as L import torch import torch.nn as nn -from einops import einsum, rearrange -from jaxtyping import Float -from torch.distributed._tensor.placement_types import Partial, Replicate, Shard from torch.distributed.device_mesh import DeviceMesh from torch.distributed.tensor import ( - DeviceMesh, DTensor, - Replicate, - Shard, distribute_module, distribute_tensor, ) from torch.distributed.tensor._dtensor_spec import DTensorSpec -from torch.distributed.tensor._op_schema import OpSchema # OpSpec, from torch.distributed.tensor._op_schema import ( OpStrategy, PlacementStrategy, - RuntimeSchemaInfo, - StrategyType, - _is_inplace_op, ) -from torch.distributed.tensor._ops.utils import is_tensor_dim_sharded, normalize_dim, register_op_strategy -from torch.distributed.tensor.parallel import RowwiseParallel, parallelize_module +from torch.distributed.tensor._ops.utils import register_op_strategy from torch.distributed.tensor.parallel.style import ParallelStyle from torch.distributed.tensor.placement_types import Placement, Replicate, Shard -from torch.nn.modules.activation import ReLU -import wandb -from crosslayer_transcoder.metrics.replacement_model_accuracy import ReplacementModelAccuracy from crosslayer_transcoder.model.jumprelu import JumpReLU aten = torch.ops.aten # convenience alias @@ -73,7 +55,7 @@ def select_int_strategy(op_schema): if shard_dim == selected_dim: raise NotImplementedError( - "Selecting along a dimension that is sharded " "is not supported on PyTorch-2.7.1." + "Selecting along a dimension that is sharded is not supported on PyTorch-2.7.1." ) # shift shard index left if we dropped an earlier dimension if shard_dim > selected_dim: diff --git a/crosslayer_transcoder/model/standardize.py b/crosslayer_transcoder/model/standardize.py index 45b1b63..d5e0fc8 100644 --- a/crosslayer_transcoder/model/standardize.py +++ b/crosslayer_transcoder/model/standardize.py @@ -1,6 +1,6 @@ -from einops import einsum import torch import torch.nn as nn +from einops import einsum from jaxtyping import Float @@ -8,19 +8,13 @@ class Standardizer(nn.Module): def __init__(self, **kwargs): super().__init__() - def initialize_from_batch( - self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"] - ): + def initialize_from_batch(self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"]): pass - def forward( - self, batch: Float[torch.Tensor, "batch_size n_layers actv_dim"], layer="all" - ): + def forward(self, batch: Float[torch.Tensor, "batch_size n_layers actv_dim"], layer="all"): return batch - def standardize( - self, batch: Float[torch.Tensor, "batch_size n_layers actv_dim"], layer="all" - ): + def standardize(self, batch: Float[torch.Tensor, "batch_size n_layers actv_dim"], layer="all"): return batch @@ -32,9 +26,7 @@ def __init__(self, n_layers, activation_dim): self.is_initialized = False @torch.no_grad() - def initialize_from_batch( - self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"] - ): + def initialize_from_batch(self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"]): batch = batch[:, 0] self.mean.data = batch.mean(dim=0) self.std.data = batch.std(dim=0) @@ -80,9 +72,7 @@ def __init__(self, n_layers, activation_dim): self.register_buffer("std", torch.empty(n_layers, activation_dim)) self.is_initialized = False - def initialize_from_batch( - self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"] - ): + def initialize_from_batch(self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"]): self.mean.data = batch[:, 1].mean(dim=0) self.std.data = batch[:, 1].std(dim=0) self.std.data.clamp_(min=1e-8) @@ -137,9 +127,7 @@ def __init__(self): super().__init__() @torch.no_grad() - def initialize_from_batch( - self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"] - ): + def initialize_from_batch(self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"]): pass def forward( @@ -162,9 +150,7 @@ def __init__(self, n_layers, n_exclude: int = 0): self.is_initialized = False @torch.no_grad() - def initialize_from_batch( - self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"] - ): + def initialize_from_batch(self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"]): batch = batch[:, 0] # exclude the n_exclude largest and smallest values topk = batch.topk(self.n_exclude, dim=-1, sorted=False) @@ -198,9 +184,7 @@ def __init__(self, n_layers, n_exclude: int = 0): self.n_exclude = n_exclude self.is_initialized = False - def initialize_from_batch( - self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"] - ): + def initialize_from_batch(self, batch: Float[torch.Tensor, "batch_size io n_layers actv_dim"]): batch = batch[:, 1] # exclude the n_exclude largest and smallest values topk = batch.topk(self.n_exclude, dim=-1, sorted=False) @@ -233,6 +217,4 @@ def standardize( if layer == "all": return (mlp_out - self.mean[None, :, None]) / self.std[None, :, None] else: - return (mlp_out - self.mean[None, layer, None]) / self.std[ - None, layer, None - ] + return (mlp_out - self.mean[None, layer, None]) / self.std[None, layer, None] diff --git a/crosslayer_transcoder/utils/buffer.py b/crosslayer_transcoder/utils/buffer.py index b60a0e8..f6bfd66 100644 --- a/crosslayer_transcoder/utils/buffer.py +++ b/crosslayer_transcoder/utils/buffer.py @@ -1,6 +1,6 @@ +import h5py import torch from torch.utils.data import Dataset -import h5py class DiscBuffer(Dataset): @@ -33,4 +33,4 @@ def __getitem__(self, idx): def close(self): # Close the HDF5 file when done if self.h5: - self.h5.close() \ No newline at end of file + self.h5.close() diff --git a/crosslayer_transcoder/utils/convert_activations_to_float16.py b/crosslayer_transcoder/utils/convert_activations_to_float16.py index deebad7..1a97daa 100755 --- a/crosslayer_transcoder/utils/convert_activations_to_float16.py +++ b/crosslayer_transcoder/utils/convert_activations_to_float16.py @@ -65,7 +65,7 @@ def convert_h5_float32_to_float16( new_size_gb = dataset.size * 2 / (1024**3) # float16 = 2 bytes print(f"Original size: {original_size_gb:.2f} GB") print(f"New size: {new_size_gb:.2f} GB") - print(f"Size reduction: {(1 - new_size_gb/original_size_gb)*100:.1f}%") + print(f"Size reduction: {(1 - new_size_gb / original_size_gb) * 100:.1f}%") # Create output file with h5py.File(output_path, "w") as f_out: @@ -81,7 +81,7 @@ def convert_h5_float32_to_float16( for start_idx in range(0, total_samples, chunk_size): end_idx = min(start_idx + chunk_size, total_samples) - current_chunk_size = end_idx - start_idx + end_idx - start_idx # Read chunk from input chunk_data = dataset[start_idx:end_idx] @@ -125,7 +125,7 @@ def convert_h5_float32_to_float16( total_time = time.time() - start_time final_rate = total_samples / total_time - print(f"\nāœ… Conversion complete!") + print("\nāœ… Conversion complete!") print(f"Total time: {total_time:.1f}s") print(f"Average rate: {final_rate:,.0f} samples/s") print(f"Output file: {output_path}") @@ -143,13 +143,9 @@ def main(): """Main function for command line usage.""" import argparse - parser = argparse.ArgumentParser( - description="Convert HDF5 activations from float32 to float16" - ) + parser = argparse.ArgumentParser(description="Convert HDF5 activations from float32 to float16") parser.add_argument("input_file", help="Path to input HDF5 file (float32)") - parser.add_argument( - "-o", "--output", help="Path to output HDF5 file (default: input_fp16.h5)" - ) + parser.add_argument("-o", "--output", help="Path to output HDF5 file (default: input_fp16.h5)") parser.add_argument( "-c", "--chunk-size", diff --git a/crosslayer_transcoder/utils/debug.ipynb b/crosslayer_transcoder/utils/debug.ipynb index d6939d4..e14cca1 100644 --- a/crosslayer_transcoder/utils/debug.ipynb +++ b/crosslayer_transcoder/utils/debug.ipynb @@ -28,7 +28,7 @@ } ], "source": [ - "t = torch.load('../lifetime_active.pt').cpu()\n", + "t = torch.load(\"../lifetime_active.pt\").cpu()\n", "t.shape" ] }, @@ -86,7 +86,7 @@ ], "source": [ "t1 = t2\n", - "t2 = torch.load('../lifetime_active.pt').cpu()\n", + "t2 = torch.load(\"../lifetime_active.pt\").cpu()\n", "plt.figure(figsize=(20, 5))\n", "plt.hist((t2 - t1).flatten(), bins=100, range=(0, 100))\n", "plt.show()" diff --git a/crosslayer_transcoder/utils/shuffle_activations.py b/crosslayer_transcoder/utils/shuffle_activations.py index 8ae963c..43dcc49 100644 --- a/crosslayer_transcoder/utils/shuffle_activations.py +++ b/crosslayer_transcoder/utils/shuffle_activations.py @@ -9,7 +9,6 @@ import h5py import numpy as np -from tqdm import tqdm def shuffle_activations(input_path, output_path, seed=42): @@ -54,7 +53,7 @@ def shuffle_activations(input_path, output_path, seed=42): print("Writing shuffled data...") with h5py.File(output_path, "w") as f_out: # Create dataset and write entire tensor at once - shuffled_tensor = f_out.create_dataset( + f_out.create_dataset( "tensor", data=data, # Write all data at once chunks=True, # Enable chunking for better I/O @@ -81,7 +80,6 @@ def verify_shuffle(original_path, shuffled_path, num_samples=100): h5py.File(original_path, "r") as f_orig, h5py.File(shuffled_path, "r") as f_shuf, ): - orig_tensor = f_orig["tensor"] shuf_tensor = f_shuf["tensor"] @@ -91,16 +89,14 @@ def verify_shuffle(original_path, shuffled_path, num_samples=100): batch_size = orig_tensor.shape[0] # Sample indices for verification - test_indices = np.random.choice( - batch_size, min(num_samples, batch_size), replace=False - ) + test_indices = np.random.choice(batch_size, min(num_samples, batch_size), replace=False) # Read only the samples we need for verification - much more efficient orig_samples = orig_tensor[test_indices] shuf_samples = shuf_tensor[test_indices] differences_found = 0 - for i, idx in enumerate(test_indices): + for i, _idx in enumerate(test_indices): if not np.array_equal(orig_samples[i], shuf_samples[i]): differences_found += 1 @@ -112,9 +108,7 @@ def verify_shuffle(original_path, shuffled_path, num_samples=100): if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Shuffle activations along batch dimension" - ) + parser = argparse.ArgumentParser(description="Shuffle activations along batch dimension") parser.add_argument( "--input", default="/var/local/glang/activations/clt-activations-10M.h5", @@ -125,12 +119,8 @@ def verify_shuffle(original_path, shuffled_path, num_samples=100): default="/var/local/glang/activations/clt-activations-10M-shuffled.h5", help="Output HDF5 file path", ) - parser.add_argument( - "--seed", type=int, default=42, help="Random seed for reproducible shuffling" - ) - parser.add_argument( - "--verify", action="store_true", help="Verify the shuffle after completion" - ) + parser.add_argument("--seed", type=int, default=42, help="Random seed for reproducible shuffling") + parser.add_argument("--verify", action="store_true", help="Verify the shuffle after completion") args = parser.parse_args() diff --git a/crosslayer_transcoder/utils/store_actvs benchmark.ipynb b/crosslayer_transcoder/utils/store_actvs benchmark.ipynb index 4206fc2..f554d6b 100644 --- a/crosslayer_transcoder/utils/store_actvs benchmark.ipynb +++ b/crosslayer_transcoder/utils/store_actvs benchmark.ipynb @@ -24,13 +24,14 @@ } ], "source": [ - "import torch\n", - "import nnsight\n", - "import datasets\n", + "import os\n", + "\n", "import activation_server.text_dataset as text_dataset\n", - "from torch.utils.data import DataLoader\n", + "import datasets\n", "import h5py\n", - "import os\n", + "import nnsight\n", + "import torch\n", + "from torch.utils.data import DataLoader\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"\"\n", "\n", @@ -59,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = nnsight.LanguageModel('openai-community/gpt2', device_map='auto', dispatch=True)" + "model = nnsight.LanguageModel(\"openai-community/gpt2\", device_map=\"auto\", dispatch=True)" ] }, { @@ -119,7 +120,7 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = datasets.load_dataset('Skylion007/openwebtext', split='train')" + "dataset = datasets.load_dataset(\"Skylion007/openwebtext\", split=\"train\")" ] }, { @@ -221,6 +222,7 @@ ], "source": [ "import time\n", + "\n", "from tqdm import tqdm\n", "\n", "# Run for 10 seconds to get average throughput\n", @@ -239,10 +241,10 @@ "samples_per_sec = total_samples / elapsed\n", "batches_per_sec = total_batches / elapsed\n", "\n", - "print(f\"\\nThroughput:\")\n", + "print(\"\\nThroughput:\")\n", "print(f\"Samples/sec: {samples_per_sec:.1f}\")\n", "print(f\"Batches/sec: {batches_per_sec:.1f}\")\n", - "print(f\"Average batch size: {total_samples/total_batches:.1f}\")" + "print(f\"Average batch size: {total_samples / total_batches:.1f}\")" ] }, { @@ -253,7 +255,7 @@ "outputs": [], "source": [ "def extract_activations(model, tokens):\n", - " with model.trace(tokens) as tracer:\n", + " with model.trace(tokens):\n", " mlp_ins = []\n", " mlp_outs = []\n", " for i in range(12):\n", @@ -303,8 +305,8 @@ ], "source": [ "import time\n", - "from tqdm import tqdm\n", "\n", + "from tqdm import tqdm\n", "\n", "# Run benchmark for 10 seconds to measure activation extraction throughput\n", "start_time = time.time()\n", @@ -316,14 +318,14 @@ "for batch in tqdm(text_dataset_loader):\n", " if time.time() > end_time:\n", " break\n", - " \n", + "\n", " # prepend BOS token like in main loop\n", " batch = torch.roll(batch, shifts=1, dims=1)\n", " batch[:, 0] = model.config.bos_token_id\n", - " \n", + "\n", " # Extract activations\n", " mlp_acts = extract_activations(model, batch)\n", - " \n", + "\n", " total_tokens += batch.numel()\n", " total_batches += 1\n", "\n", @@ -331,10 +333,10 @@ "tokens_per_sec = total_tokens / elapsed\n", "batches_per_sec = total_batches / elapsed\n", "\n", - "print(f\"\\nActivation Extraction Throughput:\")\n", + "print(\"\\nActivation Extraction Throughput:\")\n", "print(f\"Tokens/sec: {tokens_per_sec:,.1f}\")\n", "print(f\"Batches/sec: {batches_per_sec:.1f}\")\n", - "print(f\"Average batch size: {total_tokens/total_batches:.1f} tokens\")\n" + "print(f\"Average batch size: {total_tokens / total_batches:.1f} tokens\")" ] }, { @@ -357,8 +359,7 @@ ], "source": [ "with model.trace(batch):\n", - " print('processing')\n", - " " + " print(\"processing\")" ] }, { @@ -637,14 +638,14 @@ } ], "source": [ - "store_path = '/var/local/glang/activations'\n", - "filename = 'clt-activations-10M.h5'\n", + "store_path = \"/var/local/glang/activations\"\n", + "filename = \"clt-activations-10M.h5\"\n", "store_size = 10000000\n", "actv_size = model.config.n_embd\n", "\n", "with h5py.File(os.path.join(store_path, filename), \"w\") as f:\n", " h5_dataset = f.create_dataset(\n", - " 'tensor', (store_size, 2, model.config.n_layer, model.config.n_embd), dtype='float32'\n", + " \"tensor\", (store_size, 2, model.config.n_layer, model.config.n_embd), dtype=\"float32\"\n", " )\n", "\n", " h5_pointer = 0\n", @@ -662,9 +663,7 @@ " n_acts = mlp_acts.shape[0]\n", "\n", " if h5_pointer + n_acts > store_size:\n", - " h5_dataset[h5_pointer:] = (\n", - " mlp_acts[: int(store_size - h5_pointer)].cpu().numpy()\n", - " )\n", + " h5_dataset[h5_pointer:] = mlp_acts[: int(store_size - h5_pointer)].cpu().numpy()\n", " break\n", " else:\n", " h5_dataset[h5_pointer : h5_pointer + n_acts] = mlp_acts.cpu().numpy()\n", diff --git a/crosslayer_transcoder/utils/store_actvs.ipynb b/crosslayer_transcoder/utils/store_actvs.ipynb index 6229c23..9188acf 100644 --- a/crosslayer_transcoder/utils/store_actvs.ipynb +++ b/crosslayer_transcoder/utils/store_actvs.ipynb @@ -25,13 +25,14 @@ } ], "source": [ - "import torch\n", - "import nnsight\n", + "import os\n", + "\n", "import datasets\n", + "import h5py\n", + "import nnsight\n", "import text_dataset\n", + "import torch\n", "from torch.utils.data import DataLoader\n", - "import h5py\n", - "import os\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"3\"\n", "\n", @@ -60,7 +61,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = nnsight.LanguageModel('openai-community/gpt2', device_map='auto', dispatch=True)" + "model = nnsight.LanguageModel(\"openai-community/gpt2\", device_map=\"auto\", dispatch=True)" ] }, { @@ -120,7 +121,7 @@ "metadata": {}, "outputs": [], "source": [ - "dataset = datasets.load_dataset('Skylion007/openwebtext', split='train')" + "dataset = datasets.load_dataset(\"Skylion007/openwebtext\", split=\"train\")" ] }, { @@ -178,7 +179,7 @@ "outputs": [], "source": [ "def extract_activations(model, tokens):\n", - " with model.trace(tokens) as tracer:\n", + " with model.trace(tokens):\n", " mlp_ins = []\n", " mlp_outs = []\n", " for i in range(12):\n", @@ -459,14 +460,14 @@ } ], "source": [ - "store_path = '/var/local/glang/activations'\n", - "filename = 'clt-activations-10M.h5'\n", + "store_path = \"/var/local/glang/activations\"\n", + "filename = \"clt-activations-10M.h5\"\n", "store_size = 10000000\n", "actv_size = model.config.n_embd\n", "\n", "with h5py.File(os.path.join(store_path, filename), \"w\") as f:\n", " h5_dataset = f.create_dataset(\n", - " 'tensor', (store_size, 2, model.config.n_layer, model.config.n_embd), dtype='float32'\n", + " \"tensor\", (store_size, 2, model.config.n_layer, model.config.n_embd), dtype=\"float32\"\n", " )\n", "\n", " h5_pointer = 0\n", @@ -484,9 +485,7 @@ " n_acts = mlp_acts.shape[0]\n", "\n", " if h5_pointer + n_acts > store_size:\n", - " h5_dataset[h5_pointer:] = (\n", - " mlp_acts[: int(store_size - h5_pointer)].cpu().numpy()\n", - " )\n", + " h5_dataset[h5_pointer:] = mlp_acts[: int(store_size - h5_pointer)].cpu().numpy()\n", " break\n", " else:\n", " h5_dataset[h5_pointer : h5_pointer + n_acts] = mlp_acts.cpu().numpy()\n", diff --git a/crosslayer_transcoder/utils/store_actvs_benchmark.py b/crosslayer_transcoder/utils/store_actvs_benchmark.py index bb7c204..a88adfb 100644 --- a/crosslayer_transcoder/utils/store_actvs_benchmark.py +++ b/crosslayer_transcoder/utils/store_actvs_benchmark.py @@ -1,15 +1,13 @@ import os import time +import activation_server.text_dataset as text_dataset import datasets -import h5py import nnsight import torch from torch.utils.data import DataLoader from tqdm import tqdm -import activation_server.text_dataset as text_dataset - os.environ["CUDA_VISIBLE_DEVICES"] = "" @@ -32,9 +30,7 @@ def create_model(use_fp16=False): """Create model with optional FP16""" - model = nnsight.LanguageModel( - "openai-community/gpt2", device_map="cpu", dispatch=True - ) + model = nnsight.LanguageModel("openai-community/gpt2", device_map="cpu", dispatch=True) model.requires_grad_(False) if use_fp16: @@ -50,7 +46,7 @@ def create_model(use_fp16=False): @torch.no_grad() def extract_activations(model, tokens): - with model.trace(tokens) as tracer: + with model.trace(tokens): mlp_ins = [] mlp_outs = [] for i in range(12): @@ -108,7 +104,7 @@ def benchmark_config(batch_size, use_fp16=False, duration=30): batch[:, 0] = model.config.bos_token_id # Extract activations - mlp_acts = extract_activations(model, batch) + extract_activations(model, batch) total_tokens += batch.numel() total_batches += 1 @@ -121,8 +117,8 @@ def benchmark_config(batch_size, use_fp16=False, duration=30): tokens_per_sec = total_tokens / elapsed print(f"Tokens/sec: {tokens_per_sec:,.1f}") - print(f"Batches/sec: {total_batches/elapsed:.1f}") - print(f"Avg batch size: {total_tokens/total_batches:.1f} tokens") + print(f"Batches/sec: {total_batches / elapsed:.1f}") + print(f"Avg batch size: {total_tokens / total_batches:.1f} tokens") return tokens_per_sec @@ -140,11 +136,7 @@ def benchmark_config(batch_size, use_fp16=False, duration=30): print("\n=== RESULTS ===") baseline = results.get("batch_40", 2100) - for config, tokens_per_sec in sorted( - results.items(), key=lambda x: x[1], reverse=True - ): + for config, tokens_per_sec in sorted(results.items(), key=lambda x: x[1], reverse=True): if tokens_per_sec > 0: improvement = tokens_per_sec / baseline - print( - f"{config:15}: {tokens_per_sec:8,.1f} tokens/sec ({improvement:.1f}x)" - ) + print(f"{config:15}: {tokens_per_sec:8,.1f} tokens/sec ({improvement:.1f}x)") diff --git a/crosslayer_transcoder/utils/store_actvs_benchmark_optimized.py b/crosslayer_transcoder/utils/store_actvs_benchmark_optimized.py index 51aace9..0eaf869 100644 --- a/crosslayer_transcoder/utils/store_actvs_benchmark_optimized.py +++ b/crosslayer_transcoder/utils/store_actvs_benchmark_optimized.py @@ -1,14 +1,13 @@ import os import time +import activation_server.text_dataset as text_dataset import datasets import nnsight import torch from torch.utils.data import DataLoader from tqdm import tqdm -import activation_server.text_dataset as text_dataset - os.environ["CUDA_VISIBLE_DEVICES"] = "" # Set optimal thread count for single model @@ -21,9 +20,7 @@ def create_model(use_float16=False): """Create and optimize model""" - model = nnsight.LanguageModel( - "openai-community/gpt2", device_map="cpu", dispatch=True - ) + model = nnsight.LanguageModel("openai-community/gpt2", device_map="cpu", dispatch=True) model.requires_grad_(False) if use_float16: @@ -36,7 +33,7 @@ def extract_activations_optimized(model, tokens, use_float16=False): """Optimized activation extraction""" # Don't convert tokens to float16 - they must stay as integers for embedding - with model.trace(tokens) as tracer: + with model.trace(tokens): mlp_ins = [] mlp_outs = [] for i in range(12): @@ -59,9 +56,7 @@ def extract_activations_optimized(model, tokens, use_float16=False): def benchmark_single_model(batch_size=40, use_float16=False, duration=30): """Benchmark single model with different batch sizes""" - print( - f"\n=== Single Model Benchmark (batch_size={batch_size}, fp16={use_float16}) ===" - ) + print(f"\n=== Single Model Benchmark (batch_size={batch_size}, fp16={use_float16}) ===") model = create_model(use_float16) dataset = datasets.load_dataset("Skylion007/openwebtext", split="train") @@ -101,7 +96,7 @@ def benchmark_single_model(batch_size=40, use_float16=False, duration=30): batch[:, 0] = model.config.bos_token_id # Extract activations - mlp_acts = extract_activations_optimized(model, batch, use_float16) + extract_activations_optimized(model, batch, use_float16) total_tokens += batch.numel() total_batches += 1 @@ -110,8 +105,8 @@ def benchmark_single_model(batch_size=40, use_float16=False, duration=30): tokens_per_sec = total_tokens / elapsed print(f"Tokens/sec: {tokens_per_sec:,.1f}") - print(f"Batches/sec: {total_batches/elapsed:.1f}") - print(f"Average batch size: {total_tokens/total_batches:.1f} tokens") + print(f"Batches/sec: {total_batches / elapsed:.1f}") + print(f"Average batch size: {total_tokens / total_batches:.1f} tokens") return tokens_per_sec @@ -123,9 +118,7 @@ def benchmark_single_model(batch_size=40, use_float16=False, duration=30): # Test different batch sizes for batch_size in [40, 80, 160, 320]: try: - results[f"batch_{batch_size}"] = benchmark_single_model( - batch_size, False, 20 - ) + results[f"batch_{batch_size}"] = benchmark_single_model(batch_size, False, 20) except Exception as e: print(f"Failed batch_size={batch_size}: {e}") @@ -182,8 +175,6 @@ def benchmark_single_model(batch_size=40, use_float16=False, duration=30): print(f"Failed 16 workers: {e}") print("\n=== RESULTS SUMMARY ===") - for config, tokens_per_sec in sorted( - results.items(), key=lambda x: x[1], reverse=True - ): + for config, tokens_per_sec in sorted(results.items(), key=lambda x: x[1], reverse=True): improvement = tokens_per_sec / baseline print(f"{config:20}: {tokens_per_sec:8,.1f} tokens/sec ({improvement:.1f}x)") diff --git a/crosslayer_transcoder/utils/utils.py b/crosslayer_transcoder/utils/utils.py index 3f38020..f0c6fb5 100644 --- a/crosslayer_transcoder/utils/utils.py +++ b/crosslayer_transcoder/utils/utils.py @@ -28,11 +28,9 @@ def get_webtext_dataloader(model, dataset_name="Skylion007/openwebtext", batch_s def plot_actvs(actvs): - if type(actvs) == list: + if isinstance(actvs, list): # actvs: list(n_layers), every entry tensor batch_size x seq_len x d_acts - actvs = torch.stack( - [actv[0, 50, 300:500] for actv in actvs] - ) # n_layers x d_acts + actvs = torch.stack([actv[0, 50, 300:500] for actv in actvs]) # n_layers x d_acts else: # batch_size x n_layers x d_acts actvs = actvs[0, :, 300:500] # n_layers x d_acts actvs = actvs.cpu().numpy() diff --git a/pyproject.toml b/pyproject.toml index 9077485..d54bee1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,22 @@ dev-dependencies = [ "jupyterlab>=4.4.2", "ipykernel>=6.29.5", "pytest>=8.4.1", + "ruff>=0.9.0", ] -[tool.black] +[tool.ruff] line-length = 110 +target-version = "py312" + +[tool.ruff.lint] +select = ["E", "W", "F", "I", "B"] +ignore = [ + "E501", # line length (handled by formatter) + "E402", # module-import-not-at-top-of-file (intentional for env vars) + "F722", # forward-annotation-syntax-error (jaxtyping uses special syntax) + "F821", # undefined-name (jaxtyping type annotations) + "E741", # ambiguous-variable-name (common in ML: l, x, y, etc.) +] + +[tool.ruff.lint.isort] +known-first-party = ["crosslayer_transcoder"] diff --git a/setup.sh b/setup.sh index 48d1062..12b02ef 100755 --- a/setup.sh +++ b/setup.sh @@ -5,9 +5,9 @@ RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' -NC='\033[0m' # No Color +NC='\033[0m' -echo -e "${BLUE}šŸš€ Setting up crosslayer-transcoder environment...${NC}" +echo -e "${BLUE}šŸš€ Setting up crosslayer-transcoder...${NC}" # Function to check if command exists command_exists() { @@ -16,120 +16,31 @@ command_exists() { # Install uv if not present if ! command_exists uv; then - echo -e "${YELLOW}šŸ“¦ Installing uv (fast Python package manager)...${NC}" + echo -e "${YELLOW}šŸ“¦ Installing uv...${NC}" curl -LsSf https://astral.sh/uv/install.sh | sh - - # Source the shell configuration to make uv available export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH" if ! command_exists uv; then - echo -e "${RED}āŒ Failed to install uv. Please install manually: https://docs.astral.sh/uv/getting-started/installation/${NC}" + echo -e "${RED}āŒ Failed to install uv. See: https://docs.astral.sh/uv/${NC}" exit 1 fi - echo -e "${GREEN}āœ… uv installed successfully!${NC}" + echo -e "${GREEN}āœ… uv installed${NC}" else - echo -e "${GREEN}āœ… uv is already installed${NC}" -fi - -# Create .venv and install dependencies -echo -e "${YELLOW}šŸ”§ Creating virtual environment and installing dependencies...${NC}" - -# Remove existing .venv if it exists -if [ -d ".venv" ]; then - echo -e "${YELLOW}šŸ—‘ļø Removing existing .venv directory...${NC}" - rm -rf .venv + echo -e "${GREEN}āœ… uv already installed${NC}" fi -# Use uv to create venv and install dependencies -# This will automatically: -# - Install the correct Python version (3.12) -# - Create .venv directory -# - Install all dependencies from pyproject.toml -export PATH="$HOME/.local/bin:$PATH" -uv sync --dev --python 3.12 +# Install dependencies +echo -e "${YELLOW}šŸ”§ Installing dependencies...${NC}" +uv sync if [ $? -eq 0 ]; then - echo -e "${YELLOW}šŸ”§ Setting up permissions and PATH...${NC}" - - # Fix permissions for all executables in .venv/bin - chmod +x .venv/bin/* - - # Add uv to PATH permanently by updating shell profile - UV_PATH_EXPORT='export PATH="$HOME/.local/bin:$PATH"' - - # Check which shell profile to update - UPDATED_PROFILE=false - - # Try .bashrc first (most common) - if [ -f "$HOME/.bashrc" ] || [ "$SHELL" = "/bin/bash" ]; then - if [ ! -f "$HOME/.bashrc" ]; then - touch "$HOME/.bashrc" - fi - if ! grep -q "\.local/bin" "$HOME/.bashrc"; then - echo "" >> "$HOME/.bashrc" - echo "# Added by crosslayer-transcoder setup" >> "$HOME/.bashrc" - echo "$UV_PATH_EXPORT" >> "$HOME/.bashrc" - echo -e "${GREEN}āœ… Added uv to PATH in ~/.bashrc${NC}" - UPDATED_PROFILE=true - fi - fi - - # Try .zshrc for zsh users - if [ -f "$HOME/.zshrc" ]; then - if ! grep -q "\.local/bin" "$HOME/.zshrc"; then - echo "" >> "$HOME/.zshrc" - echo "# Added by crosslayer-transcoder setup" >> "$HOME/.zshrc" - echo "$UV_PATH_EXPORT" >> "$HOME/.zshrc" - echo -e "${GREEN}āœ… Added uv to PATH in ~/.zshrc${NC}" - UPDATED_PROFILE=true - fi - fi - - # Fallback to .profile (works for all POSIX shells) - if [ "$UPDATED_PROFILE" = false ] && [ -f "$HOME/.profile" ]; then - if ! grep -q "\.local/bin" "$HOME/.profile"; then - echo "" >> "$HOME/.profile" - echo "# Added by crosslayer-transcoder setup" >> "$HOME/.profile" - echo "$UV_PATH_EXPORT" >> "$HOME/.profile" - echo -e "${GREEN}āœ… Added uv to PATH in ~/.profile${NC}" - UPDATED_PROFILE=true - fi - fi - - # Create .bashrc as final fallback - if [ "$UPDATED_PROFILE" = false ]; then - echo "" >> "$HOME/.bashrc" - echo "# Added by crosslayer-transcoder setup" >> "$HOME/.bashrc" - echo "$UV_PATH_EXPORT" >> "$HOME/.bashrc" - echo -e "${GREEN}āœ… Created ~/.bashrc and added uv to PATH${NC}" - fi - - echo -e "${GREEN}āœ… Environment setup complete!${NC}" - echo "" - echo -e "${BLUE}šŸ“ To activate the environment, run:${NC}" - echo -e "${YELLOW} source .venv/bin/activate${NC}" - echo -e "${BLUE} (or simply: .venv/bin/activate)${NC}" + echo -e "${GREEN}āœ… Setup complete!${NC}" echo "" - echo -e "${BLUE}šŸ“ To run Jupyter Lab:${NC}" - echo -e "${YELLOW} .venv/bin/jupyter lab${NC}" - echo "" - echo -e "${BLUE}šŸ“ To run Python scripts:${NC}" - echo -e "${YELLOW} .venv/bin/python your_script.py${NC}" - echo "" - echo -e "${BLUE}šŸ“ To add new dependencies:${NC}" - echo -e "${YELLOW} uv add package_name${NC}" - echo "" - echo -e "${BLUE}šŸ“ To make uv available globally, restart your terminal or run:${NC}" - if [ -f "$HOME/.bashrc" ]; then - echo -e "${YELLOW} source ~/.bashrc${NC}" - elif [ -f "$HOME/.zshrc" ]; then - echo -e "${YELLOW} source ~/.zshrc${NC}" - elif [ -f "$HOME/.profile" ]; then - echo -e "${YELLOW} source ~/.profile${NC}" - else - echo -e "${YELLOW} source ~/.bashrc${NC}" - fi + echo -e "${BLUE}Usage:${NC}" + echo -e " ${YELLOW}uv run python script.py${NC} - Run a Python script" + echo -e " ${YELLOW}uv add package_name${NC} - Add a dependency" + echo -e " ${YELLOW}uv sync --dev${NC} - Install dev tools (ruff, pytest, jupyter)" else - echo -e "${RED}āŒ Setup failed. Please check the error messages above.${NC}" + echo -e "${RED}āŒ Setup failed${NC}" exit 1 -fi \ No newline at end of file +fi diff --git a/tests/benchmark_data_loader.py b/tests/benchmark_data_loader.py index c029195..eda33c3 100644 --- a/tests/benchmark_data_loader.py +++ b/tests/benchmark_data_loader.py @@ -3,10 +3,11 @@ Comprehensive data loader benchmark for ActivationDataModule. Tests both shared memory and simple buffer modes. """ + import os import sys import time -from typing import Any, Dict, Optional +from typing import Dict, Optional # Add project root to Python path for reliable imports project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -80,8 +81,7 @@ def benchmark_loader(loader, mode_name: str, duration: float = 30.0) -> float: elapsed = time.time() - start_time current_mb_s = (total_bytes / (1024 * 1024)) / elapsed print( - f" Batch {batch_count}: {batch_time*1000:.1f}ms, " - f"Running avg: {current_mb_s:.1f} MB/s" + f" Batch {batch_count}: {batch_time * 1000:.1f}ms, Running avg: {current_mb_s:.1f} MB/s" ) except Exception as e: @@ -104,7 +104,7 @@ def benchmark_loader(loader, mode_name: str, duration: float = 30.0) -> float: # Show buffer stats if available if hasattr(loader, "get_stats"): stats = loader.get_stats() - print(f" Final buffer status:") + print(" Final buffer status:") print(f" Valid samples: {stats['valid_samples']:,} ({stats['valid_percentage']:.1f}%)") print(f" Buffer memory: {stats['total_memory_gb']:.2f} GB") @@ -252,7 +252,7 @@ def main(): batch_size = 5000 duration = 30.0 - print(f"āš™ļø Configuration:") + print("āš™ļø Configuration:") print(f" Batch size: {batch_size:,}") print(f" Test duration: {duration}s per mode") print(f" PyTorch version: {torch.__version__}") @@ -270,7 +270,7 @@ def main(): results["DataModule Simple"] = result # Final comparison - print(f"\nšŸ† FINAL COMPARISON") + print("\nšŸ† FINAL COMPARISON") print("=" * 60) if results: max_name_len = max(len(name) for name in results.keys()) @@ -295,7 +295,7 @@ def main(): else: print(" āŒ No successful benchmarks completed") - print(f"\nāœ… Benchmark complete!") + print("\nāœ… Benchmark complete!") if __name__ == "__main__": diff --git a/tests/benchmark_text_dataloader.py b/tests/benchmark_text_dataloader.py index d29d25f..4b32498 100644 --- a/tests/benchmark_text_dataloader.py +++ b/tests/benchmark_text_dataloader.py @@ -8,11 +8,10 @@ import nnsight import torch +from data import text_dataset from datasets import load_dataset from torch.utils.data import DataLoader -from data import text_dataset - def benchmark_textdataset( dataset_name: str = "Skylion007/openwebtext", @@ -35,7 +34,7 @@ def benchmark_textdataset( max_batches: Maximum batches to process max_duration: Maximum time to run benchmark (seconds) """ - print(f"=== TextDataset Benchmark ===") + print("=== TextDataset Benchmark ===") print(f"Dataset: {dataset_name}") print(f"Model: {model_name}") print(f"Batch size: {batch_size}") @@ -175,9 +174,7 @@ def benchmark_textdataset( print(f"Dataset load: {dataset_load_time:.2f}s") print(f"TextDataset: {textdataset_time:.2f}s") print(f"DataLoader init: {dataloader_time:.2f}s") - print( - f"Total setup: {model_load_time + dataset_load_time + textdataset_time + dataloader_time:.2f}s" - ) + print(f"Total setup: {model_load_time + dataset_load_time + textdataset_time + dataloader_time:.2f}s") def compare_workers(): @@ -187,7 +184,7 @@ def compare_workers(): worker_configs = [0, 1, 2, 4] for num_workers in worker_configs: - print(f"\n{'='*50}") + print(f"\n{'=' * 50}") print(f"Testing with {num_workers} workers") print("=" * 50) @@ -201,25 +198,15 @@ def compare_workers(): if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser( - description="Benchmark TextDataset token throughput" - ) - parser.add_argument( - "--dataset", default="Skylion007/openwebtext", help="Dataset name" - ) + parser = argparse.ArgumentParser(description="Benchmark TextDataset token throughput") + parser.add_argument("--dataset", default="Skylion007/openwebtext", help="Dataset name") parser.add_argument("--model", default="openai-community/gpt2", help="Model name") parser.add_argument("--batch-size", type=int, default=32, help="Batch size") parser.add_argument("--seq-len", type=int, default=1024, help="Sequence length") parser.add_argument("--workers", type=int, default=0, help="Number of workers") - parser.add_argument( - "--max-batches", type=int, default=10, help="Max batches to process" - ) - parser.add_argument( - "--max-duration", type=float, default=60.0, help="Max duration (seconds)" - ) - parser.add_argument( - "--compare-workers", action="store_true", help="Compare different worker counts" - ) + parser.add_argument("--max-batches", type=int, default=10, help="Max batches to process") + parser.add_argument("--max-duration", type=float, default=60.0, help="Max duration (seconds)") + parser.add_argument("--compare-workers", action="store_true", help="Compare different worker counts") args = parser.parse_args() diff --git a/tests/test_dead_features.py b/tests/test_dead_features.py index 6afbf10..2d6cd7b 100644 --- a/tests/test_dead_features.py +++ b/tests/test_dead_features.py @@ -51,9 +51,9 @@ def test_initialization(self, metric_params): assert metric.n_features == n_features assert metric.n_layers == n_layers - assert metric.return_per_layer == False - assert metric.return_neuron_indices == False - assert metric.return_log_freqs == False + assert not metric.return_per_layer + assert not metric.return_neuron_indices + assert not metric.return_log_freqs assert metric.n_active.shape == (n_layers, n_features) assert torch.all(metric.n_active == 0) assert metric.n_total.shape == (1,) @@ -69,9 +69,9 @@ def test_initialization_with_flags(self, metric_params): return_neuron_indices=True, ) - assert metric.return_per_layer == True - assert metric.return_neuron_indices == True - assert metric.return_log_freqs == False + assert metric.return_per_layer + assert metric.return_neuron_indices + assert not metric.return_log_freqs class TestDeadFeaturesZeroInput: diff --git a/tests/test_deployment_policy.py b/tests/test_deployment_policy.py index cb55dae..60b952e 100644 --- a/tests/test_deployment_policy.py +++ b/tests/test_deployment_policy.py @@ -4,10 +4,9 @@ Tests that deployment policies correctly handle model instantiation and device management. """ -from unittest.mock import Mock, patch +from unittest.mock import Mock import pytest -import torch from crosslayer_transcoder.data import ActivationDataModule from crosslayer_transcoder.data.deployment_policy import ( @@ -67,13 +66,13 @@ def test_gpu_only_device_parsing(self): policy = create_deployment_policy(DeploymentPolicy.GPU_ONLY, device_map="cuda:3") devices, is_multi_gpu = policy._parse_device_map() assert devices == ["cuda:3"] - assert is_multi_gpu == False + assert not is_multi_gpu # Test multi-GPU policy = create_deployment_policy(DeploymentPolicy.GPU_ONLY, device_map="cuda:0,1,2,3") devices, is_multi_gpu = policy._parse_device_map() assert devices == ["cuda:0", "cuda:1", "cuda:2", "cuda:3"] - assert is_multi_gpu == True + assert is_multi_gpu def test_gpu_only_device_tracking_logic(self): """Test that GPU-only policy correctly calculates device tracking without loading models.""" diff --git a/tests/test_process_monitor.py b/tests/test_process_monitor.py index dd8f416..e0a32ae 100644 --- a/tests/test_process_monitor.py +++ b/tests/test_process_monitor.py @@ -3,8 +3,6 @@ Tests for process monitor functionality. """ -from unittest.mock import Mock, patch - import pytest from crosslayer_transcoder.data.process_monitor import ProcessMonitor, WandBProcessMonitor @@ -55,7 +53,7 @@ def test_device_code_mapping(self): def test_dashboard_device_string_generation(self): """Test that dashboard correctly displays device strings.""" - monitor = ProcessMonitor() + ProcessMonitor() # Test different device types for dashboard display test_cases = [ diff --git a/tests/test_standardization_folding.py b/tests/test_standardization_folding.py index 8136775..20b960c 100644 --- a/tests/test_standardization_folding.py +++ b/tests/test_standardization_folding.py @@ -8,7 +8,6 @@ DimensionwiseOutputStandardizer, ) - torch.manual_seed(42) DTYPE = torch.float64 BATCH_SIZE, D_ACTS, D_FEATS, N_LAYERS = 100, 768, 32, 12 @@ -29,9 +28,7 @@ def assert_allclose(lhs, rhs, rtol=1e-7, atol=1e-9): def test_math_sanity_check(): encoder = Encoder(d_acts=D_ACTS, d_features=D_FEATS, n_layers=N_LAYERS).to(DTYPE) - input_std = DimensionwiseInputStandardizer( - n_layers=N_LAYERS, activation_dim=D_ACTS - ).to(DTYPE) + input_std = DimensionwiseInputStandardizer(n_layers=N_LAYERS, activation_dim=D_ACTS).to(DTYPE) input_std.initialize_from_batch(batch=BATCH) resid = BATCH[:, 0] @@ -68,9 +65,7 @@ def test_math_sanity_check(): def test_encoder_standarization_folding(): encoder = Encoder(d_acts=D_ACTS, d_features=D_FEATS, n_layers=N_LAYERS).to(DTYPE) - input_std = DimensionwiseInputStandardizer( - n_layers=N_LAYERS, activation_dim=D_ACTS - ).to(DTYPE) + input_std = DimensionwiseInputStandardizer(n_layers=N_LAYERS, activation_dim=D_ACTS).to(DTYPE) input_std.initialize_from_batch(batch=BATCH) @@ -92,12 +87,8 @@ def test_encoder_standarization_folding(): def test_decoder_standarization_folding(): - decoder = CrosslayerDecoder( - d_acts=D_ACTS, d_features=D_FEATS, n_layers=N_LAYERS - ).to(DTYPE) - output_std = DimensionwiseOutputStandardizer( - n_layers=N_LAYERS, activation_dim=D_ACTS - ).to(DTYPE) + decoder = CrosslayerDecoder(d_acts=D_ACTS, d_features=D_FEATS, n_layers=N_LAYERS).to(DTYPE) + output_std = DimensionwiseOutputStandardizer(n_layers=N_LAYERS, activation_dim=D_ACTS).to(DTYPE) output_std.initialize_from_batch(batch=BATCH) @@ -119,8 +110,6 @@ def test_decoder_standarization_folding(): b_dec_folded = output_std.fold_in_decoder_bias(decoder.b) folded_params["b"] = torch.nn.Parameter(b_dec_folded.clone().detach()) - recons_folded = functional_call( - decoder, folded_params, test_features, {"layer": "all"} - ) + recons_folded = functional_call(decoder, folded_params, test_features, {"layer": "all"}) assert_allclose(recons, recons_folded) diff --git a/tests/test_text_dataset.py b/tests/test_text_dataset.py index 8ed8f74..e0d3b4c 100644 --- a/tests/test_text_dataset.py +++ b/tests/test_text_dataset.py @@ -10,7 +10,7 @@ from datasets import load_dataset from torch.utils.data import DataLoader -from crosslayer_transcoder.data.text_dataset import TextDataset, worker_init_fn +from crosslayer_transcoder.data.text_dataset import TextDataset class MockDataset: @@ -65,7 +65,7 @@ def test_init(self, text_dataset, dataset, model): assert text_dataset.to_tokens == model.tokenizer assert text_dataset.batch_size == 4 assert text_dataset.seq_len == 128 - assert text_dataset.drop_last_batch == True + assert text_dataset.drop_last_batch assert text_dataset.hf_text_accessor == "text" assert text_dataset.token_pointer == 0 assert text_dataset.batch_pointer == 0 @@ -186,7 +186,10 @@ def test_with_dataloader(self, text_dataset): """Test TextDataset with PyTorch DataLoader.""" # Test with single worker dataloader = DataLoader( - text_dataset, batch_size=None, shuffle=False, num_workers=0 # TextDataset handles batching + text_dataset, + batch_size=None, + shuffle=False, + num_workers=0, # TextDataset handles batching ) batch, mask = next(iter(dataloader)) @@ -350,9 +353,9 @@ def test_custom_english_sentences_roundtrip(self, model): # The tokens should match (at least the first part) min_len = min(len(manual_tokens), len(batch_tokens)) - assert torch.equal( - torch.tensor(manual_tokens[:min_len]), batch_tokens[:min_len] - ), f"Token mismatch for sentence {i}: expected {manual_tokens[:min_len]}, got {batch_tokens[:min_len].tolist()}" + assert torch.equal(torch.tensor(manual_tokens[:min_len]), batch_tokens[:min_len]), ( + f"Token mismatch for sentence {i}: expected {manual_tokens[:min_len]}, got {batch_tokens[:min_len].tolist()}" + ) def test_short_sequences_padding_and_mask(self, model): """Test that short sequences (< seq_len) are properly padded and masked.""" @@ -393,30 +396,30 @@ def test_short_sequences_padding_and_mask(self, model): # Check that the actual tokens match manual tokenization actual_tokens = batch[i][:expected_length] - assert torch.equal( - actual_tokens, torch.tensor(manual_tokens) - ), f"Token mismatch for sentence {i}: expected {manual_tokens}, got {actual_tokens.tolist()}" + assert torch.equal(actual_tokens, torch.tensor(manual_tokens)), ( + f"Token mismatch for sentence {i}: expected {manual_tokens}, got {actual_tokens.tolist()}" + ) # Check mask behavior sentence_mask = mask[i] # First `expected_length` positions should be True - assert torch.all( - sentence_mask[:expected_length] - ), f"Mask should be True for first {expected_length} positions in sentence {i}" + assert torch.all(sentence_mask[:expected_length]), ( + f"Mask should be True for first {expected_length} positions in sentence {i}" + ) # Remaining positions should be False (padding) if expected_length < 50: - assert torch.all( - ~sentence_mask[expected_length:] - ), f"Mask should be False for padding positions after {expected_length} in sentence {i}" + assert torch.all(~sentence_mask[expected_length:]), ( + f"Mask should be False for padding positions after {expected_length} in sentence {i}" + ) # Check that padded positions contain zeros if expected_length < 50: padding_tokens = batch[i][expected_length:] - assert torch.all( - padding_tokens == 0 - ), f"Padding positions should be zero for sentence {i}, got {padding_tokens[:5].tolist()}..." + assert torch.all(padding_tokens == 0), ( + f"Padding positions should be zero for sentence {i}, got {padding_tokens[:5].tolist()}..." + ) # Verify roundtrip with only valid tokens valid_tokens = batch[i][mask[i]] @@ -436,16 +439,16 @@ def test_short_sequences_padding_and_mask(self, model): assert len(decoded_text) > 0 # Check that no padding tokens were included in decoding - assert ( - len(valid_tokens) == expected_length - ), f"Valid tokens length {len(valid_tokens)} should match expected {expected_length}" + assert len(valid_tokens) == expected_length, ( + f"Valid tokens length {len(valid_tokens)} should match expected {expected_length}" + ) # Additional verification: ensure no errors are raised # and the function handles short sequences gracefully try: # This should work without any issues next(text_dataset) - assert False, "Should have raised StopIteration since we exhausted the dataset" + raise AssertionError("Should have raised StopIteration since we exhausted the dataset") except StopIteration: # This is expected behavior pass diff --git a/tests/test_topk.py b/tests/test_topk.py index 2ce6e10..d89c6e0 100644 --- a/tests/test_topk.py +++ b/tests/test_topk.py @@ -244,6 +244,6 @@ def test_nnsight_compatibility(nnsight_model, topk_fixture, request): with nnsight_model.trace("test"): actvs = nnsight_model.transformer.h[0].mlp.output topk.to(actvs.device) - topk_result = topk(actvs) + topk(actvs) except Exception as e: pytest.fail(f"nnsight compatibility test failed for {topk_fixture}: {e}") diff --git a/uv.lock b/uv.lock index 5cd028c..6514728 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" resolution-markers = [ "sys_platform == 'linux'", @@ -357,6 +357,7 @@ dev = [ { name = "ipykernel" }, { name = "jupyterlab" }, { name = "pytest" }, + { name = "ruff" }, ] [package.metadata] @@ -383,6 +384,7 @@ dev = [ { name = "ipykernel", specifier = ">=6.29.5" }, { name = "jupyterlab", specifier = ">=4.4.2" }, { name = "pytest", specifier = ">=8.4.1" }, + { name = "ruff", specifier = ">=0.9.0" }, ] [[package]] @@ -1894,6 +1896,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/f2/c2d64f6564f32af913bf5f3f7ae41c7c263c5ae4c4e8f1a17af8af66cd46/rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728", size = 225399, upload-time = "2025-05-21T12:43:53.351Z" }, ] +[[package]] +name = "ruff" +version = "0.14.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d4/77/9a7fe084d268f8855d493e5031ea03fa0af8cc05887f638bf1c4e3363eb8/ruff-0.14.11.tar.gz", hash = "sha256:f6dc463bfa5c07a59b1ff2c3b9767373e541346ea105503b4c0369c520a66958", size = 5993417, upload-time = "2026-01-08T19:11:58.322Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/a6/a4c40a5aaa7e331f245d2dc1ac8ece306681f52b636b40ef87c88b9f7afd/ruff-0.14.11-py3-none-linux_armv6l.whl", hash = "sha256:f6ff2d95cbd335841a7217bdfd9c1d2e44eac2c584197ab1385579d55ff8830e", size = 12951208, upload-time = "2026-01-08T19:12:09.218Z" }, + { url = "https://files.pythonhosted.org/packages/5c/5c/360a35cb7204b328b685d3129c08aca24765ff92b5a7efedbdd6c150d555/ruff-0.14.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f6eb5c1c8033680f4172ea9c8d3706c156223010b8b97b05e82c59bdc774ee6", size = 13330075, upload-time = "2026-01-08T19:12:02.549Z" }, + { url = "https://files.pythonhosted.org/packages/1b/9e/0cc2f1be7a7d33cae541824cf3f95b4ff40d03557b575912b5b70273c9ec/ruff-0.14.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f2fc34cc896f90080fca01259f96c566f74069a04b25b6205d55379d12a6855e", size = 12257809, upload-time = "2026-01-08T19:12:00.366Z" }, + { url = "https://files.pythonhosted.org/packages/a7/e5/5faab97c15bb75228d9f74637e775d26ac703cc2b4898564c01ab3637c02/ruff-0.14.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53386375001773ae812b43205d6064dae49ff0968774e6befe16a994fc233caa", size = 12678447, upload-time = "2026-01-08T19:12:13.899Z" }, + { url = "https://files.pythonhosted.org/packages/1b/33/e9767f60a2bef779fb5855cab0af76c488e0ce90f7bb7b8a45c8a2ba4178/ruff-0.14.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a697737dce1ca97a0a55b5ff0434ee7205943d4874d638fe3ae66166ff46edbe", size = 12758560, upload-time = "2026-01-08T19:11:42.55Z" }, + { url = "https://files.pythonhosted.org/packages/eb/84/4c6cf627a21462bb5102f7be2a320b084228ff26e105510cd2255ea868e5/ruff-0.14.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6845ca1da8ab81ab1dce755a32ad13f1db72e7fba27c486d5d90d65e04d17b8f", size = 13599296, upload-time = "2026-01-08T19:11:30.371Z" }, + { url = "https://files.pythonhosted.org/packages/88/e1/92b5ed7ea66d849f6157e695dc23d5d6d982bd6aa8d077895652c38a7cae/ruff-0.14.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e36ce2fd31b54065ec6f76cb08d60159e1b32bdf08507862e32f47e6dde8bcbf", size = 15048981, upload-time = "2026-01-08T19:12:04.742Z" }, + { url = "https://files.pythonhosted.org/packages/61/df/c1bd30992615ac17c2fb64b8a7376ca22c04a70555b5d05b8f717163cf9f/ruff-0.14.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590bcc0e2097ecf74e62a5c10a6b71f008ad82eb97b0a0079e85defe19fe74d9", size = 14633183, upload-time = "2026-01-08T19:11:40.069Z" }, + { url = "https://files.pythonhosted.org/packages/04/e9/fe552902f25013dd28a5428a42347d9ad20c4b534834a325a28305747d64/ruff-0.14.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:53fe71125fc158210d57fe4da26e622c9c294022988d08d9347ec1cf782adafe", size = 14050453, upload-time = "2026-01-08T19:11:37.555Z" }, + { url = "https://files.pythonhosted.org/packages/ae/93/f36d89fa021543187f98991609ce6e47e24f35f008dfe1af01379d248a41/ruff-0.14.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a35c9da08562f1598ded8470fcfef2afb5cf881996e6c0a502ceb61f4bc9c8a3", size = 13757889, upload-time = "2026-01-08T19:12:07.094Z" }, + { url = "https://files.pythonhosted.org/packages/b7/9f/c7fb6ecf554f28709a6a1f2a7f74750d400979e8cd47ed29feeaa1bd4db8/ruff-0.14.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:0f3727189a52179393ecf92ec7057c2210203e6af2676f08d92140d3e1ee72c1", size = 13955832, upload-time = "2026-01-08T19:11:55.064Z" }, + { url = "https://files.pythonhosted.org/packages/db/a0/153315310f250f76900a98278cf878c64dfb6d044e184491dd3289796734/ruff-0.14.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:eb09f849bd37147a789b85995ff734a6c4a095bed5fd1608c4f56afc3634cde2", size = 12586522, upload-time = "2026-01-08T19:11:35.356Z" }, + { url = "https://files.pythonhosted.org/packages/2f/2b/a73a2b6e6d2df1d74bf2b78098be1572191e54bec0e59e29382d13c3adc5/ruff-0.14.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:c61782543c1231bf71041461c1f28c64b961d457d0f238ac388e2ab173d7ecb7", size = 12724637, upload-time = "2026-01-08T19:11:47.796Z" }, + { url = "https://files.pythonhosted.org/packages/f0/41/09100590320394401cd3c48fc718a8ba71c7ddb1ffd07e0ad6576b3a3df2/ruff-0.14.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:82ff352ea68fb6766140381748e1f67f83c39860b6446966cff48a315c3e2491", size = 13145837, upload-time = "2026-01-08T19:11:32.87Z" }, + { url = "https://files.pythonhosted.org/packages/3b/d8/e035db859d1d3edf909381eb8ff3e89a672d6572e9454093538fe6f164b0/ruff-0.14.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:728e56879df4ca5b62a9dde2dd0eb0edda2a55160c0ea28c4025f18c03f86984", size = 13850469, upload-time = "2026-01-08T19:12:11.694Z" }, + { url = "https://files.pythonhosted.org/packages/4e/02/bb3ff8b6e6d02ce9e3740f4c17dfbbfb55f34c789c139e9cd91985f356c7/ruff-0.14.11-py3-none-win32.whl", hash = "sha256:337c5dd11f16ee52ae217757d9b82a26400be7efac883e9e852646f1557ed841", size = 12851094, upload-time = "2026-01-08T19:11:45.163Z" }, + { url = "https://files.pythonhosted.org/packages/58/f1/90ddc533918d3a2ad628bc3044cdfc094949e6d4b929220c3f0eb8a1c998/ruff-0.14.11-py3-none-win_amd64.whl", hash = "sha256:f981cea63d08456b2c070e64b79cb62f951aa1305282974d4d5216e6e0178ae6", size = 14001379, upload-time = "2026-01-08T19:11:52.591Z" }, + { url = "https://files.pythonhosted.org/packages/c4/1c/1dbe51782c0e1e9cfce1d1004752672d2d4629ea46945d19d731ad772b3b/ruff-0.14.11-py3-none-win_arm64.whl", hash = "sha256:649fb6c9edd7f751db276ef42df1f3df41c38d67d199570ae2a7bd6cbc3590f0", size = 12938644, upload-time = "2026-01-08T19:11:50.027Z" }, +] + [[package]] name = "safetensors" version = "0.5.3"