From 71260ccaf2d77daecf5be950dd61d507af09c12d Mon Sep 17 00:00:00 2001 From: haochengxia Date: Mon, 16 Feb 2026 17:37:41 -0600 Subject: [PATCH 1/4] Support cache size ratio with reader --- README.md | 6 +- libcachesim/__init__.pyi | 216 ++++++++++++++++++++++++++++---- libcachesim/cache.py | 158 ++++++++++++++--------- libcachesim/synthetic_reader.py | 8 +- scripts/install.sh | 24 +++- src/libCacheSim | 2 +- tests/test_cache.py | 12 ++ 7 files changed, 328 insertions(+), 98 deletions(-) diff --git a/README.md b/README.md index 8c88392..5214807 100644 --- a/README.md +++ b/README.md @@ -64,11 +64,12 @@ reader = lcs.TraceReader( # Step 2: Initialize cache cache = lcs.S3FIFO( - cache_size=1024*1024, + cache_size=0.1, # Cache specific parameters small_size_ratio=0.2, ghost_size_ratio=0.8, move_to_main_threshold=2, + reader=reader, ) # Step 3: Process entire trace efficiently (C++ backend) @@ -77,11 +78,12 @@ print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ra # Step 3.1: Process the first 1000 requests cache = lcs.S3FIFO( - cache_size=1024 * 1024, + cache_size=0.1, # Cache specific parameters small_size_ratio=0.2, ghost_size_ratio=0.8, move_to_main_threshold=2, + reader=reader, ) req_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000) print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") diff --git a/libcachesim/__init__.pyi b/libcachesim/__init__.pyi index dc253ce..137aa82 100644 --- a/libcachesim/__init__.pyi +++ b/libcachesim/__init__.pyi @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import bool, int, str, tuple +from typing import bool, int, str, tuple, Optional, Callable from collections.abc import Iterator from .libcachesim_python import ReqOp, TraceType, SamplerType @@ -38,6 +38,85 @@ class CommonCacheParams: hashpower: int consider_obj_metadata: bool +class ReaderInitParam: + ignore_obj_size: bool + ignore_size_zero_req: bool + obj_id_is_num: 
bool + obj_id_is_num_set: bool + cap_at_n_req: int + time_field: int + obj_id_field: int + obj_size_field: int + op_field: int + ttl_field: int + cnt_field: int + tenant_field: int + next_access_vtime_field: int + n_feature_fields: int + block_size: int + has_header: bool + has_header_set: bool + delimiter: str + trace_start_offset: int + binary_fmt_str: str + def __init__( + self, + binary_fmt_str: str = "", + ignore_obj_size: bool = False, + ignore_size_zero_req: bool = True, + obj_id_is_num: bool = True, + obj_id_is_num_set: bool = False, + cap_at_n_req: int = -1, + block_size: int = -1, + has_header: bool = False, + has_header_set: bool = False, + delimiter: str = ",", + trace_start_offset: int = 0, + sampler = None, + ): ... + +class AnalysisParam: + access_pattern_sample_ratio_inv: int + track_n_popular: int + track_n_hit: int + time_window: int + warmup_time: int + def __init__( + self, + access_pattern_sample_ratio_inv: int = 10, + track_n_popular: int = 10, + track_n_hit: int = 5, + time_window: int = 60, + warmup_time: int = 0, + ): ... + +class AnalysisOption: + req_rate: bool + access_pattern: bool + size: bool + reuse: bool + popularity: bool + ttl: bool + popularity_decay: bool + lifetime: bool + create_future_reuse_ccdf: bool + prob_at_age: bool + size_change: bool + def __init__( + self, + req_rate: bool = True, + access_pattern: bool = True, + size: bool = True, + reuse: bool = True, + popularity: bool = True, + ttl: bool = False, + popularity_decay: bool = False, + lifetime: bool = False, + create_future_reuse_ccdf: bool = False, + prob_at_age: bool = False, + size_change: bool = False, + ): ... + class Cache: cache_size: int default_ttl: int @@ -80,113 +159,164 @@ class CacheBase: def cache_name(self) -> str: ... 
# Core cache algorithms +class LHD(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + class LRU(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class FIFO(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class LFU(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class ARC(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class Clock(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, init_freq: int = 0, n_bit_counter: int = 1, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... 
class Random(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... # Advanced algorithms class S3FIFO(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, small_size_ratio: float = 0.1, ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class Sieve(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class LIRS(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class TwoQ(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, a_in_size_ratio: float = 0.25, a_out_size_ratio: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... 
class SLRU(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class WTinyLFU(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, main_cache: str = "SLRU", window_size: float = 0.01, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class LeCaR(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, update_weight: bool = True, lru_weight: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class LFUDA(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class ClockPro(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, init_ref: int = 0, init_ratio_cold: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... 
class Cacheus(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... # Optimal algorithms class Belady(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None ): ... class BeladySize(CacheBase): def __init__( - self, cache_size: int, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, n_samples: int = 128, admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + +# Probabilistic algorithms +class LRUProb(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, prob: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + +class FlashProb(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, ram_size_ratio: float = 0.05, disk_admit_prob: float = 0.2, ram_cache: str = "LRU", disk_cache: str = "FIFO", admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + +# Size-based algorithms +class Size(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... 
+ +class GDSF(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + +# Hyperbolic algorithms +class Hyperbolic(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + +# Extra deps +class ThreeLCache(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + +class GLCache(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, segment_size: int = 100, n_merge: int = 2, type: str = "learned", rank_intvl: float = 0.02, merge_consecutive_segs: bool = True, train_source_y: str = "online", retrain_intvl: int = 86400, admissioner = None, reader: Optional[ReaderProtocol] = None + ): ... + +class LRB(CacheBase): + def __init__( + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner = None, reader: Optional[ReaderProtocol] = None ): ... 
# Plugin cache class PluginCache(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, + cache_init_hook: Callable, + cache_hit_hook: Callable, + cache_miss_hook: Callable, + cache_eviction_hook: Callable, + cache_remove_hook: Callable, + cache_free_hook: Optional[Callable] = None, cache_name: str = "PythonHookCache", default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, - cache_init_hook=None, - cache_hit_hook=None, - cache_miss_hook=None, - cache_eviction_hook=None, - cache_remove_hook=None, - cache_free_hook=None, + admissioner = None, + reader: Optional[ReaderProtocol] = None, ): ... def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): ... @@ -246,3 +376,37 @@ class Util: def process_trace( cache: CacheBase, reader: ReaderProtocol, start_req: int = 0, max_req: int = -1 ) -> tuple[float, float]: ... + +# Admissioners +class AdmissionerBase: + def __init__(self, _admissioner): ... + def clone(self): ... + def update(self, req: Request, cache_size: int): ... + def admit(self, req: Request) -> bool: ... + def free(self): ... + +class BloomFilterAdmissioner(AdmissionerBase): + def __init__(self): ... + +class ProbAdmissioner(AdmissionerBase): + def __init__(self, prob: Optional[float] = None): ... + +class SizeAdmissioner(AdmissionerBase): + def __init__(self, size_threshold: Optional[int] = None): ... + +class SizeProbabilisticAdmissioner(AdmissionerBase): + def __init__(self, exponent: Optional[float] = None): ... + +class AdaptSizeAdmissioner(AdmissionerBase): + def __init__(self, max_iteration: Optional[int] = None, reconf_interval: Optional[int] = None): ... + +class PluginAdmissioner(AdmissionerBase): + def __init__( + self, + admissioner_name: str, + admissioner_init_hook: Callable, + admissioner_admit_hook: Callable, + admissioner_clone_hook: Callable, + admissioner_update_hook: Callable, + admissioner_free_hook: Callable, + ): ... 
diff --git a/libcachesim/cache.py b/libcachesim/cache.py index 506257e..42b38c2 100644 --- a/libcachesim/cache.py +++ b/libcachesim/cache.py @@ -1,4 +1,5 @@ from abc import ABC +import logging from typing import Callable, Optional from .libcachesim_python import ( CommonCacheParams, @@ -45,12 +46,15 @@ from .protocols import ReaderProtocol +logger = logging.getLogger(__name__) + + class CacheBase(ABC): """Base class for all cache implementations""" _cache: Cache # Internal C++ cache object - def __init__(self, _cache: Cache, admissioner: AdmissionerBase = None): + def __init__(self, _cache: Cache, admissioner: AdmissionerBase = None, reader: ReaderProtocol = None): if admissioner is not None: _cache.admissioner = admissioner._admissioner self._cache = _cache @@ -145,9 +149,19 @@ def cache_name(self) -> str: def _create_common_params( - cache_size: int, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, + reader: ReaderProtocol = None ) -> CommonCacheParams: """Helper to create common cache parameters""" + if isinstance(cache_size, float): + logger.debug(f"Cache size provided as float {cache_size}, interpreting as a fraction of the working set size in bytes") + if reader is None: + raise ValueError("Cache size provided as float but no reader provided to determine total size") + total_wss_bytes = reader.get_working_set_size()[1] + cache_size_bytes = int(cache_size * total_wss_bytes) + logger.debug(f"Interpreted cache size as {cache_size_bytes} bytes based on total working set size of {total_wss_bytes} bytes") + cache_size = cache_size_bytes + return CommonCacheParams( cache_size=cache_size, default_ttl=default_ttl, @@ -164,14 +178,15 @@ class LHD(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + 
reader: ReaderProtocol = None, ): super().__init__( - _cache=LHD_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=LHD_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -181,14 +196,15 @@ class LRU(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=LRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=LRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -198,14 +214,15 @@ class FIFO(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -215,14 +232,15 @@ class LFU(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=LFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=LFU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -232,14 +250,15 @@ class ARC(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int 
= 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=ARC_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=ARC_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -254,18 +273,19 @@ class Clock(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, init_freq: int = 0, n_bit_counter: int = 1, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = f"init-freq={init_freq}, n-bit-counter={n_bit_counter}" super().__init__( _cache=Clock_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -277,14 +297,15 @@ class Random(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=Random_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=Random_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -301,7 +322,7 @@ class S3FIFO(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, @@ -309,10 +330,11 @@ def __init__( ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = 
f"small-size-ratio={small_size_ratio}, ghost-size-ratio={ghost_size_ratio}, move-to-main-threshold={move_to_main_threshold}" super().__init__( - _cache=S3FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params), + _cache=S3FIFO_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params), admissioner=admissioner ) @@ -322,14 +344,15 @@ class Sieve(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=Sieve_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=Sieve_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -339,14 +362,15 @@ class LIRS(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=LIRS_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -364,18 +388,19 @@ class TwoQ(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, a_in_size_ratio: float = 0.25, a_out_size_ratio: float = 0.5, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = f"Ain-size-ratio={a_in_size_ratio}, Aout-size-ratio={a_out_size_ratio}" super().__init__( _cache=TwoQ_init( - _create_common_params(cache_size, 
default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -387,14 +412,15 @@ class SLRU(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=SLRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=SLRU_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -409,18 +435,19 @@ class WTinyLFU(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, main_cache: str = "SLRU", window_size: float = 0.01, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = f"main-cache={main_cache}, window-size={window_size}" super().__init__( _cache=WTinyLFU_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -437,18 +464,19 @@ class LeCaR(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, update_weight: bool = True, lru_weight: float = 0.5, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = f"update-weight={int(update_weight)}, lru-weight={lru_weight}" super().__init__( _cache=LeCaR_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, 
consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -460,14 +488,15 @@ class LFUDA(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=LFUDA_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=LFUDA_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -482,18 +511,19 @@ class ClockPro(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, init_ref: int = 0, init_ratio_cold: float = 0.5, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = f"init-ref={init_ref}, init-ratio-cold={init_ratio_cold}" super().__init__( _cache=ClockPro_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -505,14 +535,15 @@ class Cacheus(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=Cacheus_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=Cacheus_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -523,14 +554,15 @@ class Belady(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, 
consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=Belady_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=Belady_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -544,17 +576,18 @@ class BeladySize(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, n_samples: int = 128, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = f"n-samples={n_samples}" super().__init__( _cache=BeladySize_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -570,17 +603,18 @@ class LRUProb(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, prob: float = 0.5, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = f"prob={prob}" super().__init__( _cache=LRU_Prob_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -599,7 +633,7 @@ class FlashProb(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, @@ -608,11 +642,12 @@ def __init__( ram_cache: str = "LRU", disk_cache: str = "FIFO", admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): cache_specific_params = 
f"ram-size-ratio={ram_size_ratio}, disk-admit-prob={disk_admit_prob}, ram-cache={ram_cache}, disk-cache={disk_cache}" super().__init__( _cache=flashProb_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -624,14 +659,15 @@ class Size(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=Size_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=Size_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -641,14 +677,15 @@ class GDSF(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=GDSF_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=GDSF_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ -658,14 +695,15 @@ class Hyperbolic(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): super().__init__( - _cache=Hyperbolic_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata)), + _cache=Hyperbolic_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader)), admissioner=admissioner ) @@ 
-680,12 +718,13 @@ class ThreeLCache(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): # Try to import ThreeLCache_init try: @@ -698,7 +737,7 @@ def __init__( cache_specific_params = f"objective={objective}" super().__init__( _cache=ThreeLCache_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -720,7 +759,7 @@ class GLCache(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, @@ -732,6 +771,7 @@ def __init__( train_source_y: str = "online", retrain_intvl: int = 86400, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): # Try to import GLCache_init try: @@ -744,7 +784,7 @@ def __init__( cache_specific_params = f"segment-size={segment_size}, n-merge={n_merge}, type={type}, rank-intvl={rank_intvl}, merge-consecutive-segs={merge_consecutive_segs}, train-source-y={train_source_y}, retrain-intvl={retrain_intvl}" super().__init__( _cache=GLCache_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -760,12 +800,13 @@ class LRB(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): # Try to import LRB_init try: @@ -778,7 +819,7 @@ def 
__init__( cache_specific_params = f"objective={objective}" super().__init__( _cache=LRB_init( - _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader), cache_specific_params, ), admissioner=admissioner @@ -791,7 +832,7 @@ class PluginCache(CacheBase): def __init__( self, - cache_size: int, + cache_size: int | float, cache_init_hook: Callable, cache_hit_hook: Callable, cache_miss_hook: Callable, @@ -803,8 +844,9 @@ def __init__( hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: AdmissionerBase = None, + reader: ReaderProtocol = None, ): - self.common_cache_params = _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata) + self.common_cache_params = _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata, reader) super().__init__( _cache=pypluginCache_init( diff --git a/libcachesim/synthetic_reader.py b/libcachesim/synthetic_reader.py index e746dc0..8438da1 100644 --- a/libcachesim/synthetic_reader.py +++ b/libcachesim/synthetic_reader.py @@ -206,11 +206,9 @@ def get_read_pos(self) -> float: def get_working_set_size(self) -> tuple[int, int]: """Calculate working set size""" - wss_obj, wss_byte = 0, 0 - if self.current_pos > 0: - unique_ids = np.unique(self.obj_ids[: self.current_pos]) - wss_obj = len(unique_ids) - wss_byte = wss_obj * self.obj_size + unique_ids = np.unique(self.obj_ids) + wss_obj = len(unique_ids) + wss_byte = wss_obj * self.obj_size return wss_obj, wss_byte def __iter__(self) -> Iterator[Request]: diff --git a/scripts/install.sh b/scripts/install.sh index bc52786..7fc85ee 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -42,19 +42,31 @@ if [ $? -ne 0 ]; then exit 1 fi -python scripts/sync_version.py -CMAKE_ARGS=$CMAKE_ARGS python -m pip install -e . 
-vvv +# Detect Python command +PYTHON_CMD="" +if command -v python3 &> /dev/null; then + PYTHON_CMD="python3" +elif command -v python &> /dev/null; then + PYTHON_CMD="python" +else + echo "Error: python is not installed. Please install Python 3 and try again." + exit 1 +fi + +echo "Using Python command: $PYTHON_CMD" +$PYTHON_CMD scripts/sync_version.py +CMAKE_ARGS=$CMAKE_ARGS $PYTHON_CMD -m pip install -e . -vvv # Test that the import works echo "Testing import..." -python -c "import libcachesim" +$PYTHON_CMD -c "import libcachesim" # Run tests -python -m pip install pytest -python -m pytest tests +$PYTHON_CMD -m pip install pytest +$PYTHON_CMD -m pytest tests if [[ "$CMAKE_ARGS" == *"-DENABLE_LRB=ON"* && "$CMAKE_ARGS" == *"-DENABLE_GLCACHE=ON"* && "$CMAKE_ARGS" == *"-DENABLE_3L_CACHE=ON"* ]]; then echo "Running tests for optional eviction algos..." - python -m pytest tests -m "optional" + $PYTHON_CMD -m pytest tests -m "optional" fi # Build wheels if requested diff --git a/src/libCacheSim b/src/libCacheSim index 91f703a..dbf8423 160000 --- a/src/libCacheSim +++ b/src/libCacheSim @@ -1 +1 @@ -Subproject commit 91f703a0bb9bcb728cf48ae9f1df03a8d096db21 +Subproject commit dbf84235759f64f5e0353a77c49d187ed0dd3608 diff --git a/tests/test_cache.py b/tests/test_cache.py index 9eb2c86..61cfae3 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -540,3 +540,15 @@ def test_3lcache(self): cache = ThreeLCache(1024) assert cache is not None + + def test_fractional_cache_size(self): + """Test initializing cache with fractional size""" + # Create a trace, ignore obj size + trace = SyntheticReader(num_of_req=10000, obj_size=1, dist="uniform", num_objects=1000, seed=42) + cache = LRU(0.5, reader=trace) + assert cache.cache_size == 500 + + # with obj size + trace = SyntheticReader(num_of_req=10000, obj_size=100, dist="uniform", num_objects=1000, seed=42) + cache = LRU(0.5, reader=trace) + assert cache.cache_size == 500 * 100 \ No newline at end of file From 
22ce668e25e0f6a3737026f3ff297bf7818b8299 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Mon, 16 Feb 2026 17:56:30 -0600 Subject: [PATCH 2/4] Apply suggestions --- README.md | 11 +++++--- libcachesim/__init__.pyi | 60 ++++++++++++++++++++-------------------- libcachesim/cache.py | 32 +++++++++++++++++---- scripts/install.sh | 16 +++++------ 4 files changed, 72 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 5214807..86e6ba6 100644 --- a/README.md +++ b/README.md @@ -63,13 +63,15 @@ reader = lcs.TraceReader( ) # Step 2: Initialize cache +# Note: cache_size as float (0.1) means 10% of the reader's total working set size in bytes. +# To specify an absolute size, pass an integer (e.g., 1024*1024 for 1MB). cache = lcs.S3FIFO( - cache_size=0.1, + cache_size=0.1, # 0.1 = 10% of trace's working set size (requires reader parameter) # Cache specific parameters small_size_ratio=0.2, ghost_size_ratio=0.8, move_to_main_threshold=2, - reader=reader, + reader=reader, # Required when cache_size is a float ratio ) # Step 3: Process entire trace efficiently (C++ backend) @@ -77,13 +79,14 @@ req_miss_ratio, byte_miss_ratio = cache.process_trace(reader) print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") # Step 3.1: Process the first 1000 requests +# Note: cache_size as float means a ratio of the working set size (requires reader parameter) cache = lcs.S3FIFO( - cache_size=0.1, + cache_size=0.1, # 10% of trace's working set size # Cache specific parameters small_size_ratio=0.2, ghost_size_ratio=0.8, move_to_main_threshold=2, - reader=reader, + reader=reader, # Required when cache_size is a float ratio ) req_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000) print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") diff --git a/libcachesim/__init__.pyi b/libcachesim/__init__.pyi index 137aa82..508c2d3 100644 --- a/libcachesim/__init__.pyi +++ 
b/libcachesim/__init__.pyi @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import bool, int, str, tuple, Optional, Callable +from typing import Optional, Callable, Any from collections.abc import Iterator from .libcachesim_python import ReqOp, TraceType, SamplerType @@ -72,7 +72,7 @@ class ReaderInitParam: has_header_set: bool = False, delimiter: str = ",", trace_start_offset: int = 0, - sampler = None, + sampler: Optional[Any] = None, ): ... class AnalysisParam: @@ -161,143 +161,143 @@ class CacheBase: # Core cache algorithms class LHD(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class LRU(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class FIFO(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
class LFU(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class ARC(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class Clock(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, init_freq: int = 0, n_bit_counter: int = 1, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, init_freq: int = 0, n_bit_counter: int = 1, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class Random(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
# Advanced algorithms class S3FIFO(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, small_size_ratio: float = 0.1, ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, small_size_ratio: float = 0.1, ghost_size_ratio: float = 0.9, move_to_main_threshold: int = 2, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class Sieve(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class LIRS(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
class TwoQ(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, a_in_size_ratio: float = 0.25, a_out_size_ratio: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, a_in_size_ratio: float = 0.25, a_out_size_ratio: float = 0.5, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class SLRU(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class WTinyLFU(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, main_cache: str = "SLRU", window_size: float = 0.01, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, main_cache: str = "SLRU", window_size: float = 0.01, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
class LeCaR(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, update_weight: bool = True, lru_weight: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, update_weight: bool = True, lru_weight: float = 0.5, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class LFUDA(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class ClockPro(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, init_ref: int = 0, init_ratio_cold: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, init_ref: int = 0, init_ratio_cold: float = 0.5, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class Cacheus(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
# Optimal algorithms class Belady(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class BeladySize(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, n_samples: int = 128, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, n_samples: int = 128, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... # Probabilistic algorithms class LRUProb(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, prob: float = 0.5, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, prob: float = 0.5, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
class FlashProb(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, ram_size_ratio: float = 0.05, disk_admit_prob: float = 0.2, ram_cache: str = "LRU", disk_cache: str = "FIFO", admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, ram_size_ratio: float = 0.05, disk_admit_prob: float = 0.2, ram_cache: str = "LRU", disk_cache: str = "FIFO", admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... # Size-based algorithms class Size(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class GDSF(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
# Hyperbolic algorithms class Hyperbolic(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... # Extra deps class ThreeLCache(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... class GLCache(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, segment_size: int = 100, n_merge: int = 2, type: str = "learned", rank_intvl: float = 0.02, merge_consecutive_segs: bool = True, train_source_y: str = "online", retrain_intvl: int = 86400, admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, segment_size: int = 100, n_merge: int = 2, type: str = "learned", rank_intvl: float = 0.02, merge_consecutive_segs: bool = True, train_source_y: str = "online", retrain_intvl: int = 86400, admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... 
class LRB(CacheBase): def __init__( - self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner = None, reader: Optional[ReaderProtocol] = None + self, cache_size: int | float, default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, objective: str = "byte-miss-ratio", admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None ): ... # Plugin cache @@ -315,7 +315,7 @@ class PluginCache(CacheBase): default_ttl: int = 25920000, hashpower: int = 24, consider_obj_metadata: bool = False, - admissioner = None, + admissioner: Optional["AdmissionerBase"] = None, reader: Optional[ReaderProtocol] = None, ): ... def set_hooks(self, init_hook, hit_hook, miss_hook, eviction_hook, remove_hook, free_hook=None): ... diff --git a/libcachesim/cache.py b/libcachesim/cache.py index 42b38c2..b83a7b9 100644 --- a/libcachesim/cache.py +++ b/libcachesim/cache.py @@ -54,7 +54,7 @@ class CacheBase(ABC): _cache: Cache # Internal C++ cache object - def __init__(self, _cache: Cache, admissioner: AdmissionerBase = None, reader: ReaderProtocol = None): + def __init__(self, _cache: Cache, admissioner: AdmissionerBase = None): if admissioner is not None: _cache.admissioner = admissioner._admissioner self._cache = _cache @@ -152,14 +152,36 @@ def _create_common_params( cache_size: int | float, default_ttl: int = 86400 * 300, hashpower: int = 24, consider_obj_metadata: bool = False, reader: ReaderProtocol = None ) -> CommonCacheParams: - """Helper to create common cache parameters""" + """Helper to create common cache parameters. + + If ``cache_size`` is provided as a float, it is interpreted as a ratio + (0 < cache_size <= 1) of the total working set size in bytes as + returned by ``reader.get_working_set_size()``. 
+ """ + if cache_size <= 0: + raise ValueError(f"cache_size must be positive; got {cache_size!r}") + if isinstance(cache_size, float): - logger.debug(f"Cache size provided as float {cache_size}, interpreting as bytes") + if not (0 < cache_size <= 1): + raise ValueError( + f"When provided as a float, cache_size is interpreted as a ratio " + f"of total working set bytes and must satisfy 0 < cache_size <= 1; " + f"got {cache_size!r}" + ) + logger.debug( + f"Cache size provided as float {cache_size}, interpreting as ratio of total working set bytes" + ) if reader is None: - raise ValueError("Cache size provided as float but no trace provided to determine total size") + raise ValueError( + "Cache size provided as float ratio but no trace reader provided " + "to determine total working set size" + ) total_wss_bytes = reader.get_working_set_size()[1] cache_size_bytes = int(cache_size * total_wss_bytes) - logger.debug(f"Interpreted cache size as {cache_size_bytes} bytes based on total working set size of {total_wss_bytes} bytes") + logger.debug( + f"Interpreted cache size ratio {cache_size} as {cache_size_bytes} bytes " + f"based on total working set size of {total_wss_bytes} bytes" + ) cache_size = cache_size_bytes return CommonCacheParams( diff --git a/scripts/install.sh b/scripts/install.sh index 7fc85ee..8708076 100644 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -44,9 +44,9 @@ fi # Detect Python command PYTHON_CMD="" -if command -v python3 &> /dev/null; then +if command -v python3 >/dev/null 2>&1; then PYTHON_CMD="python3" -elif command -v python &> /dev/null; then +elif command -v python >/dev/null 2>&1; then PYTHON_CMD="python" else echo "Error: python is not installed. Please install Python 3 and try again." @@ -76,26 +76,26 @@ if [[ $BUILD_WHEELS -eq 1 ]]; then # --- Environment and dependency checks --- echo "Checking dependencies: python3, pip, docker, cibuildwheel..." - if ! command -v python3 &> /dev/null; then + if ! 
command -v python3 >/dev/null 2>&1; then echo "Error: python3 is not installed. Please install it and run this script again." exit 1 fi - if ! python3 -m pip --version &> /dev/null; then + if ! python3 -m pip --version >/dev/null 2>&1; then echo "Error: pip for python3 is not available. Please install it." exit 1 fi - if ! command -v docker &> /dev/null; then + if ! command -v docker >/dev/null 2>&1; then echo "Error: docker is not installed. Please install it and ensure the docker daemon is running." exit 1 fi # Check if user can run docker without sudo, otherwise use sudo SUDO_CMD="" - if ! docker ps &> /dev/null; then + if ! docker ps >/dev/null 2>&1; then echo "Warning: Current user cannot run docker. Trying with sudo." - if sudo docker ps &> /dev/null; then + if sudo docker ps >/dev/null 2>&1; then SUDO_CMD="sudo" else echo "Error: Failed to run docker, even with sudo. Please check your docker installation and permissions." @@ -103,7 +103,7 @@ if [[ $BUILD_WHEELS -eq 1 ]]; then fi fi - if ! python3 -m cibuildwheel --version &> /dev/null; then + if ! python3 -m cibuildwheel --version >/dev/null 2>&1; then echo "cibuildwheel not found, installing..." python3 -m pip install cibuildwheel fi From 158a83555ad4c18d0cbf7137edfd53de89556040 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Mon, 16 Feb 2026 18:07:11 -0600 Subject: [PATCH 3/4] Fix corner case --- libcachesim/cache.py | 4 ++-- tests/test_cache.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcachesim/cache.py b/libcachesim/cache.py index b83a7b9..95367ef 100644 --- a/libcachesim/cache.py +++ b/libcachesim/cache.py @@ -158,8 +158,8 @@ def _create_common_params( (0 < cache_size <= 1) of the total working set size in bytes as returned by ``reader.get_working_set_size()``. 
""" - if cache_size <= 0: - raise ValueError(f"cache_size must be positive; got {cache_size!r}") + if cache_size < 0: + raise ValueError(f"cache_size must be non-negative; got {cache_size!r}") if isinstance(cache_size, float): if not (0 < cache_size <= 1): diff --git a/tests/test_cache.py b/tests/test_cache.py index 61cfae3..9dbea40 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -546,9 +546,9 @@ def test_fractional_cache_size(self): # Create a trace, ignore obj size trace = SyntheticReader(num_of_req=10000, obj_size=1, dist="uniform", num_objects=1000, seed=42) cache = LRU(0.5, reader=trace) - assert cache.cache_size == 500 + assert cache.cache_size == 0.5 * trace.get_working_set_size()[1] # with obj size trace = SyntheticReader(num_of_req=10000, obj_size=100, dist="uniform", num_objects=1000, seed=42) cache = LRU(0.5, reader=trace) - assert cache.cache_size == 500 * 100 \ No newline at end of file + assert cache.cache_size == 0.5 * trace.get_working_set_size()[1] \ No newline at end of file From b77a4d4d758bf2e6815fb0f2ad957ad9a2de4ed4 Mon Sep 17 00:00:00 2001 From: haochengxia Date: Mon, 16 Feb 2026 18:25:31 -0600 Subject: [PATCH 4/4] Depracate 3.9 --- .github/workflows/build.yml | 2 +- pyproject.toml | 3 +-- tests/test_cache.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index af120ff..a1ce156 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,7 +21,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, macos-14] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] + python-version: ['3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index ae9f818..b42e048 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,12 +12,11 @@ name = "libcachesim" version = "0.3.3.post3" description="Python bindings for libCacheSim" readme = "README.md" -requires-python = ">=3.9" 
+requires-python = ">=3.10" keywords = ["performance", "cache", "simulator"] classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", diff --git a/tests/test_cache.py b/tests/test_cache.py index 9dbea40..2a477d2 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -545,7 +545,7 @@ def test_fractional_cache_size(self): """Test initializing cache with fractional size""" # Create a trace, ignore obj size trace = SyntheticReader(num_of_req=10000, obj_size=1, dist="uniform", num_objects=1000, seed=42) - cache = LRU(0.5, reader=trace) + cache = LRU(0.5, reader=trace) assert cache.cache_size == 0.5 * trace.get_working_set_size()[1] # with obj size