diff --git a/.gitignore b/.gitignore
index 31faa4e5..93b602fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,9 +5,17 @@ dist/
 .env*
 .idea/
 workspace/
+.vscode/
+.cursor/
 __pycache__/
 
 # Dev toggle script artifacts
 .internal-configs/
 .dev-toggle-state
-.go.mod.appended
\ No newline at end of file
+.go.mod.appended
+
+
+flashring/performance_results.csv
+flashring/mem.prof
+flashring/flashring
+flashring/flashringtest
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 00000000..2decad3c
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,34 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        
+        {
+            "name": "Shard",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "${workspaceFolder}/ssd-cache/cmd/shardtest/main.go"
+        },
+        {
+            "name": "Cache",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "${workspaceFolder}/ssd-cache/cmd/cachetest/main.go"
+        },
+        {
+            "name": "Flashring",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "${workspaceFolder}/flashring/cmd/flashringtest",
+            "env": {
+                "PLAN": "readthrough-batched"
+            }
+        }
+
+    ]
+}
\ No newline at end of file
diff --git a/flashring/.vscode/launch.json b/flashring/.vscode/launch.json
new file mode 100644
index 00000000..6ae01079
--- /dev/null
+++ b/flashring/.vscode/launch.json
@@ -0,0 +1,19 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+
+        {
+            "name": "Flashring",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "/home/a0d00kc/Desktop/BharatMLStack/flashring/cmd/flashringtest/main.go",
+            "env": {
+                "GODEBUG": "asyncpreemptoff=1"
+            }
+        },
+    ]
+}
\ No newline at end of file
diff --git a/flashring/README.md b/flashring/README.md
new file mode 100644
index 00000000..f006c3f6
--- /dev/null
+++ b/flashring/README.md
@@ -0,0 +1,461 @@
+# High-Performance Append-Only File Writing Benchmarks
+
+This package provides comprehensive benchmarks for append-only file writing in Go, focusing on maximum throughput and optimal page-aligned buffering strategies.
+
+## Features
+
+- **Page-Aligned Buffering**: Custom buffer implementation that flushes only when page boundaries are reached
+- **Multiple Buffer Sizes**: Tests with 4KB, 8KB, 16KB, and 64KB buffers aligned to system page sizes
+- **Memory-Mapped I/O**: Uses mmap for ultra-fast sequential writes
+- **Direct Write Comparison**: Benchmarks unbuffered writes for baseline comparison
+- **Concurrent Write Testing**: Thread-safe concurrent write benchmarks
+- **Multiple Record Sizes**: Tests with small (128B), medium (1KB), and large (8KB) records
+
+## Quick Start
+
+### Run Visual Benchmarks
+```bash
+go run main.go
+```
+
+This will run comprehensive benchmarks showing:
+- Throughput in MB/s
+- Records per second
+- Duration comparisons
+- Performance recommendations
+
+## Test Results & Analysis
+
+### Hardware Configuration
+- **CPU**: AMD Ryzen 7 9800X3D 8-Core Processor
+- **OS**: Linux (kernel 6.11.0-26-generic)
+- **Go Version**: 1.22.12
+- **Architecture**: amd64
+- **Storage**: SSD with ext4 filesystem
+
+### Visual Benchmark Results
+
+```
+=== Append-Only File Writing Benchmarks ===
+
+=== Small Records (128B x 100K) ===
+Method                        :   Duration |     MB/s |  Records/s | Total MB
+--------------------------------------------------------------------------------
+Direct Write                  :   50.8ms   |   240.07 |  1,966,655 |    12.21
+Buffered (4K)                 :    9.6ms   | 1,266.93 | 10,378,707 |    12.21
+Buffered (8K)                 :    9.1ms   | 1,337.27 | 10,954,887 |    12.21
+Buffered (16K)                :    9.2ms   | 1,327.55 | 10,875,326 |    12.21
+Buffered (64K)                :    8.6ms   | 1,415.92 | 11,599,245 |    12.21
+Page-Aligned (4K)             :   10.5ms   | 1,165.22 |  9,545,493 |    12.21
+Page-Aligned (8K)             :    9.8ms   | 1,244.86 | 10,197,862 |    12.21
+Page-Aligned (16K)            :   10.4ms   | 1,176.88 |  9,641,008 |    12.21
+Page-Aligned (64K)            :    9.5ms   | 1,281.76 | 10,500,163 |    12.21
+Memory Mapped                 :   10.4ms   | 1,168.32 |  9,570,867 |    12.21
+
+=== Medium Records (1KB x 50K) ===
+Method                        :   Duration |     MB/s |  Records/s | Total MB
+--------------------------------------------------------------------------------
+Direct Write                  :   43.1ms   | 1,134.06 |  1,161,276 |    48.83
+Buffered (4K)                 :   24.1ms   | 2,025.50 |  2,074,108 |    48.83
+Buffered (8K)                 :   21.1ms   | 2,308.94 |  2,364,359 |    48.83
+Buffered (16K)                :   19.8ms   | 2,464.45 |  2,523,597 |    48.83
+Buffered (64K)                :   19.9ms   | 2,458.15 |  2,517,143 |    48.83
+Page-Aligned (4K)             :   24.8ms   | 1,970.50 |  2,017,793 |    48.83
+Page-Aligned (8K)             :   21.6ms   | 2,262.77 |  2,317,076 |    48.83
+Page-Aligned (16K)            :   21.1ms   | 2,311.49 |  2,366,963 |    48.83
+Page-Aligned (64K)            :   19.5ms   | 2,499.25 |  2,559,228 |    48.83
+Memory Mapped                 :   23.8ms   | 2,054.37 |  2,103,677 |    48.83
+
+=== Large Records (8KB x 10K) ===
+Method                        :   Duration |     MB/s |  Records/s | Total MB
+--------------------------------------------------------------------------------
+Direct Write                  :   31.3ms   | 2,496.41 |    319,540 |    78.12
+Buffered (4K)                 :   31.9ms   | 2,450.08 |    313,610 |    78.12
+Buffered (8K)                 :   32.8ms   | 2,384.48 |    305,213 |    78.12
+Buffered (16K)                :   30.6ms   | 2,551.66 |    326,613 |    78.12
+Buffered (64K)                :   29.0ms   | 2,693.30 |    344,743 |    78.12
+Page-Aligned (4K)             :   31.6ms   | 2,473.40 |    316,595 |    78.12
+Page-Aligned (8K)             :   31.8ms   | 2,457.32 |    314,537 |    78.12
+Page-Aligned (16K)            :   30.3ms   | 2,576.79 |    329,829 |    78.12
+Page-Aligned (64K)            :   29.4ms   | 2,655.21 |    339,867 |    78.12
+Memory Mapped                 :   35.4ms   | 2,207.78 |    282,596 |    78.12
+```
+
+### Go Benchmark Results
+
+```
+goos: linux
+goarch: amd64
+pkg: github.com/Meesho/BharatMLStack/ssd-cache
+cpu: AMD Ryzen 7 9800X3D 8-Core Processor           
+
+BenchmarkDirectWrite-8           2359388     513.5 ns/op  1994.02 MB/s  0 B/op  0 allocs/op
+BenchmarkPageAligned4K-8         4910527     238.6 ns/op  4290.94 MB/s  0 B/op  0 allocs/op
+BenchmarkPageAligned16K-8        6308680     188.0 ns/op  5446.73 MB/s  0 B/op  0 allocs/op
+BenchmarkPageAligned64K-8        6850387     176.4 ns/op  5803.96 MB/s  0 B/op  0 allocs/op
+BenchmarkMemoryMapped-8          4761464     246.8 ns/op  4148.75 MB/s  0 B/op  0 allocs/op
+
+BenchmarkSmallRecords/DirectWrite-8          3071392     387.8 ns/op   330.08 MB/s  0 B/op  0 allocs/op
+BenchmarkSmallRecords/PageAligned16K-8      36121743      32.68 ns/op  3916.19 MB/s  0 B/op  0 allocs/op
+BenchmarkMediumRecords/DirectWrite-8         2346501     516.5 ns/op  1982.42 MB/s  0 B/op  0 allocs/op
+BenchmarkMediumRecords/PageAligned16K-8      6304753     188.8 ns/op  5422.59 MB/s  0 B/op  0 allocs/op
+BenchmarkLargeRecords/DirectWrite-8           710790      1514 ns/op   5409.65 MB/s  0 B/op  0 allocs/op
+BenchmarkLargeRecords/PageAligned16K-8        757474      1431 ns/op   5723.57 MB/s  0 B/op  0 allocs/op
+BenchmarkConcurrentWrites-8                  5787453     204.3 ns/op  5012.58 MB/s  0 B/op  0 allocs/op
+```
+
+### Performance Analysis
+
+#### Key Findings
+
+1. **Page-Aligned Buffers Dominate**: The page-aligned 64KB buffer achieved the highest throughput at **5,803.96 MB/s**
+2. **Buffer Size Sweet Spot**: 16KB-64KB buffers provide optimal performance across all record sizes
+3. **Zero Memory Allocations**: All implementations achieve zero heap allocations per operation
+4. **Consistent Performance**: Page-aligned buffers maintain high performance across different record sizes
+
+#### Record Size Impact
+
+| Record Size | Best Method | Peak Throughput | Performance Gain vs Direct |
+|-------------|-------------|-----------------|----------------------------|
+| Small (128B) | Buffered 64K | 1,415.92 MB/s | **5.9x faster** |
+| Medium (1KB) | Page-Aligned 64K | 2,499.25 MB/s | **2.2x faster** |
+| Large (8KB) | Buffered 64K | 2,693.30 MB/s | **1.08x faster** |
+
+#### Latency Analysis (from Go benchmarks)
+
+- **Direct Write**: 513.5 ns/op (baseline)
+- **Page-Aligned 16K**: 188.0 ns/op (**2.7x faster**)
+- **Page-Aligned 64K**: 176.4 ns/op (**2.9x faster**)
+- **Small Records**: 32.68 ns/op (**15.7x faster** with page alignment)
+
+#### Scalability Characteristics
+
+1. **Small Records**: Page-aligned buffers show dramatic improvement (5-15x)
+2. **Medium Records**: Consistent 2-3x improvement across all buffered methods
+3. **Large Records**: Diminishing returns as record size approaches buffer size
+4. **Concurrent Writes**: Thread-safe implementation maintains high throughput (5,012 MB/s)
+
+#### Technical Insights
+
+**Why Page-Aligned Buffers Win:**
+- **Reduced System Calls**: Buffer aggregation minimizes expensive kernel transitions
+- **Cache Line Efficiency**: Page-aligned memory access patterns optimize CPU cache usage  
+- **Filesystem Optimization**: Writes aligned to filesystem block boundaries reduce overhead
+- **Memory Management**: Eliminates heap allocations through pre-allocated buffers
+
+**Buffer Size Analysis:**
+- **4KB**: Matches most filesystem page sizes, good baseline performance
+- **16KB**: Sweet spot for balanced throughput and memory usage
+- **64KB**: Maximum throughput but higher memory consumption
+- **Beyond 64KB**: Diminishing returns due to cache pressure
+
+**Record Size Effects:**
+- **Small Records (128B)**: Massive gains from batching (up to 15x improvement)
+- **Medium Records (1KB)**: Strong benefits from reduced syscall overhead
+- **Large Records (8KB)**: Minimal gains as records approach buffer size
+
+#### Production Recommendations
+
+**For High-Throughput Applications:**
+```go
+// Optimal configuration for maximum throughput
+writer := NewPageAlignedBuffer("data.log", PageSize64K)
+defer writer.Close()
+
+// Batch small records for maximum efficiency
+batch := make([]byte, 0, 8192)
+for record := range records {
+    batch = append(batch, record...)
+    if len(batch) >= 8192 {
+        writer.Write(batch)
+        batch = batch[:0]
+    }
+}
+```
+
+**For Low-Latency Applications:**
+```go
+// Balance between throughput and latency
+writer := NewPageAlignedBuffer("events.log", PageSize16K)
+defer writer.Close()
+
+// Periodic flushes for guaranteed durability
+ticker := time.NewTicker(100 * time.Millisecond)
+go func() {
+    for range ticker.C {
+        writer.Sync()
+    }
+}()
+```
+
+**Memory vs Performance Trade-offs:**
+
+| Buffer Size | Memory Usage | Throughput | Best For |
+|-------------|--------------|------------|----------|
+| 4KB | 4KB per writer | Good | Memory-constrained |
+| 16KB | 16KB per writer | **Optimal** | **General purpose** |
+| 64KB | 64KB per writer | Maximum | Bulk ingestion |
+
+## FUSE Filesystem Analysis
+
+### Can FUSE Improve Performance?
+
+**Short Answer: Usually No** - FUSE typically **reduces** performance for append-only workloads due to context switching overhead.
+
+### FUSE Performance Impact
+
+| Aspect | Impact | Reason |
+|--------|--------|--------|
+| **Context Switches** | -50-200μs per operation | Kernel ↔ Userspace transitions |
+| **Data Copying** | -10-50μs per MB | Additional memory copies |
+| **System Call Overhead** | -1-5μs per call | Extra syscalls in pipeline |
+| **Overall Performance** | **3-5x slower** | Cumulative overhead |
+
+### When FUSE Might Help
+
+FUSE becomes beneficial when you need:
+
+1. **Custom Compression** (compression ratio > 3:1)
+```go
+// FUSE with transparent compression
+compressed := compress(data)  // Saves 3x storage I/O
+backingFile.Write(compressed) // Compensates for FUSE overhead
+```
+
+2. **Specialized Storage Formats**
+```go
+// Convert row-based to columnar storage
+columns := convertToColumns(records)
+writeColumnarData(columns)  // Optimized for analytics
+```
+
+3. **Network Storage Optimization**
+```go
+// Batch operations for network efficiency
+batch := accumulate(data)
+sendBatchAsync(compress(batch))  // Reduces network round-trips
+```
+
+4. **Multi-tier Storage Management**
+```go
+// Intelligent data placement
+if isHotData(data) {
+    writeSSD(data)
+} else {
+    writeToCloud(compress(data))
+}
+```
+
+### Performance Comparison
+
+Based on our benchmarks:
+
+| Method | Throughput | Best Use Case |
+|--------|------------|---------------|
+| **Direct Write** | 1,134 MB/s | Simple baseline |
+| **Page-Aligned 16K** | **2,311 MB/s** | **Recommended** |
+| **Memory Mapped** | 2,054 MB/s | Large sequential |
+| **FUSE Basic** | ~400 MB/s | ❌ Not recommended |
+| **FUSE + Compression** | ~800 MB/s | High compression ratios only |
+
+### Recommendation
+
+**For pure append-only performance**: Use **PageAlignedBuffer** - it's 2-3x faster than direct writes and 5-6x faster than FUSE.
+
+**Consider FUSE only when**:
+- You need data transformation (compression, encryption, format conversion)
+- Working with network storage where batching helps
+- Building storage abstraction layers
+
+See `FUSE_ANALYSIS.md` for detailed technical analysis.
+
+### Run Go Benchmarks
+```bash
+# Run all benchmarks
+go test -bench=.
+
+# Run specific benchmark
+go test -bench=BenchmarkPageAligned16K
+
+# Run with memory profiling
+go test -bench=. -memprofile=mem.prof
+
+# Run with CPU profiling
+go test -bench=. -cpuprofile=cpu.prof
+
+# Detailed benchmark with allocations
+go test -bench=. -benchmem
+```
+
+## Architecture Components
+
+### 1. PageAlignedBuffer
+Custom buffered writer that:
+- Maintains internal buffer aligned to page boundaries
+- Flushes only when buffer reaches capacity or explicitly requested
+- Thread-safe with mutex protection
+- Optimized for sequential append operations
+
+```go
+writer, err := NewPageAlignedBuffer("file.log", PageSize16K)
+defer writer.Close()
+
+// Writes are buffered until page boundary
+writer.Write(data)
+writer.Sync() // Flush and fsync to disk
+```
+
+### 2. Memory-Mapped Writer
+Uses `mmap()` system call for:
+- Zero-copy writes directly to memory
+- Kernel-managed page cache optimization
+- Efficient for large sequential writes
+
+```go
+writer, err := NewMemoryMappedWriter("file.log", totalSize)
+defer writer.Close()
+
+writer.Write(data) // Writes directly to mapped memory
+writer.Sync()      // Sync to disk with msync()
+```
+
+### 3. Direct Writer
+Baseline implementation for comparison:
+- No buffering - each write goes directly to kernel
+- Useful for understanding buffering benefits
+- Higher syscall overhead but guaranteed write ordering
+
+## Performance Optimization Strategies
+
+### Buffer Size Selection
+- **4KB-8KB**: Best for low-latency applications requiring frequent flushes
+- **16KB-32KB**: Optimal for most high-throughput workloads
+- **64KB+**: Best for bulk data ingestion with less frequent syncing
+
+### Write Pattern Optimization
+1. **Batch Small Writes**: Accumulate small records before writing
+2. **Align to Page Boundaries**: Use page-sized buffers (4KB multiples)
+3. **Minimize Sync Calls**: Only sync when durability is required
+4. **Pre-allocate Files**: Use `fallocate()` to pre-allocate disk space
+
+### System-Level Optimizations
+```bash
+# Disable file access time updates
+mount -o noatime,nodiratime /dev/sda1 /data
+
+# Increase write buffer sizes
+echo 'vm.dirty_ratio = 40' >> /etc/sysctl.conf
+echo 'vm.dirty_background_ratio = 10' >> /etc/sysctl.conf
+
+# Use deadline I/O scheduler for sequential writes
+echo deadline > /sys/block/sda/queue/scheduler
+```
+
+## Benchmark Results Analysis
+
+### Expected Performance Characteristics
+
+| Method | Throughput | Latency | CPU Usage | Use Case |
+|--------|------------|---------|-----------|----------|
+| Direct Write | Low | High | Low | Strict ordering |
+| Buffered 4K | Medium | Medium | Medium | Balanced |
+| Page-Aligned 16K | High | Low | Medium | High throughput |
+| Memory Mapped | Highest | Lowest | Highest | Bulk ingestion |
+
+### Platform-Specific Considerations
+
+**SSD Storage:**
+- Page-aligned buffers show 3-5x improvement over direct writes
+- Memory mapping excels for large sequential writes
+- 16KB-32KB buffers provide optimal throughput
+
+**HDD Storage:**
+- Larger buffers (64KB+) reduce seek overhead
+- Sequential write patterns are crucial
+- Pre-allocation reduces fragmentation
+
+**Network Storage (NFS/CIFS):**
+- Larger buffers reduce network round-trips
+- Memory mapping may not provide benefits
+- Consider async write modes
+
+## Advanced Usage
+
+### Custom Record Format
+```go
+type LogRecord struct {
+    Timestamp int64
+    Level     uint8
+    Message   []byte
+}
+
+func (r *LogRecord) Marshal() []byte {
+    // Custom serialization optimized for append-only writes
+}
+```
+
+### Batch Writing
+```go
+writer := NewPageAlignedBuffer("batch.log", PageSize16K)
+defer writer.Close()
+
+// Accumulate records until page boundary
+var batch []byte
+for record := range records {
+    batch = append(batch, record.Marshal()...)
+    if len(batch) >= PageSize4K {
+        writer.Write(batch)
+        batch = batch[:0] // Reset slice
+    }
+}
+```
+
+### Error Recovery
+```go
+if err := writer.Write(data); err != nil {
+    // Log error but continue - append-only design allows recovery
+    log.Printf("Write failed: %v", err)
+    
+    // Attempt to sync partial data
+    if syncErr := writer.Sync(); syncErr != nil {
+        log.Printf("Sync failed: %v", syncErr)
+    }
+}
+```
+
+## Monitoring and Metrics
+
+### Key Performance Indicators
+- **Write Throughput**: MB/s sustained write rate
+- **Write Latency**: p99 latency for individual writes
+- **Buffer Efficiency**: Ratio of buffered to direct writes
+- **Disk Utilization**: IOPs and queue depth
+- **Memory Usage**: Buffer memory and page cache
+
+### Profiling Integration
+```bash
+# CPU profiling
+go test -bench=BenchmarkPageAligned16K -cpuprofile=cpu.prof
+go tool pprof cpu.prof
+
+# Memory profiling  
+go test -bench=BenchmarkMemoryMapped -memprofile=mem.prof
+go tool pprof mem.prof
+
+# Trace analysis
+go test -bench=. -trace=trace.out
+go tool trace trace.out
+```
+
+## Contributing
+
+When adding new benchmarks:
+1. Follow the naming convention `Benchmark<Method><Parameters>`
+2. Use `b.SetBytes()` to report throughput
+3. Reset timers appropriately with `b.ResetTimer()`
+4. Clean up test files with `defer os.Remove()`
+5. Test on multiple platforms (Linux, macOS, Windows)
+
+## License
+
+This benchmark suite is part of the BharatMLStack project and follows the same licensing terms. 
\ No newline at end of file
diff --git a/flashring/cmd/flashringtest/__debug_bin2081587258 b/flashring/cmd/flashringtest/__debug_bin2081587258
new file mode 100755
index 00000000..c90caa97
Binary files /dev/null and b/flashring/cmd/flashringtest/__debug_bin2081587258 differ
diff --git a/flashring/cmd/flashringtest/main.go b/flashring/cmd/flashringtest/main.go
new file mode 100644
index 00000000..57051662
--- /dev/null
+++ b/flashring/cmd/flashringtest/main.go
@@ -0,0 +1,105 @@
+package main
+
+import (
+	"math/rand"
+	"os"
+
+	_ "net/http/pprof"
+)
+
+// normalDistInt returns an integer in [0, max) following a normal distribution
+// centered at max/2 with standard deviation = max/6 (so ~99.7% values are in range)
+func normalDistInt(max int) int {
+	if max <= 0 {
+		return 0
+	}
+
+	mean := float64(max) / 2.0
+	stdDev := float64(max) / 8.0
+
+	for {
+		val := rand.NormFloat64()*stdDev + mean
+
+		if val >= 0 && val < float64(max) {
+			return int(val)
+		}
+	}
+}
+
+// normalDistIntPartitioned returns an integer following a normal distribution
+// centered at the middle of the total key space, but constrained to a specific
+// worker's partition. Workers assigned to ranges near the center will naturally
+// get more load, while workers at the edges get less load.
+// workerID: the ID of the worker (0-indexed)
+// numWorkers: total number of workers
+// totalKeys: total number of keys across all partitions
+func normalDistIntPartitioned(workerID, numWorkers, totalKeys int) int {
+	if totalKeys <= 0 || numWorkers <= 0 {
+		return 0
+	}
+
+	// Calculate partition boundaries for this worker
+	partitionSize := totalKeys / numWorkers
+	partitionStart := workerID * partitionSize
+	partitionEnd := partitionStart + partitionSize
+
+	// Last worker takes any remaining keys
+	if workerID == numWorkers-1 {
+		partitionEnd = totalKeys
+	}
+
+	// All workers sample from the same distribution centered at the middle
+	mean := float64(totalKeys) / 2.0
+	stdDev := float64(totalKeys) / 8.0
+
+	// Keep sampling until we get a value in this worker's partition
+	for {
+		val := rand.NormFloat64()*stdDev + mean
+
+		if val >= float64(partitionStart) && val < float64(partitionEnd) {
+			return int(val)
+		}
+	}
+}
+
+func main() {
+	// Flags to parameterize load tests
+	//pick plan from the environment variable
+	plan := os.Getenv("PLAN")
+	if plan == "freecache" {
+		planFreecache()
+	} else if plan == "readthrough" {
+		planReadthroughGaussian()
+	} else if plan == "random" {
+		planRandomGaussian()
+	} else if plan == "readthrough-batched" {
+		planReadthroughGaussianBatched()
+	} else if plan == "lockless" {
+		planLockless()
+	} else if plan == "badger" {
+		planBadger()
+	} else {
+		panic("invalid plan")
+	}
+}
+
+// func BucketsByWidth(a float64, n int) []float64 {
+// 	if n <= 0 {
+// 		return []float64{0}
+// 	}
+// 	b := make([]float64, n+1)
+// 	b[0] = 0
+// 	if math.Abs(a) < 1e-12 {
+// 		// a ~ 0 => uniform
+// 		for i := 1; i <= n; i++ {
+// 			b[i] = float64(i) / float64(n)
+// 		}
+// 		return b
+// 	}
+// 	s := math.Expm1(a) / float64(n) // (e^a - 1)/n (stable)
+// 	ia := 1.0 / a
+// 	for i := 0; i <= n; i++ {
+// 		b[i] = ia * math.Log1p(s*float64(i)) // ln(1 + s*i)
+// 	}
+// 	return b
+// }
diff --git a/flashring/cmd/flashringtest/mem.prof b/flashring/cmd/flashringtest/mem.prof
new file mode 100644
index 00000000..f11189a6
Binary files /dev/null and b/flashring/cmd/flashringtest/mem.prof differ
diff --git a/flashring/cmd/flashringtest/plan_badger.go b/flashring/cmd/flashringtest/plan_badger.go
new file mode 100644
index 00000000..4ba266d4
--- /dev/null
+++ b/flashring/cmd/flashringtest/plan_badger.go
@@ -0,0 +1,169 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"os"
+	"runtime"
+	"runtime/pprof"
+	"strings"
+	"sync"
+
+	cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func planBadger() {
+
+	var (
+		mountPoint         string
+		numShards          int
+		keysPerShard       int
+		memtableMB         int
+		fileSizeMultiplier int
+		readWorkers        int
+		writeWorkers       int
+		sampleSecs         int
+		iterations         int64
+		aVal               float64
+		logStats           bool
+		memProfile         string
+		cpuProfile         string
+	)
+
+	flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/badger", "data directory for shard files")
+	flag.IntVar(&numShards, "shards", 1, "number of shards")
+	flag.IntVar(&keysPerShard, "keys-per-shard", 20_000_000, "keys per shard")
+	flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB")
+	flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 1, "file size in GiB per shard")
+	flag.IntVar(&readWorkers, "readers", 4, "number of read workers")
+	flag.IntVar(&writeWorkers, "writers", 4, "number of write workers")
+	flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds")
+	flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations")
+	flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor")
+	flag.BoolVar(&logStats, "log-stats", true, "periodically log cache stats")
+	flag.StringVar(&memProfile, "memprofile", "mem.prof", "write memory profile to this file")
+	flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to this file")
+	flag.Parse()
+
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+
+	cfg := cachepkg.WrapCacheConfig{
+		MountPoint: mountPoint,
+	}
+
+	cache, err := cachepkg.NewBadger(cfg, logStats)
+	if err != nil {
+		panic(err)
+	}
+
+	MULTIPLIER := 300
+
+	missedKeyChanList := make([]chan int, writeWorkers)
+	for i := 0; i < writeWorkers; i++ {
+		missedKeyChanList[i] = make(chan int)
+	}
+
+	totalKeys := keysPerShard * numShards
+	str1kb := strings.Repeat("a", 1024)
+	str1kb = "%d" + str1kb
+
+	var wg sync.WaitGroup
+	var writeWg sync.WaitGroup
+
+	//prepopulate 70% keys
+	fmt.Printf("----------------------------------------------prepopulating keys\n")
+	for k := 0; k < int(totalKeys); k++ {
+
+		if rand.Intn(100) < 30 {
+			continue
+		}
+
+		key := fmt.Sprintf("key%d", k)
+		val := []byte(fmt.Sprintf(str1kb, k))
+		if err := cache.Put(key, val, 60*60); err != nil {
+			panic(err)
+		}
+		if k%5000000 == 0 {
+			fmt.Printf("----------------------------------------------prepopulated %d keys\n", k)
+		}
+	}
+
+	if writeWorkers > 0 {
+		fmt.Printf("----------------------------------------------starting write workers\n")
+		writeWg.Add(writeWorkers)
+
+		for w := 0; w < writeWorkers; w++ {
+			go func(workerID int) {
+				defer writeWg.Done()
+
+				for mk := range missedKeyChanList[workerID] {
+					key := fmt.Sprintf("key%d", mk)
+					val := []byte(fmt.Sprintf(str1kb, mk))
+					if err := cache.Put(key, val, 60*60); err != nil {
+						panic(err)
+					}
+				}
+			}(w)
+		}
+	}
+
+	if readWorkers > 0 {
+		fmt.Printf("----------------------------------------------reading keys\n")
+		wg.Add(readWorkers)
+
+		for r := 0; r < readWorkers; r++ {
+			go func(workerID int) {
+				defer wg.Done()
+				for k := 0; k < totalKeys*MULTIPLIER; k += 1 {
+					randomval := normalDistInt(totalKeys)
+					key := fmt.Sprintf("key%d", randomval)
+					_, found, expired := cache.Get(key)
+
+					if !found {
+						writeWorkerid := randomval % writeWorkers
+						missedKeyChanList[writeWorkerid] <- randomval
+					}
+
+					if expired {
+						panic("key expired")
+					}
+					if k%5000000 == 0 {
+						fmt.Printf("----------------------------------------------read %d keys %d readerid\n", k, workerID)
+					}
+				}
+			}(r)
+		}
+	}
+
+	// Start pprof HTTP server for runtime profiling
+
+	wg.Wait()
+	log.Info().Msgf("done putting")
+
+	// Memory profiling
+	if memProfile != "" {
+		runtime.GC() // get up-to-date statistics
+		f, err := os.Create(memProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create memory profile")
+		}
+		defer f.Close()
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not write memory profile")
+		}
+		log.Info().Msgf("Memory profile written to %s", memProfile)
+	}
+
+	// Print memory stats
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	log.Info().
+		Str("alloc", fmt.Sprintf("%.2f MB", float64(m.Alloc)/1024/1024)).
+		Str("total_alloc", fmt.Sprintf("%.2f MB", float64(m.TotalAlloc)/1024/1024)).
+		Str("sys", fmt.Sprintf("%.2f MB", float64(m.Sys)/1024/1024)).
+		Uint32("num_gc", m.NumGC).
+		Msg("Memory statistics")
+}
diff --git a/flashring/cmd/flashringtest/plan_freecache.go b/flashring/cmd/flashringtest/plan_freecache.go
new file mode 100644
index 00000000..0fe6a297
--- /dev/null
+++ b/flashring/cmd/flashringtest/plan_freecache.go
@@ -0,0 +1,172 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"os"
+	"runtime"
+	"runtime/debug"
+	"runtime/pprof"
+	"strings"
+	"sync"
+
+	cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func planFreecache() {
+
+	var (
+		mountPoint         string
+		numShards          int
+		keysPerShard       int
+		memtableMB         int
+		fileSizeMultiplier int
+		readWorkers        int
+		writeWorkers       int
+		sampleSecs         int
+		iterations         int64
+		aVal               float64
+		logStats           bool
+		memProfile         string
+		cpuProfile         string
+	)
+
+	flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/", "data directory for shard files")
+	flag.IntVar(&numShards, "shards", 1, "number of shards")
+	flag.IntVar(&keysPerShard, "keys-per-shard", 20_000_000, "keys per shard")
+	flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB")
+	flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 1, "file size in GiB per shard")
+	flag.IntVar(&readWorkers, "readers", 4, "number of read workers")
+	flag.IntVar(&writeWorkers, "writers", 4, "number of write workers")
+	flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds")
+	flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations")
+	flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor")
+	flag.BoolVar(&logStats, "log-stats", true, "periodically log cache stats")
+	flag.StringVar(&memProfile, "memprofile", "mem.prof", "write memory profile to this file")
+	flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to this file")
+	flag.Parse()
+
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+
+	cfg := cachepkg.WrapCacheConfig{
+		KeysPerShard: keysPerShard,
+		FileSize:     4 * 1024 * 1024 * 1024,
+	}
+
+	cache, err := cachepkg.NewFreecache(cfg, logStats)
+	if err != nil {
+		panic(err)
+	}
+	debug.SetGCPercent(20)
+
+	MULTIPLIER := 300
+
+	missedKeyChanList := make([]chan int, writeWorkers)
+	for i := 0; i < writeWorkers; i++ {
+		missedKeyChanList[i] = make(chan int)
+	}
+
+	totalKeys := keysPerShard * numShards
+	str1kb := strings.Repeat("a", 1024)
+	str1kb = "%d" + str1kb
+
+	var wg sync.WaitGroup
+	var writeWg sync.WaitGroup
+
+	//prepopulate 70% keys
+	fmt.Printf("----------------------------------------------prepopulating keys\n")
+	for k := 0; k < int(totalKeys); k++ {
+
+		if rand.Intn(100) < 30 {
+			continue
+		}
+
+		key := fmt.Sprintf("key%d", k)
+		val := []byte(fmt.Sprintf(str1kb, k))
+		if err := cache.Put(key, val, 60*60); err != nil {
+			panic(err)
+		}
+		if k%5000000 == 0 {
+			fmt.Printf("----------------------------------------------prepopulated %d keys\n", k)
+		}
+	}
+
+	if writeWorkers > 0 {
+		fmt.Printf("----------------------------------------------starting write workers\n")
+		writeWg.Add(writeWorkers)
+
+		for w := 0; w < writeWorkers; w++ {
+			go func(workerID int) {
+				defer writeWg.Done()
+
+				for mk := range missedKeyChanList[workerID] {
+					key := fmt.Sprintf("key%d", mk)
+					val := []byte(fmt.Sprintf(str1kb, mk))
+					if err := cache.Put(key, val, 60*60); err != nil {
+						panic(err)
+					}
+				}
+			}(w)
+		}
+	}
+
+	if readWorkers > 0 {
+		fmt.Printf("----------------------------------------------reading keys\n")
+		wg.Add(readWorkers)
+
+		for r := 0; r < readWorkers; r++ {
+			go func(workerID int) {
+				defer wg.Done()
+				for k := 0; k < totalKeys*MULTIPLIER; k += 1 {
+					randomval := normalDistInt(totalKeys)
+					key := fmt.Sprintf("key%d", randomval)
+					_, found, expired := cache.Get(key)
+
+					if !found {
+						writeWorkerid := randomval % writeWorkers
+						missedKeyChanList[writeWorkerid] <- randomval
+					}
+
+					if expired {
+						panic("key expired")
+					}
+					if k%5000000 == 0 {
+						fmt.Printf("----------------------------------------------read %d keys %d readerid\n", k, workerID)
+					}
+				}
+			}(r)
+		}
+	}
+
+	// Start pprof HTTP server for runtime profiling
+
+	wg.Wait()
+	log.Info().Msgf("done putting")
+
+	// Memory profiling
+	if memProfile != "" {
+		runtime.GC() // get up-to-date statistics
+		f, err := os.Create(memProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create memory profile")
+		}
+		defer f.Close()
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not write memory profile")
+		}
+		log.Info().Msgf("Memory profile written to %s", memProfile)
+	}
+
+	// Print memory stats
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	log.Info().
+		Str("alloc", fmt.Sprintf("%.2f MB", float64(m.Alloc)/1024/1024)).
+		Str("total_alloc", fmt.Sprintf("%.2f MB", float64(m.TotalAlloc)/1024/1024)).
+		Str("sys", fmt.Sprintf("%.2f MB", float64(m.Sys)/1024/1024)).
+		Uint32("num_gc", m.NumGC).
+		Msg("Memory statistics")
+}
diff --git a/flashring/cmd/flashringtest/plan_lockless.go b/flashring/cmd/flashringtest/plan_lockless.go
new file mode 100644
index 00000000..e946c9af
--- /dev/null
+++ b/flashring/cmd/flashringtest/plan_lockless.go
@@ -0,0 +1,228 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"net/http"
+	"os"
+	"path/filepath"
+	"runtime"
+	"runtime/pprof"
+	"strings"
+	"sync"
+	"time"
+
+	cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func planLockless() {
+	var (
+		mountPoint         string
+		numShards          int
+		keysPerShard       int
+		memtableMB         int
+		fileSizeMultiplier int
+		readWorkers        int
+		writeWorkers       int
+		sampleSecs         int
+		iterations         int64
+		aVal               float64
+		logStats           bool
+		memProfile         string
+		cpuProfile         string
+	)
+
+	flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/", "data directory for shard files")
+	flag.IntVar(&numShards, "shards", 500, "number of shards")
+	flag.IntVar(&keysPerShard, "keys-per-shard", 10_00_00, "keys per shard")
+	flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB")
+	flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 2, "file size in GiB per shard")
+	flag.IntVar(&readWorkers, "readers", 8, "number of read workers")
+	flag.IntVar(&writeWorkers, "writers", 8, "number of write workers")
+	flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds")
+	flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations")
+	flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor")
+	flag.BoolVar(&logStats, "log-stats", true, "periodically log cache stats")
+	flag.StringVar(&memProfile, "memprofile", "mem.prof", "write memory profile to this file")
+	flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to this file")
+	flag.Parse()
+
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+	go func() {
+		log.Info().Msg("Starting pprof server on :8080")
+		log.Info().Msg("Access profiles at: http://localhost:8080/debug/pprof/")
+		log.Info().Msg("Memory profile: http://localhost:8080/debug/pprof/heap")
+		log.Info().Msg("Goroutine profile: http://localhost:8080/debug/pprof/goroutine")
+		if err := http.ListenAndServe(":8080", nil); err != nil {
+			log.Error().Err(err).Msg("pprof server failed")
+		}
+	}()
+
+	// CPU profiling
+	if cpuProfile != "" {
+		f, err := os.Create(cpuProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create CPU profile")
+		}
+		defer f.Close()
+		if err := pprof.StartCPUProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not start CPU profile")
+		}
+		defer pprof.StopCPUProfile()
+	}
+
+	//remove all files inside the mount point
+	files, err := os.ReadDir(mountPoint)
+	if err != nil {
+		panic(err)
+	}
+	for _, file := range files {
+		os.Remove(filepath.Join(mountPoint, file.Name()))
+	}
+
+	memtableSizeInBytes := int32(memtableMB) * 1024 * 1024
+	fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes)
+
+	cfg := cachepkg.WrapCacheConfig{
+		NumShards:             numShards,
+		KeysPerShard:          keysPerShard,
+		FileSize:              fileSizeInBytes,
+		MemtableSize:          memtableSizeInBytes,
+		ReWriteScoreThreshold: 0.8,
+		GridSearchEpsilon:     0.0001,
+		SampleDuration:        time.Duration(sampleSecs) * time.Second,
+
+		// Pass the metrics collector to record cache metrics
+		MetricsRecorder: InitMetricsCollector(),
+	}
+
+	// Set additional input parameters that the cache doesn't know about
+	metricsCollector.SetShards(numShards)
+	metricsCollector.SetKeysPerShard(keysPerShard)
+	metricsCollector.SetReadWorkers(readWorkers)
+	metricsCollector.SetWriteWorkers(writeWorkers)
+	metricsCollector.SetPlan("lockless")
+
+	// Start background goroutine to wait for shutdown signal and export CSV
+	go RunmetricsWaitForShutdown()
+
+	pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats)
+	if err != nil {
+		panic(err)
+	}
+
+	MULTIPLIER := 300
+
+	missedKeyChanList := make([]chan int, writeWorkers)
+	for i := 0; i < writeWorkers; i++ {
+		missedKeyChanList[i] = make(chan int)
+	}
+
+	totalKeys := keysPerShard * numShards
+	str1kb := strings.Repeat("a", 1024)
+	str1kb = "%d" + str1kb
+
+	var wg sync.WaitGroup
+	var writeWg sync.WaitGroup
+
+	//prepopulate 70% keys
+	fmt.Printf("----------------------------------------------prepopulating keys\n")
+	for k := 0; k < int(totalKeys); k++ {
+
+		if rand.Intn(100) < 30 {
+			continue
+		}
+
+		key := fmt.Sprintf("key%d", k)
+		val := []byte(fmt.Sprintf(str1kb, k))
+		if err := pc.PutLL(key, val, 60); err != nil {
+			panic(err)
+		}
+		if k%5000000 == 0 {
+			fmt.Printf("----------------------------------------------prepopulated %d keys\n", k)
+		}
+	}
+
+	if writeWorkers > 0 {
+		fmt.Printf("----------------------------------------------starting write workers\n")
+		writeWg.Add(writeWorkers)
+
+		for w := 0; w < writeWorkers; w++ {
+			go func(workerID int) {
+				defer writeWg.Done()
+
+				for mk := range missedKeyChanList[workerID] {
+					key := fmt.Sprintf("key%d", mk)
+					val := []byte(fmt.Sprintf(str1kb, mk))
+					if err := pc.PutLL(key, val, 60); err != nil {
+						panic(err)
+					}
+				}
+			}(w)
+		}
+	}
+
+	if readWorkers > 0 {
+		fmt.Printf("----------------------------------------------reading keys\n")
+		wg.Add(readWorkers)
+
+		for r := 0; r < readWorkers; r++ {
+			go func(workerID int) {
+				defer wg.Done()
+				for k := 0; k < totalKeys*MULTIPLIER; k += 1 {
+					randomval := normalDistIntPartitioned(workerID, readWorkers, totalKeys)
+					key := fmt.Sprintf("key%d", randomval)
+					val, found, expired := pc.GetLL(key)
+
+					if !found {
+						writeWorkerid := randomval % writeWorkers
+						missedKeyChanList[writeWorkerid] <- randomval
+					}
+
+					if expired {
+						panic("key expired")
+
+					}
+					if found && string(val) != fmt.Sprintf(str1kb, randomval) {
+						panic("value mismatch")
+					}
+					if k%5000000 == 0 {
+						fmt.Printf("----------------------------------------------read %d keys %d readerid\n", k, workerID)
+					}
+				}
+			}(r)
+		}
+	}
+
+	// Start pprof HTTP server for runtime profiling
+
+	wg.Wait()
+	log.Info().Msgf("done putting")
+
+	// Memory profiling
+	if memProfile != "" {
+		runtime.GC() // get up-to-date statistics
+		f, err := os.Create(memProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create memory profile")
+		}
+		defer f.Close()
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not write memory profile")
+		}
+		log.Info().Msgf("Memory profile written to %s", memProfile)
+	}
+
+	// Print memory stats
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	log.Info().
+		Str("alloc", fmt.Sprintf("%.2f MB", float64(m.Alloc)/1024/1024)).
+		Str("total_alloc", fmt.Sprintf("%.2f MB", float64(m.TotalAlloc)/1024/1024)).
+		Str("sys", fmt.Sprintf("%.2f MB", float64(m.Sys)/1024/1024)).
+		Uint32("num_gc", m.NumGC).
+		Msg("Memory statistics")
+}
diff --git a/flashring/cmd/flashringtest/plan_random_gausian.go b/flashring/cmd/flashringtest/plan_random_gausian.go
new file mode 100644
index 00000000..3fbaf849
--- /dev/null
+++ b/flashring/cmd/flashringtest/plan_random_gausian.go
@@ -0,0 +1,189 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"net/http"
+	"os"
+	"path/filepath"
+	"runtime"
+	"runtime/pprof"
+	"strings"
+	"sync"
+	"time"
+
+	cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func planRandomGaussian() {
+	var (
+		mountPoint         string
+		numShards          int
+		keysPerShard       int
+		memtableMB         int
+		fileSizeMultiplier int
+		readWorkers        int
+		writeWorkers       int
+		sampleSecs         int
+		iterations         int64
+		aVal               float64
+		logStats           bool
+		memProfile         string
+		cpuProfile         string
+	)
+
+	flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/", "data directory for shard files")
+	flag.IntVar(&numShards, "shards", 1, "number of shards")
+	flag.IntVar(&keysPerShard, "keys-per-shard", 20_000_000, "keys per shard")
+	flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB")
+	flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 40, "file size in GiB per shard")
+	flag.IntVar(&readWorkers, "readers", 1, "number of read workers")
+	flag.IntVar(&writeWorkers, "writers", 1, "number of write workers")
+	flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds")
+	flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations")
+	flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor")
+	flag.BoolVar(&logStats, "log-stats", true, "periodically log cache stats")
+	flag.StringVar(&memProfile, "memprofile", "mem.prof", "write memory profile to this file")
+	flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to this file")
+	flag.Parse()
+
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+	go func() {
+		log.Info().Msg("Starting pprof server on :8080")
+		log.Info().Msg("Access profiles at: http://localhost:8080/debug/pprof/")
+		log.Info().Msg("Memory profile: http://localhost:8080/debug/pprof/heap")
+		log.Info().Msg("Goroutine profile: http://localhost:8080/debug/pprof/goroutine")
+		if err := http.ListenAndServe(":8080", nil); err != nil {
+			log.Error().Err(err).Msg("pprof server failed")
+		}
+	}()
+
+	// CPU profiling
+	if cpuProfile != "" {
+		f, err := os.Create(cpuProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create CPU profile")
+		}
+		defer f.Close()
+		if err := pprof.StartCPUProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not start CPU profile")
+		}
+		defer pprof.StopCPUProfile()
+	}
+
+	//remove all files inside the mount point
+	files, err := os.ReadDir(mountPoint)
+	if err != nil {
+		panic(err)
+	}
+	for _, file := range files {
+		os.Remove(filepath.Join(mountPoint, file.Name()))
+	}
+
+	memtableSizeInBytes := int32(memtableMB) * 1024 * 1024
+	fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes)
+
+	cfg := cachepkg.WrapCacheConfig{
+		NumShards:             numShards,
+		KeysPerShard:          keysPerShard,
+		FileSize:              fileSizeInBytes,
+		MemtableSize:          memtableSizeInBytes,
+		ReWriteScoreThreshold: 0.8,
+		GridSearchEpsilon:     0.0001,
+		SampleDuration:        time.Duration(sampleSecs) * time.Second,
+	}
+
+	pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats)
+	if err != nil {
+		panic(err)
+	}
+
+	MULTIPLIER := 300
+
+	totalKeys := keysPerShard * numShards
+	str1kb := strings.Repeat("a", 1024)
+	str1kb = "%d" + str1kb
+
+	var wg sync.WaitGroup
+
+	if writeWorkers > 0 {
+		fmt.Printf("----------------------------------------------writing keys\n")
+		wg.Add(writeWorkers)
+
+		for w := 0; w < writeWorkers; w++ {
+			go func(workerID int) {
+				defer wg.Done()
+				for k := 0; k < totalKeys*MULTIPLIER; k += 1 {
+					randomval := normalDistInt(totalKeys)
+					key := fmt.Sprintf("key%d", randomval)
+
+					val := []byte(fmt.Sprintf(str1kb, randomval))
+					if err := pc.Put(key, val, 60); err != nil {
+						panic(err)
+					}
+
+					if k%5000000 == 0 {
+						fmt.Printf("----------------------------------------------wrote %d keys %d writerid\n", k, workerID)
+					}
+				}
+			}(w)
+		}
+	}
+
+	if readWorkers > 0 {
+		fmt.Printf("----------------------------------------------reading keys\n")
+		wg.Add(readWorkers)
+
+		for r := 0; r < readWorkers; r++ {
+			go func(workerID int) {
+				defer wg.Done()
+				for k := 0; k < totalKeys*MULTIPLIER; k += 1 {
+					randomval := normalDistInt(totalKeys)
+					key := fmt.Sprintf("key%d", randomval)
+					val, found, expired := pc.Get(key)
+
+					if expired {
+						panic("key expired")
+					}
+					if found && string(val) != fmt.Sprintf(str1kb, randomval) {
+						panic("value mismatch")
+					}
+					if k%5000000 == 0 {
+						fmt.Printf("----------------------------------------------read %d keys %d readerid\n", k, workerID)
+					}
+				}
+			}(r)
+		}
+	}
+
+	// Start pprof HTTP server for runtime profiling
+
+	wg.Wait()
+	log.Info().Msgf("done putting")
+
+	// Memory profiling
+	if memProfile != "" {
+		runtime.GC() // get up-to-date statistics
+		f, err := os.Create(memProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create memory profile")
+		}
+		defer f.Close()
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not write memory profile")
+		}
+		log.Info().Msgf("Memory profile written to %s", memProfile)
+	}
+
+	// Print memory stats
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	log.Info().
+		Str("alloc", fmt.Sprintf("%.2f MB", float64(m.Alloc)/1024/1024)).
+		Str("total_alloc", fmt.Sprintf("%.2f MB", float64(m.TotalAlloc)/1024/1024)).
+		Str("sys", fmt.Sprintf("%.2f MB", float64(m.Sys)/1024/1024)).
+		Uint32("num_gc", m.NumGC).
+		Msg("Memory statistics")
+}
diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go
new file mode 100644
index 00000000..56c6da3d
--- /dev/null
+++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go
@@ -0,0 +1,228 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"net/http"
+	"os"
+	"path/filepath"
+	"runtime"
+	"runtime/pprof"
+	"strings"
+	"sync"
+	"time"
+
+	cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func planReadthroughGaussian() {
+	var (
+		mountPoint         string
+		numShards          int
+		keysPerShard       int
+		memtableMB         int
+		fileSizeMultiplier int
+		readWorkers        int
+		writeWorkers       int
+		sampleSecs         int
+		iterations         int64
+		aVal               float64
+		logStats           bool
+		memProfile         string
+		cpuProfile         string
+	)
+
+	flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/", "data directory for shard files")
+	flag.IntVar(&numShards, "shards", 500, "number of shards")
+	flag.IntVar(&keysPerShard, "keys-per-shard", 4_00_00, "keys per shard")
+	flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB")
+	flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 2, "file size in GiB per shard")
+	flag.IntVar(&readWorkers, "readers", 8, "number of read workers")
+	flag.IntVar(&writeWorkers, "writers", 8, "number of write workers")
+	flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds")
+	flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations")
+	flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor")
+	flag.BoolVar(&logStats, "log-stats", true, "periodically log cache stats")
+	flag.StringVar(&memProfile, "memprofile", "mem.prof", "write memory profile to this file")
+	flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to this file")
+	flag.Parse()
+
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+	go func() {
+		log.Info().Msg("Starting pprof server on :8080")
+		log.Info().Msg("Access profiles at: http://localhost:8080/debug/pprof/")
+		log.Info().Msg("Memory profile: http://localhost:8080/debug/pprof/heap")
+		log.Info().Msg("Goroutine profile: http://localhost:8080/debug/pprof/goroutine")
+		if err := http.ListenAndServe(":8080", nil); err != nil {
+			log.Error().Err(err).Msg("pprof server failed")
+		}
+	}()
+
+	// CPU profiling
+	if cpuProfile != "" {
+		f, err := os.Create(cpuProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create CPU profile")
+		}
+		defer f.Close()
+		if err := pprof.StartCPUProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not start CPU profile")
+		}
+		defer pprof.StopCPUProfile()
+	}
+
+	//remove all files inside the mount point
+	files, err := os.ReadDir(mountPoint)
+	if err != nil {
+		panic(err)
+	}
+	for _, file := range files {
+		os.Remove(filepath.Join(mountPoint, file.Name()))
+	}
+
+	memtableSizeInBytes := int32(memtableMB) * 1024 * 1024
+	fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes)
+
+	cfg := cachepkg.WrapCacheConfig{
+		NumShards:             numShards,
+		KeysPerShard:          keysPerShard,
+		FileSize:              fileSizeInBytes,
+		MemtableSize:          memtableSizeInBytes,
+		ReWriteScoreThreshold: 0.8,
+		GridSearchEpsilon:     0.0001,
+		SampleDuration:        time.Duration(sampleSecs) * time.Second,
+
+		// Pass the metrics collector to record cache metrics
+		MetricsRecorder: InitMetricsCollector(),
+	}
+
+	// Set additional input parameters that the cache doesn't know about
+	metricsCollector.SetShards(numShards)
+	metricsCollector.SetKeysPerShard(keysPerShard)
+	metricsCollector.SetReadWorkers(readWorkers)
+	metricsCollector.SetWriteWorkers(writeWorkers)
+	metricsCollector.SetPlan("readthrough")
+
+	// Start background goroutine to wait for shutdown signal and export CSV
+	go RunmetricsWaitForShutdown()
+
+	pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats)
+	if err != nil {
+		panic(err)
+	}
+
+	MULTIPLIER := 300
+
+	missedKeyChanList := make([]chan int, writeWorkers)
+	for i := 0; i < writeWorkers; i++ {
+		missedKeyChanList[i] = make(chan int)
+	}
+
+	totalKeys := keysPerShard * numShards
+	str1kb := strings.Repeat("a", 1024)
+	str1kb = "%d" + str1kb
+
+	var wg sync.WaitGroup
+	var writeWg sync.WaitGroup
+
+	//prepopulate 70% keys
+	fmt.Printf("----------------------------------------------prepopulating keys\n")
+	for k := 0; k < int(totalKeys); k++ {
+
+		if rand.Intn(100) < 30 {
+			continue
+		}
+
+		key := fmt.Sprintf("key%d", k)
+		val := []byte(fmt.Sprintf(str1kb, k))
+		if err := pc.Put(key, val, 60); err != nil {
+			panic(err)
+		}
+		if k%5000000 == 0 {
+			fmt.Printf("----------------------------------------------prepopulated %d keys\n", k)
+		}
+	}
+
+	if writeWorkers > 0 {
+		fmt.Printf("----------------------------------------------starting write workers\n")
+		writeWg.Add(writeWorkers)
+
+		for w := 0; w < writeWorkers; w++ {
+			go func(workerID int) {
+				defer writeWg.Done()
+
+				for mk := range missedKeyChanList[workerID] {
+					key := fmt.Sprintf("key%d", mk)
+					val := []byte(fmt.Sprintf(str1kb, mk))
+					if err := pc.Put(key, val, 60); err != nil {
+						panic(err)
+					}
+				}
+			}(w)
+		}
+	}
+
+	if readWorkers > 0 {
+		fmt.Printf("----------------------------------------------reading keys\n")
+		wg.Add(readWorkers)
+
+		for r := 0; r < readWorkers; r++ {
+			go func(workerID int) {
+				defer wg.Done()
+				for k := 0; k < totalKeys*MULTIPLIER; k += 1 {
+					randomval := normalDistIntPartitioned(workerID, readWorkers, totalKeys)
+					key := fmt.Sprintf("key%d", randomval)
+					val, found, expired := pc.Get(key)
+
+					if !found {
+						writeWorkerid := randomval % writeWorkers
+						missedKeyChanList[writeWorkerid] <- randomval
+					}
+
+					if expired {
+						panic("key expired")
+
+					}
+					if found && string(val) != fmt.Sprintf(str1kb, randomval) {
+						panic("value mismatch")
+					}
+					if k%5000000 == 0 {
+						fmt.Printf("----------------------------------------------read %d keys %d readerid\n", k, workerID)
+					}
+				}
+			}(r)
+		}
+	}
+
+	// Start pprof HTTP server for runtime profiling
+
+	wg.Wait()
+	log.Info().Msgf("done putting")
+
+	// Memory profiling
+	if memProfile != "" {
+		runtime.GC() // get up-to-date statistics
+		f, err := os.Create(memProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create memory profile")
+		}
+		defer f.Close()
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not write memory profile")
+		}
+		log.Info().Msgf("Memory profile written to %s", memProfile)
+	}
+
+	// Print memory stats
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	log.Info().
+		Str("alloc", fmt.Sprintf("%.2f MB", float64(m.Alloc)/1024/1024)).
+		Str("total_alloc", fmt.Sprintf("%.2f MB", float64(m.TotalAlloc)/1024/1024)).
+		Str("sys", fmt.Sprintf("%.2f MB", float64(m.Sys)/1024/1024)).
+		Uint32("num_gc", m.NumGC).
+		Msg("Memory statistics")
+}
diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go
new file mode 100644
index 00000000..fd33e06a
--- /dev/null
+++ b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go
@@ -0,0 +1,243 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"math/rand"
+	"net/http"
+	"os"
+	"path/filepath"
+	"runtime"
+	"runtime/pprof"
+	"strings"
+	"sync"
+	"time"
+
+	cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+func planReadthroughGaussianBatched() {
+	var (
+		mountPoint         string
+		numShards          int
+		keysPerShard       int
+		memtableMB         int
+		fileSizeMultiplier int
+		readWorkers        int
+		writeWorkers       int
+		sampleSecs         int
+		iterations         int64
+		aVal               float64
+		logStats           bool
+		memProfile         string
+		cpuProfile         string
+
+		//batching reads
+		enableBatching    bool
+		batchWindowMicros int // in microseconds
+		maxBatchSize      int
+	)
+
+	flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/", "data directory for shard files")
+	flag.IntVar(&numShards, "shards", 200, "number of shards")
+	flag.IntVar(&keysPerShard, "keys-per-shard", 10_00_00, "keys per shard")
+	flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB")
+	flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 10, "file size in GiB per shard")
+	flag.IntVar(&readWorkers, "readers", 8, "number of read workers")
+	flag.IntVar(&writeWorkers, "writers", 8, "number of write workers")
+	flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds")
+	flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations")
+	flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor")
+	flag.BoolVar(&logStats, "log-stats", true, "periodically log cache stats")
+	flag.StringVar(&memProfile, "memprofile", "mem.prof", "write memory profile to this file")
+	flag.StringVar(&cpuProfile, "cpuprofile", "", "write cpu profile to this file")
+
+	flag.BoolVar(&enableBatching, "enable-batching", true, "enable read batching")
+	flag.IntVar(&batchWindowMicros, "batch-window-us", 1, "batch window in microseconds")
+	flag.IntVar(&maxBatchSize, "max-batch", 200, "max batch size")
+	flag.Parse()
+
+	zerolog.SetGlobalLevel(zerolog.InfoLevel)
+	go func() {
+		log.Info().Msg("Starting pprof server on :8080")
+		log.Info().Msg("Access profiles at: http://localhost:8080/debug/pprof/")
+		log.Info().Msg("Memory profile: http://localhost:8080/debug/pprof/heap")
+		log.Info().Msg("Goroutine profile: http://localhost:8080/debug/pprof/goroutine")
+		if err := http.ListenAndServe(":8080", nil); err != nil {
+			log.Error().Err(err).Msg("pprof server failed")
+		}
+	}()
+
+	// CPU profiling
+	if cpuProfile != "" {
+		f, err := os.Create(cpuProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create CPU profile")
+		}
+		defer f.Close()
+		if err := pprof.StartCPUProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not start CPU profile")
+		}
+		defer pprof.StopCPUProfile()
+	}
+
+	//remove all files inside the mount point
+	files, err := os.ReadDir(mountPoint)
+	if err != nil {
+		panic(err)
+	}
+	for _, file := range files {
+		os.Remove(filepath.Join(mountPoint, file.Name()))
+	}
+
+	memtableSizeInBytes := int32(memtableMB) * 1024 * 1024
+	fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes)
+
+	cfg := cachepkg.WrapCacheConfig{
+		NumShards:             numShards,
+		KeysPerShard:          keysPerShard,
+		FileSize:              fileSizeInBytes,
+		MemtableSize:          memtableSizeInBytes,
+		ReWriteScoreThreshold: 0.8,
+		GridSearchEpsilon:     0.0001,
+		SampleDuration:        time.Duration(sampleSecs) * time.Second,
+
+		//batching reads
+		EnableBatching:    enableBatching,
+		BatchWindowMicros: batchWindowMicros,
+		MaxBatchSize:      maxBatchSize,
+
+		// Pass the metrics collector to record cache metrics
+		MetricsRecorder: InitMetricsCollector(),
+	}
+
+	// Set additional input parameters that the cache doesn't know about
+	metricsCollector.SetShards(numShards)
+	metricsCollector.SetKeysPerShard(keysPerShard)
+	metricsCollector.SetReadWorkers(readWorkers)
+	metricsCollector.SetWriteWorkers(writeWorkers)
+	metricsCollector.SetPlan("readthrough-batched")
+
+	// Start background goroutine to wait for shutdown signal and export CSV
+	go RunmetricsWaitForShutdown()
+
+	pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats)
+	if err != nil {
+		panic(err)
+	}
+
+	MULTIPLIER := 300
+
+	missedKeyChanList := make([]chan int, writeWorkers)
+	for i := 0; i < writeWorkers; i++ {
+		missedKeyChanList[i] = make(chan int)
+	}
+
+	totalKeys := keysPerShard * numShards
+	str1kb := strings.Repeat("a", 1024)
+	str1kb = "%d" + str1kb
+
+	var wg sync.WaitGroup
+	var writeWg sync.WaitGroup
+
+	//prepopulate 70% keys
+	fmt.Printf("----------------------------------------------prepopulating keys\n")
+	for k := 0; k < int(totalKeys); k++ {
+
+		if rand.Intn(100) < 30 {
+			continue
+		}
+
+		key := fmt.Sprintf("key%d", k)
+		val := []byte(fmt.Sprintf(str1kb, k))
+		if err := pc.Put(key, val, 60); err != nil {
+			panic(err)
+		}
+		if k%5000000 == 0 {
+			fmt.Printf("----------------------------------------------prepopulated %d keys\n", k)
+		}
+	}
+
+	if writeWorkers > 0 {
+		fmt.Printf("----------------------------------------------starting write workers\n")
+		writeWg.Add(writeWorkers)
+
+		for w := 0; w < writeWorkers; w++ {
+			go func(workerID int) {
+				defer writeWg.Done()
+
+				for mk := range missedKeyChanList[workerID] {
+					key := fmt.Sprintf("key%d", mk)
+					val := []byte(fmt.Sprintf(str1kb, mk))
+					if err := pc.Put(key, val, 60); err != nil {
+						panic(err)
+					}
+				}
+			}(w)
+		}
+	}
+
+	if readWorkers > 0 {
+		fmt.Printf("----------------------------------------------reading keys\n")
+		wg.Add(readWorkers)
+
+		for r := 0; r < readWorkers; r++ {
+			go func(workerID int) {
+				defer wg.Done()
+				for k := 0; k < totalKeys*MULTIPLIER; k += 1 {
+					// Each worker samples from its own partition of the key space
+					randomval := normalDistIntPartitioned(workerID, readWorkers, totalKeys)
+					key := fmt.Sprintf("key%d", randomval)
+					val, found, expired := pc.Get(key)
+
+					if !found {
+						writeWorkerid := randomval % writeWorkers
+						missedKeyChanList[writeWorkerid] <- randomval
+					}
+
+					if expired {
+						panic("key expired")
+
+					}
+					if found && string(val) != fmt.Sprintf(str1kb, randomval) {
+						panic("value mismatch")
+					}
+					if k%5000000 == 0 {
+						fmt.Printf("----------------------------------------------read %d keys %d readerid\n", k, workerID)
+					}
+				}
+			}(r)
+		}
+	}
+
+	// Start pprof HTTP server for runtime profiling
+
+	wg.Wait()
+	log.Info().Msgf("done putting")
+
+	// Memory profiling
+	if memProfile != "" {
+		runtime.GC() // get up-to-date statistics
+		f, err := os.Create(memProfile)
+		if err != nil {
+			log.Fatal().Err(err).Msg("could not create memory profile")
+		}
+		defer f.Close()
+		if err := pprof.WriteHeapProfile(f); err != nil {
+			log.Fatal().Err(err).Msg("could not write memory profile")
+		}
+		log.Info().Msgf("Memory profile written to %s", memProfile)
+	}
+
+	// Print memory stats
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	log.Info().
+		Str("alloc", fmt.Sprintf("%.2f MB", float64(m.Alloc)/1024/1024)).
+		Str("total_alloc", fmt.Sprintf("%.2f MB", float64(m.TotalAlloc)/1024/1024)).
+		Str("sys", fmt.Sprintf("%.2f MB", float64(m.Sys)/1024/1024)).
+		Uint32("num_gc", m.NumGC).
+		Msg("Memory statistics")
+}
diff --git a/flashring/cmd/flashringtest/runmetrics.go b/flashring/cmd/flashringtest/runmetrics.go
new file mode 100644
index 00000000..5e1aabec
--- /dev/null
+++ b/flashring/cmd/flashringtest/runmetrics.go
@@ -0,0 +1,515 @@
+package main
+
+import (
+	"bufio"
+	"encoding/csv"
+	"fmt"
+	"log"
+	"os"
+	"os/signal"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// Define your parameter structure
+type RunMetrics struct {
+	// Input Parameters
+	Shards       int
+	KeysPerShard int
+	ReadWorkers  int
+	WriteWorkers int
+	Plan         string
+
+	// Observation Parameters
+	RP99        time.Duration
+	RP50        time.Duration
+	RP25        time.Duration
+	WP99        time.Duration
+	WP50        time.Duration
+	WP25        time.Duration
+	RThroughput float64
+	WThroughput float64
+	HitRate     float64
+	CPUUsage    float64
+	MemoryUsage float64
+}
+
+// MetricChannels holds separate channels for each metric type
+type MetricChannels struct {
+	RP99        chan time.Duration
+	RP50        chan time.Duration
+	RP25        chan time.Duration
+	WP99        chan time.Duration
+	WP50        chan time.Duration
+	WP25        chan time.Duration
+	RThroughput chan float64
+	WThroughput chan float64
+	HitRate     chan float64
+	CPUUsage    chan float64
+	MemoryUsage chan float64
+}
+
+// MetricAverager maintains running averages for a metric
+type MetricAverager struct {
+	mu        sync.RWMutex
+	sum       float64
+	count     int64
+	lastValue float64
+}
+
+func (ma *MetricAverager) Add(value float64) {
+	if value == 0 {
+		return // Ignore zero values
+	}
+	ma.mu.Lock()
+	defer ma.mu.Unlock()
+	ma.sum += value
+	ma.count++
+	ma.lastValue = value
+}
+
+func (ma *MetricAverager) AddDuration(value time.Duration) {
+	if value == 0 {
+		return // Ignore zero values
+	}
+	ma.mu.Lock()
+	defer ma.mu.Unlock()
+	ma.sum += float64(value)
+	ma.count++
+}
+
+func (ma *MetricAverager) Average() float64 {
+	ma.mu.RLock()
+	defer ma.mu.RUnlock()
+	if ma.count == 0 {
+		return 0
+	}
+	return ma.sum / float64(ma.count)
+}
+
+func (ma *MetricAverager) Latest() float64 {
+	ma.mu.RLock()
+	defer ma.mu.RUnlock()
+	return ma.lastValue
+}
+
+func (ma *MetricAverager) Reset() {
+	ma.mu.Lock()
+	defer ma.mu.Unlock()
+	ma.sum = 0
+	ma.count = 0
+}
+
+// MetricsCollector collects and averages all metrics
+type MetricsCollector struct {
+	channels  MetricChannels
+	averagers map[string]*MetricAverager
+	stopCh    chan struct{}
+	wg        sync.WaitGroup
+
+	// Input parameters (set once)
+	Shards       int
+	KeysPerShard int
+	ReadWorkers  int
+	WriteWorkers int
+	Plan         string
+}
+
+// NewMetricsCollector creates a new metrics collector with channels
+func NewMetricsCollector(bufferSize int) *MetricsCollector {
+	mc := &MetricsCollector{
+		channels: MetricChannels{
+			RP99:        make(chan time.Duration, bufferSize),
+			RP50:        make(chan time.Duration, bufferSize),
+			RP25:        make(chan time.Duration, bufferSize),
+			WP99:        make(chan time.Duration, bufferSize),
+			WP50:        make(chan time.Duration, bufferSize),
+			WP25:        make(chan time.Duration, bufferSize),
+			RThroughput: make(chan float64, bufferSize),
+			WThroughput: make(chan float64, bufferSize),
+			HitRate:     make(chan float64, bufferSize),
+			CPUUsage:    make(chan float64, bufferSize),
+			MemoryUsage: make(chan float64, bufferSize),
+		},
+		averagers: make(map[string]*MetricAverager),
+		stopCh:    make(chan struct{}),
+	}
+
+	// Initialize averagers for each metric
+	metricNames := []string{"RThroughput", "RP99", "RP50", "RP25", "WThroughput", "WP99", "WP50", "WP25", "HitRate", "CPUUsage", "MemoryUsage"}
+	for _, name := range metricNames {
+		mc.averagers[name] = &MetricAverager{}
+	}
+
+	return mc
+}
+
+// Start begins collecting metrics from all channels
+func (mc *MetricsCollector) Start() {
+	// Start a goroutine for each metric channel
+	mc.wg.Add(11)
+
+	go mc.collectMetricDuration(mc.channels.RP99, "RP99")
+	go mc.collectMetricDuration(mc.channels.RP50, "RP50")
+	go mc.collectMetricDuration(mc.channels.RP25, "RP25")
+	go mc.collectMetricDuration(mc.channels.WP99, "WP99")
+	go mc.collectMetricDuration(mc.channels.WP50, "WP50")
+	go mc.collectMetricDuration(mc.channels.WP25, "WP25")
+	go mc.collectMetric(mc.channels.RThroughput, "RThroughput")
+	go mc.collectMetric(mc.channels.WThroughput, "WThroughput")
+	go mc.collectMetric(mc.channels.HitRate, "HitRate")
+	go mc.collectMetric(mc.channels.CPUUsage, "CPUUsage")
+	go mc.collectMetric(mc.channels.MemoryUsage, "MemoryUsage")
+}
+
+func (mc *MetricsCollector) collectMetric(ch chan float64, name string) {
+	defer mc.wg.Done()
+	for {
+		select {
+		case <-mc.stopCh:
+			return
+		case value, ok := <-ch:
+			if !ok {
+				return
+			}
+			mc.averagers[name].Add(value)
+		}
+	}
+}
+
+func (mc *MetricsCollector) collectMetricDuration(ch chan time.Duration, name string) {
+	defer mc.wg.Done()
+	for {
+		select {
+		case <-mc.stopCh:
+			return
+		case value, ok := <-ch:
+			if !ok {
+				return
+			}
+			mc.averagers[name].AddDuration(value)
+		}
+	}
+}
+
+// RecordRP99 sends a value to the RP99 channel
+func (mc *MetricsCollector) RecordRP99(value time.Duration) {
+	select {
+	case mc.channels.RP99 <- value:
+	default: // Don't block if channel is full
+	}
+}
+
+// RecordRP50 sends a value to the RP50 channel
+func (mc *MetricsCollector) RecordRP50(value time.Duration) {
+	select {
+	case mc.channels.RP50 <- value:
+	default:
+	}
+}
+
+// RecordRP25 sends a value to the RP25 channel
+func (mc *MetricsCollector) RecordRP25(value time.Duration) {
+	select {
+	case mc.channels.RP25 <- value:
+	default:
+	}
+}
+
+// RecordWP99 sends a value to the WP99 channel
+func (mc *MetricsCollector) RecordWP99(value time.Duration) {
+	select {
+	case mc.channels.WP99 <- value:
+	default:
+	}
+}
+
+// RecordWP50 sends a value to the WP50 channel
+func (mc *MetricsCollector) RecordWP50(value time.Duration) {
+	select {
+	case mc.channels.WP50 <- value:
+	default:
+	}
+}
+
+// RecordWP25 sends a value to the WP25 channel
+func (mc *MetricsCollector) RecordWP25(value time.Duration) {
+	select {
+	case mc.channels.WP25 <- value:
+	default:
+	}
+}
+
+// RecordRThroughput sends a value to the RThroughput channel
+func (mc *MetricsCollector) RecordRThroughput(value float64) {
+	select {
+	case mc.channels.RThroughput <- value:
+	default:
+	}
+}
+
+// RecordWThroughput sends a value to the WThroughput channel
+func (mc *MetricsCollector) RecordWThroughput(value float64) {
+	select {
+	case mc.channels.WThroughput <- value:
+	default:
+	}
+}
+
+// RecordHitRate sends a value to the HitRate channel
+func (mc *MetricsCollector) RecordHitRate(value float64) {
+	select {
+	case mc.channels.HitRate <- value:
+	default:
+	}
+}
+
+// GetAveragedMetrics returns the current averaged metrics
+func (mc *MetricsCollector) GetAveragedMetrics() RunMetrics {
+	return RunMetrics{
+		Shards:       mc.Shards,
+		KeysPerShard: mc.KeysPerShard,
+		ReadWorkers:  mc.ReadWorkers,
+		WriteWorkers: mc.WriteWorkers,
+		Plan:         mc.Plan,
+		RP99:         time.Duration(mc.averagers["RP99"].Average()),
+		RP50:         time.Duration(mc.averagers["RP50"].Average()),
+		RP25:         time.Duration(mc.averagers["RP25"].Average()),
+		WP99:         time.Duration(mc.averagers["WP99"].Average()),
+		WP50:         time.Duration(mc.averagers["WP50"].Average()),
+		WP25:         time.Duration(mc.averagers["WP25"].Average()),
+		RThroughput:  mc.averagers["RThroughput"].Latest(),
+		WThroughput:  mc.averagers["WThroughput"].Latest(),
+		HitRate:      mc.averagers["HitRate"].Average(),
+		CPUUsage:     mc.averagers["CPUUsage"].Average(),
+		MemoryUsage:  mc.averagers["MemoryUsage"].Average(),
+	}
+}
+
+// ResetAverages resets all averagers to start fresh
+func (mc *MetricsCollector) ResetAverages() {
+	for _, avg := range mc.averagers {
+		avg.Reset()
+	}
+}
+
+// Stop stops all collector goroutines
+func (mc *MetricsCollector) Stop() {
+	close(mc.stopCh)
+	mc.wg.Wait()
+}
+
+// SetShards sets the number of shards (input parameter)
+func (mc *MetricsCollector) SetShards(value int) {
+	mc.Shards = value
+}
+
+// SetKeysPerShard sets the keys per shard (input parameter)
+func (mc *MetricsCollector) SetKeysPerShard(value int) {
+	mc.KeysPerShard = value
+}
+
+// SetReadWorkers sets the number of read workers (input parameter)
+func (mc *MetricsCollector) SetReadWorkers(value int) {
+	mc.ReadWorkers = value
+}
+
+// SetWriteWorkers sets the number of write workers (input parameter)
+func (mc *MetricsCollector) SetWriteWorkers(value int) {
+	mc.WriteWorkers = value
+}
+
+// SetPlan sets the plan name (input parameter)
+func (mc *MetricsCollector) SetPlan(value string) {
+	mc.Plan = value
+}
+
+// Global variable to hold runtime data
+var currentMetrics RunMetrics
+var metricsCollector *MetricsCollector
+
+// --- CSV Configuration ---
+const CSVFileName = "performance_results.csv"
+
+// InitMetricsCollector creates and starts the metrics collector, returning it
+// so it can be passed to other components (e.g., cache config)
+func InitMetricsCollector() *MetricsCollector {
+	metricsCollector = NewMetricsCollector(100)
+	metricsCollector.Start()
+	return metricsCollector
+}
+
+// RunmetricsWaitForShutdown waits for shutdown signal and logs final metrics to CSV
+func RunmetricsWaitForShutdown() {
+	// --- Set up Signal Handling ---
+	stopChan := make(chan os.Signal, 1)
+	signal.Notify(stopChan, syscall.SIGINT, syscall.SIGTERM)
+
+	fmt.Println("Program running. Press Ctrl+C to stop and log results to CSV...")
+
+	// --- Wait for Stop Signal ---
+	<-stopChan
+	fmt.Println("\nTermination signal received. Stopping work and logging results...")
+
+	// Stop the metrics collector
+	if metricsCollector != nil {
+		metricsCollector.Stop()
+
+		// Get final averaged metrics
+		currentMetrics = metricsCollector.GetAveragedMetrics()
+	}
+
+	// Get memory usage and CPU usage at this instant
+	currentMetrics.MemoryUsage = getMemoryUsageMB()
+	currentMetrics.CPUUsage = getCPUUsagePercent()
+
+	// --- Log Data to CSV ---
+	if err := logResultsToCSV(); err != nil {
+		log.Fatalf("FATAL: Failed to log results to CSV: %v", err)
+	}
+
+	fmt.Printf("Successfully logged results to %s.\n", CSVFileName)
+
+	// Exit the program since we're running in a goroutine
+	os.Exit(0)
+}
+
+// RunmetricsInit initializes metrics and waits for shutdown (convenience function)
+func RunmetricsInit() {
+	InitMetricsCollector()
+	RunmetricsWaitForShutdown()
+}
+
+func logResultsToCSV() error {
+	// 1. Check if the file exists to determine if we need a header row.
+	file, err := os.OpenFile(CSVFileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return fmt.Errorf("failed to open CSV file: %w", err)
+	}
+	defer file.Close()
+
+	writer := csv.NewWriter(file)
+	defer writer.Flush() // Crucial to ensure data is written to the file before exiting.
+
+	// The list of all your column headers
+	header := []string{
+		"SHARDS", "KEYS_PER_SHARD", "READ_WORKERS", "WRITE_WORKERS", "PLAN",
+		"R_THROUGHPUT", "R_P99", "R_P50", "R_P25", "W_THROUGHPUT", "W_P99", "W_P50", "W_P25",
+		"HIT_RATE", "CPU", "MEMORY", "TIME",
+	}
+
+	// Determine if the file is new (or empty) and needs the header
+	fileInfo, _ := file.Stat()
+	if fileInfo.Size() == 0 {
+		if err := writer.Write(header); err != nil {
+			return fmt.Errorf("error writing CSV header: %w", err)
+		}
+	}
+
+	// Convert your struct fields into a slice of strings for the CSV writer
+	dataRow := []string{
+		// Input Parameters
+		strconv.Itoa(currentMetrics.Shards),
+		strconv.Itoa(currentMetrics.KeysPerShard),
+		strconv.Itoa(currentMetrics.ReadWorkers), // Convert int to string
+		strconv.Itoa(currentMetrics.WriteWorkers),
+		currentMetrics.Plan,
+
+		// Observation Parameters (convert floats to strings)
+		fmt.Sprintf("%v", currentMetrics.RThroughput),
+		fmt.Sprintf("%v", currentMetrics.RP99),
+		fmt.Sprintf("%v", currentMetrics.RP50),
+		fmt.Sprintf("%v", currentMetrics.RP25),
+
+		fmt.Sprintf("%v", currentMetrics.WThroughput),
+		fmt.Sprintf("%v", currentMetrics.WP99),
+		fmt.Sprintf("%v", currentMetrics.WP50),
+		fmt.Sprintf("%v", currentMetrics.WP25),
+
+		fmt.Sprintf("%v", currentMetrics.HitRate),
+		fmt.Sprintf("%v", currentMetrics.CPUUsage),
+		fmt.Sprintf("%v", currentMetrics.MemoryUsage),
+		fmt.Sprintf("%v", time.Now().In(time.FixedZone("IST", 5*60*60+30*60)).Format("2006-01-02 15:04:05")),
+	}
+
+	if err := writer.Write(dataRow); err != nil {
+		return fmt.Errorf("error writing CSV data row: %w", err)
+	}
+
+	return nil
+}
+
+// getMemoryUsageMB returns the current memory usage of this process in MB
+func getMemoryUsageMB() float64 {
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	// Alloc is bytes of allocated heap objects
+	return float64(m.Alloc) / 1024 / 1024
+}
+
+// getSystemMemoryUsageMB returns the total system memory used by this process in MB
+func getSystemMemoryUsageMB() float64 {
+	var m runtime.MemStats
+	runtime.ReadMemStats(&m)
+	// Sys is the total bytes of memory obtained from the OS
+	return float64(m.Sys) / 1024 / 1024
+}
+
+// getCPUUsagePercent returns the CPU usage percentage for this process
+// It measures CPU usage over a short interval
+func getCPUUsagePercent() float64 {
+	// Read initial CPU stats
+	idle1, total1 := getCPUStats()
+	time.Sleep(100 * time.Millisecond)
+	// Read CPU stats again
+	idle2, total2 := getCPUStats()
+
+	idleDelta := float64(idle2 - idle1)
+	totalDelta := float64(total2 - total1)
+
+	if totalDelta == 0 {
+		return 0
+	}
+
+	cpuUsage := (1.0 - idleDelta/totalDelta) * 100.0
+	return cpuUsage
+}
+
+// getCPUStats reads /proc/stat and returns idle and total CPU time
+func getCPUStats() (idle, total uint64) {
+	file, err := os.Open("/proc/stat")
+	if err != nil {
+		return 0, 0
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		line := scanner.Text()
+		if strings.HasPrefix(line, "cpu ") {
+			fields := strings.Fields(line)
+			if len(fields) < 5 {
+				return 0, 0
+			}
+			// fields: cpu user nice system idle iowait irq softirq steal guest guest_nice
+			var values []uint64
+			for _, field := range fields[1:] {
+				val, err := strconv.ParseUint(field, 10, 64)
+				if err != nil {
+					continue
+				}
+				values = append(values, val)
+				total += val
+			}
+			if len(values) >= 4 {
+				idle = values[3] // idle is the 4th value
+			}
+			break
+		}
+	}
+	return idle, total
+}
diff --git a/flashring/go.mod b/flashring/go.mod
new file mode 100644
index 00000000..f02d9663
--- /dev/null
+++ b/flashring/go.mod
@@ -0,0 +1,32 @@
+module github.com/Meesho/BharatMLStack/flashring
+
+go 1.24.0
+
+toolchain go1.24.9
+
+require (
+	github.com/cespare/xxhash/v2 v2.3.0
+	github.com/coocood/freecache v1.2.4
+	github.com/rs/zerolog v1.34.0
+	github.com/zeebo/xxh3 v1.0.2
+	golang.org/x/sys v0.38.0
+)
+
+require (
+	github.com/dgraph-io/badger/v4 v4.9.0 // indirect
+	github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect
+	github.com/dustin/go-humanize v1.0.1 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/google/flatbuffers v25.2.10+incompatible // indirect
+	github.com/klauspost/compress v1.18.0 // indirect
+	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
+	github.com/mattn/go-colorable v0.1.14 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	go.opentelemetry.io/auto/sdk v1.1.0 // indirect
+	go.opentelemetry.io/otel v1.37.0 // indirect
+	go.opentelemetry.io/otel/metric v1.37.0 // indirect
+	go.opentelemetry.io/otel/trace v1.37.0 // indirect
+	golang.org/x/net v0.43.0 // indirect
+	google.golang.org/protobuf v1.36.7 // indirect
+)
diff --git a/flashring/go.sum b/flashring/go.sum
new file mode 100644
index 00000000..6c22ab66
--- /dev/null
+++ b/flashring/go.sum
@@ -0,0 +1,62 @@
+github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/5M=
+github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk=
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
+github.com/dgraph-io/badger/v4 v4.9.0 h1:tpqWb0NewSrCYqTvywbcXOhQdWcqephkVkbBmaaqHzc=
+github.com/dgraph-io/badger/v4 v4.9.0/go.mod h1:5/MEx97uzdPUHR4KtkNt8asfI2T4JiEiQlV7kWUo8c0=
+github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM=
+github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
+github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q=
+github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
+github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
+github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
+github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
+github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
+github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
+github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
+github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
+github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
+github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
+github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
+github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
+github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
+github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
+github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
+github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
+github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
+github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
+go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
+go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
+go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
+go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
+go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
+go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
+go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
+go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
+golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
+golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
+golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
+golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
+google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
diff --git a/flashring/internal/allocators/allocators.go b/flashring/internal/allocators/allocators.go
new file mode 100644
index 00000000..3f4cf692
--- /dev/null
+++ b/flashring/internal/allocators/allocators.go
@@ -0,0 +1,6 @@
+package allocators
+
+type SizeClass struct {
+	Size     int
+	MinCount int
+}
diff --git a/flashring/internal/allocators/byte_slice_allocator.go b/flashring/internal/allocators/byte_slice_allocator.go
new file mode 100644
index 00000000..f2990924
--- /dev/null
+++ b/flashring/internal/allocators/byte_slice_allocator.go
@@ -0,0 +1,55 @@
+package allocators
+
+import (
+	"fmt"
+	"sort"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/pools"
+	"github.com/rs/zerolog/log"
+)
+
+type ByteSliceAllocatorConfig struct {
+	SizeClasses []SizeClass
+}
+
+type ByteSliceAllocator struct {
+	config ByteSliceAllocatorConfig
+	pools  []*pools.LeakyPool
+}
+
+func NewByteSliceAllocator(config ByteSliceAllocatorConfig) *ByteSliceAllocator {
+	poolList := make([]*pools.LeakyPool, len(config.SizeClasses))
+	sort.Slice(config.SizeClasses, func(i, j int) bool {
+		return config.SizeClasses[i].Size < config.SizeClasses[j].Size
+	})
+	for i, sizeClass := range config.SizeClasses {
+		poolConfig := pools.LeakyPoolConfig{
+			Capacity:   sizeClass.MinCount,
+			Meta:       Meta{Size: sizeClass.Size, Name: fmt.Sprintf("ByteSlicePool-%dBytes", sizeClass.Size)},
+			CreateFunc: func() interface{} { return make([]byte, sizeClass.Size) },
+		}
+		poolList[i] = pools.NewLeakyPool(poolConfig)
+		log.Debug().Msgf("ByteSliceAllocator: size class - %d | min count - %d", sizeClass.Size, sizeClass.MinCount)
+	}
+	return &ByteSliceAllocator{config: config, pools: poolList}
+}
+
+func (a *ByteSliceAllocator) Get(size int) []byte {
+	for _, pool := range a.pools {
+		if size <= pool.Meta.(Meta).Size {
+			slice := pool.Get()
+			return slice.([]byte)
+		}
+	}
+	return nil
+}
+
+func (a *ByteSliceAllocator) Put(p []byte) {
+	for _, pool := range a.pools {
+		if len(p) <= pool.Meta.(Meta).Size {
+			pool.Put(p)
+			return
+		}
+	}
+	log.Error().Msgf("ByteSliceAllocator: Size class not found for size %d", len(p))
+}
diff --git a/flashring/internal/allocators/byte_slice_allocator_test.go b/flashring/internal/allocators/byte_slice_allocator_test.go
new file mode 100644
index 00000000..a962dd06
--- /dev/null
+++ b/flashring/internal/allocators/byte_slice_allocator_test.go
@@ -0,0 +1,447 @@
+package allocators
+
+import (
+	"testing"
+)
+
+func TestNewByteSliceAllocator(t *testing.T) {
+	t.Run("creates allocator with single size class", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		if allocator == nil {
+			t.Error("Expected allocator to be non-nil")
+		}
+		if allocator.config.SizeClasses[0].Size != config.SizeClasses[0].Size {
+			t.Errorf("Expected config to match, got %v", allocator.config)
+		}
+		if len(allocator.pools) != 1 {
+			t.Errorf("Expected 1 pool, got %d", len(allocator.pools))
+		}
+		if allocator.pools[0].Meta.(Meta).Size != 1024 {
+			t.Errorf("Expected pool size 1024, got %d", allocator.pools[0].Meta.(Meta).Size)
+		}
+		if allocator.pools[0].Meta.(Meta).Name != "ByteSlicePool-1024Bytes" {
+			t.Errorf("Expected pool name 'ByteSlicePool-1024Bytes', got %s", allocator.pools[0].Meta.(Meta).Name)
+		}
+	})
+
+	t.Run("creates allocator with multiple size classes", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 512, MinCount: 5},
+				{Size: 1024, MinCount: 10},
+				{Size: 256, MinCount: 15},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		if allocator == nil {
+			t.Error("Expected allocator to be non-nil")
+		}
+		if len(allocator.pools) != 3 {
+			t.Errorf("Expected 3 pools, got %d", len(allocator.pools))
+		}
+
+		// Should be sorted by size
+		if allocator.pools[0].Meta.(Meta).Size != 256 {
+			t.Errorf("Expected first pool size 256, got %d", allocator.pools[0].Meta.(Meta).Size)
+		}
+		if allocator.pools[1].Meta.(Meta).Size != 512 {
+			t.Errorf("Expected second pool size 512, got %d", allocator.pools[1].Meta.(Meta).Size)
+		}
+		if allocator.pools[2].Meta.(Meta).Size != 1024 {
+			t.Errorf("Expected third pool size 1024, got %d", allocator.pools[2].Meta.(Meta).Size)
+		}
+	})
+
+	t.Run("creates allocator with empty size classes", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		if allocator == nil {
+			t.Error("Expected allocator to be non-nil")
+		}
+		if len(allocator.pools) != 0 {
+			t.Errorf("Expected 0 pools, got %d", len(allocator.pools))
+		}
+	})
+}
+
+func TestByteSliceAllocator_Get(t *testing.T) {
+	t.Run("returns byte slice for exact size match", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(1024)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil")
+		}
+		if cap(slice) != 1024 {
+			t.Errorf("Expected slice capacity 1024, got %d", cap(slice))
+		}
+		if len(slice) != 1024 {
+			t.Errorf("Expected slice length 1024, got %d", len(slice))
+		}
+	})
+
+	t.Run("returns byte slice for smaller size", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(512)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil")
+		}
+		if cap(slice) != 1024 {
+			t.Errorf("Expected slice capacity 1024, got %d", cap(slice))
+		}
+		if len(slice) != 1024 {
+			t.Errorf("Expected slice length 1024, got %d", len(slice))
+		}
+	})
+
+	t.Run("returns smallest suitable size class", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 256, MinCount: 5},
+				{Size: 512, MinCount: 10},
+				{Size: 1024, MinCount: 15},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(300)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil")
+		}
+		if cap(slice) != 512 {
+			t.Errorf("Expected slice capacity 512, got %d", cap(slice))
+		}
+		if len(slice) != 512 {
+			t.Errorf("Expected slice length 512, got %d", len(slice))
+		}
+	})
+
+	t.Run("returns nil for size larger than all size classes", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(2048)
+		if slice != nil {
+			t.Error("Expected slice to be nil for size larger than all size classes")
+		}
+	})
+
+	t.Run("returns nil for empty size classes", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(1024)
+		if slice != nil {
+			t.Error("Expected slice to be nil for empty size classes")
+		}
+	})
+
+	t.Run("returns slice for zero size request", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(0)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil for zero size request")
+		}
+		if cap(slice) != 1024 {
+			t.Errorf("Expected slice capacity 1024, got %d", cap(slice))
+		}
+	})
+
+	t.Run("returns slice for negative size request", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(-1)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil for negative size request")
+		}
+		if cap(slice) != 1024 {
+			t.Errorf("Expected slice capacity 1024, got %d", cap(slice))
+		}
+	})
+}
+
+func TestByteSliceAllocator_Put(t *testing.T) {
+	t.Run("puts byte slice back to correct pool", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(1024)
+		if slice == nil {
+			t.Fatal("Expected slice to be non-nil")
+		}
+
+		// Put should not panic
+		allocator.Put(slice)
+	})
+
+	t.Run("puts byte slice to smallest suitable pool", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 256, MinCount: 5},
+				{Size: 512, MinCount: 10},
+				{Size: 1024, MinCount: 15},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := make([]byte, 300)
+		allocator.Put(slice)
+		// Should not panic, even though slice wasn't from the pool
+	})
+
+	t.Run("handles slice larger than all size classes", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := make([]byte, 2048)
+		// Should not panic, but will log error
+		allocator.Put(slice)
+	})
+
+	t.Run("handles empty slice", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := make([]byte, 0)
+		allocator.Put(slice)
+		// Should not panic
+	})
+
+	t.Run("handles nil slice", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		// Should not panic
+		allocator.Put(nil)
+	})
+}
+
+func TestByteSliceAllocator_GetAndPut_Integration(t *testing.T) {
+	t.Run("get and put multiple times", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 256, MinCount: 2},
+				{Size: 512, MinCount: 3},
+				{Size: 1024, MinCount: 5},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		// Get multiple slices
+		slices := make([][]byte, 5)
+		for i := 0; i < 5; i++ {
+			slices[i] = allocator.Get(200)
+			if slices[i] == nil {
+				t.Errorf("Expected slice %d to be non-nil", i)
+			}
+			if len(slices[i]) != 256 {
+				t.Errorf("Expected slice %d length 256, got %d", i, len(slices[i]))
+			}
+		}
+
+		// Put them back
+		for _, slice := range slices {
+			allocator.Put(slice)
+		}
+
+		// Get them again
+		for i := 0; i < 5; i++ {
+			slice := allocator.Get(200)
+			if slice == nil {
+				t.Errorf("Expected slice %d to be non-nil on second get", i)
+			}
+			if len(slice) != 256 {
+				t.Errorf("Expected slice %d length 256 on second get, got %d", i, len(slice))
+			}
+		}
+	})
+
+	t.Run("get and put with different sizes", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 256, MinCount: 2},
+				{Size: 512, MinCount: 3},
+				{Size: 1024, MinCount: 5},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		// Get slices of different sizes
+		slice256 := allocator.Get(200)
+		slice512 := allocator.Get(400)
+		slice1024 := allocator.Get(800)
+
+		if len(slice256) != 256 {
+			t.Errorf("Expected slice256 length 256, got %d", len(slice256))
+		}
+		if len(slice512) != 512 {
+			t.Errorf("Expected slice512 length 512, got %d", len(slice512))
+		}
+		if len(slice1024) != 1024 {
+			t.Errorf("Expected slice1024 length 1024, got %d", len(slice1024))
+		}
+
+		// Put them back
+		allocator.Put(slice256)
+		allocator.Put(slice512)
+		allocator.Put(slice1024)
+
+		// Get them again
+		newSlice256 := allocator.Get(200)
+		newSlice512 := allocator.Get(400)
+		newSlice1024 := allocator.Get(800)
+
+		if len(newSlice256) != 256 {
+			t.Errorf("Expected newSlice256 length 256, got %d", len(newSlice256))
+		}
+		if len(newSlice512) != 512 {
+			t.Errorf("Expected newSlice512 length 512, got %d", len(newSlice512))
+		}
+		if len(newSlice1024) != 1024 {
+			t.Errorf("Expected newSlice1024 length 1024, got %d", len(newSlice1024))
+		}
+	})
+}
+
+func TestByteSliceAllocator_SizeClassSorting(t *testing.T) {
+	t.Run("size classes are sorted correctly", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 1024, MinCount: 10},
+				{Size: 256, MinCount: 5},
+				{Size: 512, MinCount: 15},
+				{Size: 128, MinCount: 20},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		// Verify pools are sorted by size
+		if allocator.pools[0].Meta.(Meta).Size != 128 {
+			t.Errorf("Expected first pool size 128, got %d", allocator.pools[0].Meta.(Meta).Size)
+		}
+		if allocator.pools[1].Meta.(Meta).Size != 256 {
+			t.Errorf("Expected second pool size 256, got %d", allocator.pools[1].Meta.(Meta).Size)
+		}
+		if allocator.pools[2].Meta.(Meta).Size != 512 {
+			t.Errorf("Expected third pool size 512, got %d", allocator.pools[2].Meta.(Meta).Size)
+		}
+		if allocator.pools[3].Meta.(Meta).Size != 1024 {
+			t.Errorf("Expected fourth pool size 1024, got %d", allocator.pools[3].Meta.(Meta).Size)
+		}
+
+		// Test that Get returns from the correct pool
+		slice := allocator.Get(200)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil")
+		}
+		if len(slice) != 256 {
+			t.Errorf("Expected slice length 256 (should use 256 pool, not 128), got %d", len(slice))
+		}
+	})
+}
+
+func TestByteSliceAllocator_EdgeCases(t *testing.T) {
+	t.Run("single size class with exact match", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 512, MinCount: 1},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		slice := allocator.Get(512)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil")
+		}
+		if len(slice) != 512 {
+			t.Errorf("Expected slice length 512, got %d", len(slice))
+		}
+
+		allocator.Put(slice)
+
+		// Get again after putting back
+		slice2 := allocator.Get(512)
+		if slice2 == nil {
+			t.Error("Expected slice2 to be non-nil")
+		}
+		if len(slice2) != 512 {
+			t.Errorf("Expected slice2 length 512, got %d", len(slice2))
+		}
+	})
+
+	t.Run("duplicate size classes", func(t *testing.T) {
+		config := ByteSliceAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 512, MinCount: 5},
+				{Size: 512, MinCount: 10},
+			},
+		}
+		allocator := NewByteSliceAllocator(config)
+
+		if len(allocator.pools) != 2 {
+			t.Errorf("Expected 2 pools, got %d", len(allocator.pools))
+		}
+
+		slice := allocator.Get(512)
+		if slice == nil {
+			t.Error("Expected slice to be non-nil")
+		}
+		if len(slice) != 512 {
+			t.Errorf("Expected slice length 512, got %d", len(slice))
+		}
+	})
+}
diff --git a/flashring/internal/allocators/slab_aligned_page_allocator.go b/flashring/internal/allocators/slab_aligned_page_allocator.go
new file mode 100644
index 00000000..07a8d8ba
--- /dev/null
+++ b/flashring/internal/allocators/slab_aligned_page_allocator.go
@@ -0,0 +1,72 @@
+package allocators
+
+import (
+	"errors"
+	"fmt"
+	"sort"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+	"github.com/Meesho/BharatMLStack/flashring/internal/pools"
+	"github.com/rs/zerolog/log"
+)
+
+var (
+	ErrSizeNotAligned = errors.New("size not aligned")
+)
+
+type SlabAlignedPageAllocatorConfig struct {
+	SizeClasses []SizeClass
+}
+
+type Meta struct {
+	Size int
+	Name string
+}
+
+type SlabAlignedPageAllocator struct {
+	config SlabAlignedPageAllocatorConfig
+	pools  []*pools.LeakyPool
+}
+
+func NewSlabAlignedPageAllocator(config SlabAlignedPageAllocatorConfig) (*SlabAlignedPageAllocator, error) {
+	poolList := make([]*pools.LeakyPool, len(config.SizeClasses))
+	sort.Slice(config.SizeClasses, func(i, j int) bool {
+		return config.SizeClasses[i].Size < config.SizeClasses[j].Size
+	})
+	for i, sizeClass := range config.SizeClasses {
+		if sizeClass.Size%fs.BLOCK_SIZE != 0 {
+			return nil, ErrSizeNotAligned
+		}
+		poolConfig := pools.LeakyPoolConfig{
+			Capacity:   sizeClass.MinCount,
+			Meta:       Meta{Size: sizeClass.Size, Name: fmt.Sprintf("SlabAlignedPagePool-%dBytes", sizeClass.Size)},
+			CreateFunc: func() interface{} { return fs.NewAlignedPage(sizeClass.Size) },
+		}
+		poolList[i] = pools.NewLeakyPool(poolConfig)
+		poolList[i].RegisterPreDrefHook(func(obj interface{}) {
+			fs.Unmap(obj.(*fs.AlignedPage))
+		})
+		log.Debug().Msgf("SlabAlignedPageAllocator: size class - %d | min count - %d", sizeClass.Size, sizeClass.MinCount)
+	}
+	return &SlabAlignedPageAllocator{config: config, pools: poolList}, nil
+}
+
+func (a *SlabAlignedPageAllocator) Get(size int) *fs.AlignedPage {
+	for _, pool := range a.pools {
+		if size <= pool.Meta.(Meta).Size {
+			page := pool.Get()
+			return page.(*fs.AlignedPage)
+		}
+	}
+	return nil
+}
+
+func (a *SlabAlignedPageAllocator) Put(p *fs.AlignedPage) {
+	for _, pool := range a.pools {
+		if len(p.Buf) <= pool.Meta.(Meta).Size {
+			pool.Put(p)
+			return
+		}
+	}
+	log.Error().Msgf("SlabAlignedPageAllocator: Size class not found for size %d", len(p.Buf))
+}
diff --git a/flashring/internal/allocators/slab_aligned_page_allocator_test.go b/flashring/internal/allocators/slab_aligned_page_allocator_test.go
new file mode 100644
index 00000000..55a187c7
--- /dev/null
+++ b/flashring/internal/allocators/slab_aligned_page_allocator_test.go
@@ -0,0 +1,693 @@
+package allocators
+
+import (
+	"testing"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+)
+
+func TestNewSlabAlignedPageAllocator(t *testing.T) {
+	t.Run("creates allocator with single aligned size class", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10}, // 4096 is aligned to fs.BLOCK_SIZE
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+		if allocator == nil {
+			t.Error("Expected allocator to be non-nil")
+		}
+		if allocator.config.SizeClasses[0].Size != config.SizeClasses[0].Size {
+			t.Errorf("Expected config to match, got %v", allocator.config)
+		}
+		if len(allocator.pools) != 1 {
+			t.Errorf("Expected 1 pool, got %d", len(allocator.pools))
+		}
+		if allocator.pools[0].Meta.(Meta).Size != 4096 {
+			t.Errorf("Expected pool size 4096, got %d", allocator.pools[0].Meta.(Meta).Size)
+		}
+		if allocator.pools[0].Meta.(Meta).Name != "SlabAlignedPagePool-4096Bytes" {
+			t.Errorf("Expected pool name 'SlabAlignedPagePool-4096Bytes', got %s", allocator.pools[0].Meta.(Meta).Name)
+		}
+	})
+
+	t.Run("creates allocator with multiple aligned size classes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 8192, MinCount: 5},  // 8192 is aligned to fs.BLOCK_SIZE
+				{Size: 4096, MinCount: 10}, // 4096 is aligned to fs.BLOCK_SIZE
+				{Size: 16384, MinCount: 3}, // 16384 is aligned to fs.BLOCK_SIZE
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+		if allocator == nil {
+			t.Error("Expected allocator to be non-nil")
+		}
+		if len(allocator.pools) != 3 {
+			t.Errorf("Expected 3 pools, got %d", len(allocator.pools))
+		}
+
+		// Should be sorted by size
+		if allocator.pools[0].Meta.(Meta).Size != 4096 {
+			t.Errorf("Expected first pool size 4096, got %d", allocator.pools[0].Meta.(Meta).Size)
+		}
+		if allocator.pools[1].Meta.(Meta).Size != 8192 {
+			t.Errorf("Expected second pool size 8192, got %d", allocator.pools[1].Meta.(Meta).Size)
+		}
+		if allocator.pools[2].Meta.(Meta).Size != 16384 {
+			t.Errorf("Expected third pool size 16384, got %d", allocator.pools[2].Meta.(Meta).Size)
+		}
+	})
+
+	t.Run("creates allocator with empty size classes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+		if allocator == nil {
+			t.Error("Expected allocator to be non-nil")
+		}
+		if len(allocator.pools) != 0 {
+			t.Errorf("Expected 0 pools, got %d", len(allocator.pools))
+		}
+	})
+
+	t.Run("returns error for non-aligned size class", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4097, MinCount: 10}, // 4097 is not aligned to fs.BLOCK_SIZE (4096)
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+
+		if err != ErrSizeNotAligned {
+			t.Errorf("Expected ErrSizeNotAligned, got %v", err)
+		}
+		if allocator != nil {
+			t.Error("Expected allocator to be nil on error")
+		}
+	})
+
+	t.Run("returns error for mixed aligned and non-aligned size classes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10}, // aligned
+				{Size: 3000, MinCount: 5},  // not aligned
+				{Size: 8192, MinCount: 3},  // aligned
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+
+		if err != ErrSizeNotAligned {
+			t.Errorf("Expected ErrSizeNotAligned, got %v", err)
+		}
+		if allocator != nil {
+			t.Error("Expected allocator to be nil on error")
+		}
+	})
+}
+
+func TestSlabAlignedPageAllocator_Get(t *testing.T) {
+	t.Run("returns aligned page for exact size match", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(4096)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+		if len(page.Buf) != 4096 {
+			t.Errorf("Expected page buffer length 4096, got %d", len(page.Buf))
+		}
+		if cap(page.Buf) != 4096 {
+			t.Errorf("Expected page buffer capacity 4096, got %d", cap(page.Buf))
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+
+	t.Run("returns aligned page for smaller size", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(2048)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+		if len(page.Buf) != 4096 {
+			t.Errorf("Expected page buffer length 4096, got %d", len(page.Buf))
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+
+	t.Run("returns smallest suitable size class", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 5},
+				{Size: 8192, MinCount: 10},
+				{Size: 16384, MinCount: 3},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(6000)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+		if len(page.Buf) != 8192 {
+			t.Errorf("Expected page buffer length 8192, got %d", len(page.Buf))
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+
+	t.Run("returns nil for size larger than all size classes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(8192)
+		if page != nil {
+			t.Error("Expected page to be nil for size larger than all size classes")
+		}
+	})
+
+	t.Run("returns nil for empty size classes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(4096)
+		if page != nil {
+			t.Error("Expected page to be nil for empty size classes")
+		}
+	})
+
+	t.Run("returns page for zero size request", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(0)
+		if page == nil {
+			t.Error("Expected page to be non-nil for zero size request")
+		}
+		if len(page.Buf) != 4096 {
+			t.Errorf("Expected page buffer length 4096, got %d", len(page.Buf))
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+
+	t.Run("returns page for negative size request", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(-1)
+		if page == nil {
+			t.Error("Expected page to be non-nil for negative size request")
+		}
+		if len(page.Buf) != 4096 {
+			t.Errorf("Expected page buffer length 4096, got %d", len(page.Buf))
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+}
+
+func TestSlabAlignedPageAllocator_Put(t *testing.T) {
+	t.Run("puts aligned page back to correct pool", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(4096)
+		if page == nil {
+			t.Fatal("Expected page to be non-nil")
+		}
+
+		// Put should not panic
+		allocator.Put(page)
+	})
+
+	t.Run("puts page to smallest suitable pool", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 5},
+				{Size: 8192, MinCount: 10},
+				{Size: 16384, MinCount: 3},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		// Create a page manually (not from pool)
+		page := fs.NewAlignedPage(6000)
+		if page == nil {
+			t.Fatal("Failed to create aligned page")
+		}
+
+		// Should not panic, even though page wasn't from the pool
+		allocator.Put(page)
+	})
+
+	t.Run("handles page larger than all size classes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		// Create a large page
+		page := fs.NewAlignedPage(8192)
+		if page == nil {
+			t.Fatal("Failed to create aligned page")
+		}
+
+		// Should not panic, but will log error
+		allocator.Put(page)
+
+		// Clean up manually since it won't be put back in pool
+		fs.Unmap(page)
+	})
+
+	t.Run("handles nil page", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		// Should not panic, but may cause issues due to nil pointer
+		// This test mainly ensures the method doesn't crash completely
+		defer func() {
+			if r := recover(); r != nil {
+				// It's expected that this might panic due to nil pointer access
+				t.Logf("Expected panic occurred: %v", r)
+			}
+		}()
+
+		allocator.Put(nil)
+	})
+}
+
+func TestSlabAlignedPageAllocator_GetAndPut_Integration(t *testing.T) {
+	t.Run("get and put multiple times", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 2},
+				{Size: 8192, MinCount: 3},
+				{Size: 16384, MinCount: 1},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		// Get multiple pages
+		pages := make([]*fs.AlignedPage, 5)
+		for i := 0; i < 5; i++ {
+			pages[i] = allocator.Get(3000) // Should get 4096 size
+			if pages[i] == nil {
+				t.Errorf("Expected page %d to be non-nil", i)
+			}
+			if len(pages[i].Buf) != 4096 {
+				t.Errorf("Expected page %d buffer length 4096, got %d", i, len(pages[i].Buf))
+			}
+		}
+
+		// Put them back
+		for _, page := range pages {
+			if page != nil {
+				allocator.Put(page)
+			}
+		}
+
+		// Get them again
+		for i := 0; i < 5; i++ {
+			page := allocator.Get(3000)
+			if page == nil {
+				t.Errorf("Expected page %d to be non-nil on second get", i)
+			}
+			if page != nil && len(page.Buf) != 4096 {
+				t.Errorf("Expected page %d buffer length 4096 on second get, got %d", i, len(page.Buf))
+			}
+		}
+	})
+
+	t.Run("get and put with different sizes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 2},
+				{Size: 8192, MinCount: 3},
+				{Size: 16384, MinCount: 1},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		// Get pages of different sizes
+		page4k := allocator.Get(3000)   // Should get 4096
+		page8k := allocator.Get(6000)   // Should get 8192
+		page16k := allocator.Get(12000) // Should get 16384
+
+		if len(page4k.Buf) != 4096 {
+			t.Errorf("Expected page4k buffer length 4096, got %d", len(page4k.Buf))
+		}
+		if len(page8k.Buf) != 8192 {
+			t.Errorf("Expected page8k buffer length 8192, got %d", len(page8k.Buf))
+		}
+		if len(page16k.Buf) != 16384 {
+			t.Errorf("Expected page16k buffer length 16384, got %d", len(page16k.Buf))
+		}
+
+		// Put them back
+		allocator.Put(page4k)
+		allocator.Put(page8k)
+		allocator.Put(page16k)
+
+		// Get them again
+		newPage4k := allocator.Get(3000)
+		newPage8k := allocator.Get(6000)
+		newPage16k := allocator.Get(12000)
+
+		if len(newPage4k.Buf) != 4096 {
+			t.Errorf("Expected newPage4k buffer length 4096, got %d", len(newPage4k.Buf))
+		}
+		if len(newPage8k.Buf) != 8192 {
+			t.Errorf("Expected newPage8k buffer length 8192, got %d", len(newPage8k.Buf))
+		}
+		if len(newPage16k.Buf) != 16384 {
+			t.Errorf("Expected newPage16k buffer length 16384, got %d", len(newPage16k.Buf))
+		}
+	})
+}
+
+func TestSlabAlignedPageAllocator_SizeClassSorting(t *testing.T) {
+	t.Run("size classes are sorted correctly", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 16384, MinCount: 3},
+				{Size: 4096, MinCount: 10},
+				{Size: 8192, MinCount: 5},
+				{Size: 12288, MinCount: 2}, // 12288 = 3 * 4096, aligned
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		// Verify pools are sorted by size
+		if allocator.pools[0].Meta.(Meta).Size != 4096 {
+			t.Errorf("Expected first pool size 4096, got %d", allocator.pools[0].Meta.(Meta).Size)
+		}
+		if allocator.pools[1].Meta.(Meta).Size != 8192 {
+			t.Errorf("Expected second pool size 8192, got %d", allocator.pools[1].Meta.(Meta).Size)
+		}
+		if allocator.pools[2].Meta.(Meta).Size != 12288 {
+			t.Errorf("Expected third pool size 12288, got %d", allocator.pools[2].Meta.(Meta).Size)
+		}
+		if allocator.pools[3].Meta.(Meta).Size != 16384 {
+			t.Errorf("Expected fourth pool size 16384, got %d", allocator.pools[3].Meta.(Meta).Size)
+		}
+
+		// Test that Get returns from the correct pool
+		page := allocator.Get(10000)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+		if len(page.Buf) != 12288 {
+			t.Errorf("Expected page buffer length 12288 (should use 12288 pool), got %d", len(page.Buf))
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+}
+
+func TestSlabAlignedPageAllocator_EdgeCases(t *testing.T) {
+	t.Run("single size class with exact match", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 1},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(4096)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+		if len(page.Buf) != 4096 {
+			t.Errorf("Expected page buffer length 4096, got %d", len(page.Buf))
+		}
+
+		allocator.Put(page)
+
+		// Get again after putting back
+		page2 := allocator.Get(4096)
+		if page2 == nil {
+			t.Error("Expected page2 to be non-nil")
+		}
+		if len(page2.Buf) != 4096 {
+			t.Errorf("Expected page2 buffer length 4096, got %d", len(page2.Buf))
+		}
+	})
+
+	t.Run("duplicate size classes", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 5},
+				{Size: 4096, MinCount: 10},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		if len(allocator.pools) != 2 {
+			t.Errorf("Expected 2 pools, got %d", len(allocator.pools))
+		}
+
+		page := allocator.Get(4096)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+		if len(page.Buf) != 4096 {
+			t.Errorf("Expected page buffer length 4096, got %d", len(page.Buf))
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+}
+
+func TestSlabAlignedPageAllocator_MemoryAlignment(t *testing.T) {
+	t.Run("pages are properly aligned", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 1},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		page := allocator.Get(4096)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+
+		// Test that we can write to the page without issues
+		if len(page.Buf) > 0 {
+			page.Buf[0] = 0x42
+			page.Buf[len(page.Buf)-1] = 0x24
+
+			if page.Buf[0] != 0x42 {
+				t.Error("Failed to write to first byte of page")
+			}
+			if page.Buf[len(page.Buf)-1] != 0x24 {
+				t.Error("Failed to write to last byte of page")
+			}
+		}
+
+		// Clean up
+		if page != nil {
+			fs.Unmap(page)
+		}
+	})
+}
+
+func TestSlabAlignedPageAllocator_PreDrefHook(t *testing.T) {
+	t.Run("pre deref hook is registered", func(t *testing.T) {
+		config := SlabAlignedPageAllocatorConfig{
+			SizeClasses: []SizeClass{
+				{Size: 4096, MinCount: 1},
+			},
+		}
+		allocator, err := NewSlabAlignedPageAllocator(config)
+		if err != nil {
+			t.Fatalf("Expected no error, got %v", err)
+		}
+
+		// The PreDrefHook should be registered during construction
+		// We can't directly test the hook execution without accessing private fields
+		// But we can verify that pool creation succeeded
+		if len(allocator.pools) != 1 {
+			t.Errorf("Expected 1 pool to be created, got %d", len(allocator.pools))
+		}
+
+		// Test normal allocation and deallocation
+		page := allocator.Get(4096)
+		if page == nil {
+			t.Error("Expected page to be non-nil")
+		}
+
+		// Put back should trigger the hook internally when pool is full
+		allocator.Put(page)
+	})
+}
+
+func TestSlabAlignedPageAllocator_AlignmentValidation(t *testing.T) {
+	t.Run("various alignment checks", func(t *testing.T) {
+		tests := []struct {
+			name        string
+			size        int
+			shouldError bool
+		}{
+			{"aligned 4096", 4096, false},
+			{"aligned 8192", 8192, false},
+			{"aligned 12288", 12288, false},
+			{"aligned 16384", 16384, false},
+			{"unaligned 4097", 4097, true},
+			{"unaligned 4000", 4000, true},
+			{"unaligned 5000", 5000, true},
+			{"unaligned 1024", 1024, true}, // 1024 < 4096
+			{"unaligned 2048", 2048, true}, // 2048 < 4096
+		}
+
+		for _, tt := range tests {
+			t.Run(tt.name, func(t *testing.T) {
+				config := SlabAlignedPageAllocatorConfig{
+					SizeClasses: []SizeClass{
+						{Size: tt.size, MinCount: 1},
+					},
+				}
+				allocator, err := NewSlabAlignedPageAllocator(config)
+
+				if tt.shouldError {
+					if err != ErrSizeNotAligned {
+						t.Errorf("Expected ErrSizeNotAligned for size %d, got %v", tt.size, err)
+					}
+					if allocator != nil {
+						t.Errorf("Expected nil allocator for size %d", tt.size)
+					}
+				} else {
+					if err != nil {
+						t.Errorf("Expected no error for size %d, got %v", tt.size, err)
+					}
+					if allocator == nil {
+						t.Errorf("Expected non-nil allocator for size %d", tt.size)
+					}
+				}
+			})
+		}
+	})
+}
diff --git a/flashring/internal/cache/badger.go b/flashring/internal/cache/badger.go
new file mode 100644
index 00000000..7ff8c691
--- /dev/null
+++ b/flashring/internal/cache/badger.go
@@ -0,0 +1,135 @@
+package internal
+
+import (
+	"sync/atomic"
+	"time"
+
+	filecache "github.com/Meesho/BharatMLStack/flashring/internal/shard"
+	badger "github.com/dgraph-io/badger/v4"
+	"github.com/rs/zerolog/log"
+)
+
+type Badger struct {
+	cache *badger.DB
+	stats *CacheStats
+}
+
+func NewBadger(config WrapCacheConfig, logStats bool) (*Badger, error) {
+	options := badger.DefaultOptions(config.MountPoint)
+	options.MetricsEnabled = false
+
+	// 1. PRIMARY CACHE (1GB)
+	// This caches the data blocks themselves.
+	options.BlockCacheSize = 1024 << 20
+
+	// 2. INDEX CACHE (512MB)
+	// This keeps the keys and the structure of the LSM tree in RAM.
+	// This is the most critical setting for read latency.
+	options.IndexCacheSize = 512 << 20
+
+	// 3. WRITE BUFFERS (Memtables)
+	// We use 3 tables of 64MB each. This allows Badger to handle
+	// write spikes without blocking. (~192MB total)
+	options.NumMemtables = 40
+	options.MemTableSize = 1024 << 20
+
+	options.ValueThreshold = 1024
+	options.SyncWrites = false
+
+	cache, err := badger.Open(options)
+	if err != nil {
+		return nil, err
+	}
+	bc := &Badger{
+		cache: cache,
+		stats: &CacheStats{
+			Hits:                   atomic.Uint64{},
+			TotalGets:              atomic.Uint64{},
+			TotalPuts:              atomic.Uint64{},
+			ReWrites:               atomic.Uint64{},
+			Expired:                atomic.Uint64{},
+			ShardWiseActiveEntries: atomic.Uint64{},
+			LatencyTracker:         filecache.NewLatencyTracker(),
+		},
+	}
+
+	if logStats {
+		go func() {
+			sleepDuration := 10 * time.Second
+			var prevTotalGets, prevTotalPuts uint64
+			for {
+				time.Sleep(sleepDuration)
+
+				totalGets := bc.stats.TotalGets.Load()
+				totalPuts := bc.stats.TotalPuts.Load()
+				getsPerSec := float64(totalGets-prevTotalGets) / sleepDuration.Seconds()
+				putsPerSec := float64(totalPuts-prevTotalPuts) / sleepDuration.Seconds()
+
+				log.Info().Msgf("Shard %d HitRate: %v", 0, cache.BlockCacheMetrics().Hits())
+				log.Info().Msgf("Shard %d Expired: %v", 0, cache.BlockCacheMetrics().Misses())
+				log.Info().Msgf("Shard %d Total: %v", 0, cache.BlockCacheMetrics().KeysEvicted())
+				log.Info().Msgf("Gets/sec: %v", getsPerSec)
+				log.Info().Msgf("Puts/sec: %v", putsPerSec)
+
+				getP25, getP50, getP99 := bc.stats.LatencyTracker.GetLatencyPercentiles()
+				putP25, putP50, putP99 := bc.stats.LatencyTracker.PutLatencyPercentiles()
+
+				log.Info().Msgf("Get Count: %v", totalGets)
+				log.Info().Msgf("Put Count: %v", totalPuts)
+				log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99)
+				log.Info().Msgf("Put Latencies - P25: %v, P50: %v, P99: %v", putP25, putP50, putP99)
+
+				prevTotalGets = totalGets
+				prevTotalPuts = totalPuts
+			}
+		}()
+	}
+
+	return bc, nil
+}
+
+func (b *Badger) Put(key string, value []byte, exptimeInMinutes uint16) error {
+
+	start := time.Now()
+	defer func() {
+		b.stats.LatencyTracker.RecordPut(time.Since(start))
+	}()
+
+	b.stats.TotalPuts.Add(1)
+	err := b.cache.Update(func(txn *badger.Txn) error {
+		entry := badger.NewEntry([]byte(key), value).WithTTL(time.Duration(exptimeInMinutes) * time.Minute)
+		err := txn.SetEntry(entry)
+		return err
+	})
+	return err
+}
+
+func (b *Badger) Get(key string) ([]byte, bool, bool) {
+
+	start := time.Now()
+	defer func() {
+		b.stats.LatencyTracker.RecordGet(time.Since(start))
+	}()
+
+	b.stats.TotalGets.Add(1)
+
+	val := make([]byte, 0)
+	err := b.cache.View(func(txn *badger.Txn) error {
+		item, err := txn.Get([]byte(key))
+		if err != nil {
+			return err
+		}
+		val, err = item.ValueCopy(val)
+
+		if err != nil {
+			b.stats.Hits.Add(1)
+		}
+
+		return err
+	})
+	return val, err != badger.ErrKeyNotFound, false
+}
+
+func (b *Badger) Close() error {
+	return b.cache.Close()
+}
diff --git a/flashring/internal/cache/cache.go b/flashring/internal/cache/cache.go
new file mode 100644
index 00000000..74755251
--- /dev/null
+++ b/flashring/internal/cache/cache.go
@@ -0,0 +1,457 @@
+package internal
+
+import (
+	"fmt"
+	"strconv"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/maths"
+	filecache "github.com/Meesho/BharatMLStack/flashring/internal/shard"
+	"github.com/cespare/xxhash/v2"
+	"github.com/rs/zerolog/log"
+)
+
+/*
+ Each shard can keep 67M keys
+ With Round = 1, expected collision (67M)^2/(2*2^62) = 4.87×10^-4
+*/
+
+const (
+	ROUNDS         = 1
+	KEYS_PER_SHARD = (1 << 26)
+	BLOCK_SIZE     = 4096
+)
+
+var (
+	ErrNumShardLessThan1            = fmt.Errorf("num shards must be greater than 0")
+	ErrKeysPerShardLessThan1        = fmt.Errorf("keys per shard must be greater than 0")
+	ErrKeysPerShardGreaterThan67M   = fmt.Errorf("keys per shard must be less than 67M")
+	ErrMemtableSizeLessThan1        = fmt.Errorf("memtable size must be greater than 0")
+	ErrMemtableSizeGreaterThan1GB   = fmt.Errorf("memtable size must be less than 1GB")
+	ErrMemtableSizeNotMultipleOf4KB = fmt.Errorf("memtable size must be a multiple of 4KB")
+	ErrFileSizeLessThan1            = fmt.Errorf("file size must be greater than 0")
+	ErrFileSizeNotMultipleOf4KB     = fmt.Errorf("file size must be a multiple of 4KB")
+	Seed                            = xxhash.Sum64String(strconv.Itoa(int(time.Now().UnixNano())))
+)
+
+type WrapCache struct {
+	shards          []*filecache.ShardCache
+	shardLocks      []sync.RWMutex
+	predictor       *maths.Predictor
+	stats           []*CacheStats
+	metricsRecorder MetricsRecorder
+}
+
+type CacheStats struct {
+	Hits                   atomic.Uint64
+	TotalGets              atomic.Uint64
+	TotalPuts              atomic.Uint64
+	ReWrites               atomic.Uint64
+	Expired                atomic.Uint64
+	ShardWiseActiveEntries atomic.Uint64
+	LatencyTracker         *filecache.LatencyTracker
+	BatchTracker           *filecache.BatchTracker
+}
+
+// MetricsRecorder is an interface for recording metrics from the cache
+// Implement this interface to receive metrics from the cache layer
+type MetricsRecorder interface {
+	// Input parameters
+	SetShards(value int)
+	SetKeysPerShard(value int)
+	SetReadWorkers(value int)
+	SetWriteWorkers(value int)
+	SetPlan(value string)
+
+	// Observation metrics
+	RecordRP99(value time.Duration)
+	RecordRP50(value time.Duration)
+	RecordRP25(value time.Duration)
+	RecordWP99(value time.Duration)
+	RecordWP50(value time.Duration)
+	RecordWP25(value time.Duration)
+	RecordRThroughput(value float64)
+	RecordWThroughput(value float64)
+	RecordHitRate(value float64)
+}
+
+type WrapCacheConfig struct {
+	NumShards             int
+	KeysPerShard          int
+	FileSize              int64
+	MemtableSize          int32
+	ReWriteScoreThreshold float32
+	GridSearchEpsilon     float64
+	SampleDuration        time.Duration
+
+	// Batching reads
+	EnableBatching    bool
+	BatchWindowMicros int // in microseconds
+	MaxBatchSize      int
+
+	// Optional metrics recorder
+	MetricsRecorder MetricsRecorder
+
+	//Badger
+	MountPoint string
+}
+
+func NewWrapCache(config WrapCacheConfig, mountPoint string, logStats bool) (*WrapCache, error) {
+	if config.NumShards <= 0 {
+		return nil, ErrNumShardLessThan1
+	}
+	if config.KeysPerShard <= 0 {
+		return nil, ErrKeysPerShardLessThan1
+	}
+	if config.KeysPerShard > KEYS_PER_SHARD {
+		return nil, ErrKeysPerShardGreaterThan67M
+	}
+	if config.MemtableSize <= 0 {
+		return nil, ErrMemtableSizeLessThan1
+	}
+	if config.MemtableSize > 1024*1024*1024 {
+		return nil, ErrMemtableSizeGreaterThan1GB
+	}
+	if config.MemtableSize%BLOCK_SIZE != 0 {
+		return nil, ErrMemtableSizeNotMultipleOf4KB
+	}
+	if config.FileSize <= 0 {
+		return nil, ErrFileSizeLessThan1
+	}
+	if config.FileSize%BLOCK_SIZE != 0 {
+		return nil, ErrFileSizeNotMultipleOf4KB
+	}
+	weights := []maths.WeightTuple{
+		{
+			WFreq: 0.1,
+			WLA:   0.1,
+		},
+		{
+			WFreq: 0.45,
+			WLA:   0.1,
+		},
+		{
+			WFreq: 0.9,
+			WLA:   0.1,
+		},
+		{
+			WFreq: 0.1,
+			WLA:   0.45,
+		},
+		{
+			WFreq: 0.45,
+			WLA:   0.45,
+		},
+		{
+			WFreq: 0.9,
+			WLA:   0.45,
+		},
+		{
+			WFreq: 0.1,
+			WLA:   0.9,
+		},
+		{
+			WFreq: 0.45,
+			WLA:   0.9,
+		},
+		{
+			WFreq: 0.9,
+			WLA:   0.9,
+		},
+	}
+	MaxMemTableCount := config.FileSize / int64(config.MemtableSize)
+	predictor := maths.NewPredictor(maths.PredictorConfig{
+		ReWriteScoreThreshold: config.ReWriteScoreThreshold,
+		Weights:               weights,
+		SampleDuration:        config.SampleDuration,
+		MaxMemTableCount:      uint32(MaxMemTableCount),
+		GridSearchEpsilon:     config.GridSearchEpsilon,
+	})
+
+	batchWindow := time.Duration(0)
+	if config.EnableBatching && config.BatchWindowMicros > 0 {
+		batchWindow = time.Duration(config.BatchWindowMicros) * time.Microsecond
+	}
+	shardLocks := make([]sync.RWMutex, config.NumShards)
+	shards := make([]*filecache.ShardCache, config.NumShards)
+	for i := 0; i < config.NumShards; i++ {
+		shards[i] = filecache.NewShardCache(filecache.ShardCacheConfig{
+			MemtableSize:        config.MemtableSize,
+			Rounds:              ROUNDS,
+			RbInitial:           config.KeysPerShard,
+			RbMax:               config.KeysPerShard,
+			DeleteAmortizedStep: 10000,
+			MaxFileSize:         int64(config.FileSize),
+			BlockSize:           BLOCK_SIZE,
+			Directory:           mountPoint,
+			Predictor:           predictor,
+
+			//batching reads
+			EnableBatching: config.EnableBatching,
+			BatchWindow:    batchWindow,
+			MaxBatchSize:   config.MaxBatchSize,
+		}, &shardLocks[i])
+	}
+
+	stats := make([]*CacheStats, config.NumShards)
+	for i := 0; i < config.NumShards; i++ {
+		stats[i] = &CacheStats{LatencyTracker: filecache.NewLatencyTracker(), BatchTracker: filecache.NewBatchTracker()}
+	}
+	wc := &WrapCache{
+		shards:          shards,
+		shardLocks:      shardLocks,
+		predictor:       predictor,
+		stats:           stats,
+		metricsRecorder: config.MetricsRecorder,
+	}
+	if logStats {
+
+		go func() {
+			sleepDuration := 10 * time.Second
+			// perShardPrevTotalGets := make([]uint64, config.NumShards)
+			// perShardPrevTotalPuts := make([]uint64, config.NumShards)
+			combinedPrevTotalGets := uint64(0)
+			combinedPrevTotalPuts := uint64(0)
+			for {
+				time.Sleep(sleepDuration)
+
+				combinedTotalGets := uint64(0)
+				combinedTotalPuts := uint64(0)
+				combinedHits := uint64(0)
+				combinedReWrites := uint64(0)
+				combinedExpired := uint64(0)
+				combinedShardWiseActiveEntries := uint64(0)
+				for i := 0; i < config.NumShards; i++ {
+					combinedTotalGets += wc.stats[i].TotalGets.Load()
+					combinedTotalPuts += wc.stats[i].TotalPuts.Load()
+					combinedHits += wc.stats[i].Hits.Load()
+					combinedReWrites += wc.stats[i].ReWrites.Load()
+					combinedExpired += wc.stats[i].Expired.Load()
+					combinedShardWiseActiveEntries += wc.stats[i].ShardWiseActiveEntries.Load()
+				}
+
+				combinedHitRate := float64(0)
+				if combinedTotalGets > 0 {
+					combinedHitRate = float64(combinedHits) / float64(combinedTotalGets)
+				}
+
+				log.Info().Msgf("Combined HitRate: %v", combinedHitRate)
+				log.Info().Msgf("Combined ReWrites: %v", combinedReWrites)
+				log.Info().Msgf("Combined Expired: %v", combinedExpired)
+				log.Info().Msgf("Combined Total: %v", combinedTotalGets)
+				log.Info().Msgf("Combined Puts/sec: %v", float64(combinedTotalPuts-combinedPrevTotalPuts)/float64(sleepDuration.Seconds()))
+				log.Info().Msgf("Combined Gets/sec: %v", float64(combinedTotalGets-combinedPrevTotalGets)/float64(sleepDuration.Seconds()))
+				log.Info().Msgf("Combined ShardWiseActiveEntries: %v", combinedShardWiseActiveEntries)
+
+				combinedGetP25, combinedGetP50, combinedGetP99 := wc.stats[0].LatencyTracker.GetLatencyPercentiles()
+				combinedPutP25, combinedPutP50, combinedPutP99 := wc.stats[0].LatencyTracker.PutLatencyPercentiles()
+
+				log.Info().Msgf("Combined Get Count: %v", combinedTotalGets)
+				log.Info().Msgf("Combined Put Count: %v", combinedTotalPuts)
+				log.Info().Msgf("Combined Get Latencies - P25: %v, P50: %v, P99: %v", combinedGetP25, combinedGetP50, combinedGetP99)
+				log.Info().Msgf("Combined Put Latencies - P25: %v, P50: %v, P99: %v", combinedPutP25, combinedPutP50, combinedPutP99)
+
+				combinedGetBatchP25, combinedGetBatchP50, combinedGetBatchP99 := wc.shards[0].Stats.BatchTracker.GetBatchSizePercentiles()
+				log.Info().Msgf("Combined Get Batch Sizes - P25: %v, P50: %v, P99: %v", combinedGetBatchP25, combinedGetBatchP50, combinedGetBatchP99)
+
+				// Send metrics to the recorder if configured
+				if wc.metricsRecorder != nil {
+					rThroughput := float64(combinedTotalGets-combinedPrevTotalGets) / sleepDuration.Seconds()
+					wThroughput := float64(combinedTotalPuts-combinedPrevTotalPuts) / sleepDuration.Seconds()
+
+					wc.metricsRecorder.RecordRP25(combinedGetP25)
+					wc.metricsRecorder.RecordRP50(combinedGetP50)
+					wc.metricsRecorder.RecordRP99(combinedGetP99)
+					wc.metricsRecorder.RecordWP25(combinedPutP25)
+					wc.metricsRecorder.RecordWP50(combinedPutP50)
+					wc.metricsRecorder.RecordWP99(combinedPutP99)
+					wc.metricsRecorder.RecordRThroughput(rThroughput)
+					wc.metricsRecorder.RecordWThroughput(wThroughput)
+					wc.metricsRecorder.RecordHitRate(combinedHitRate)
+				}
+
+				combinedPrevTotalGets = combinedTotalGets
+				combinedPrevTotalPuts = combinedTotalPuts
+
+				/* disabling per shard stats for now
+				for i := 0; i < config.NumShards; i++ {
+					log.Info().Msgf("Shard %d has %d active entries", i, wc.stats[i].ShardWiseActiveEntries.Load())
+					total := wc.stats[i].TotalGets.Load()
+					hits := wc.stats[i].Hits.Load()
+					hitRate := float64(0)
+					if total > 0 {
+						hitRate = float64(hits) / float64(total)
+					}
+					log.Info().Msgf("Shard %d HitRate: %v", i, hitRate)
+					log.Info().Msgf("Shard %d ReWrites: %v", i, wc.stats[i].ReWrites.Load())
+					log.Info().Msgf("Shard %d Expired: %v", i, wc.stats[i].Expired.Load())
+					log.Info().Msgf("Shard %d Total: %v", i, total)
+					log.Info().Msgf("Gets/sec: %v", float64(total-perShardPrevTotalGets[i])/float64(sleepDuration.Seconds()))
+					log.Info().Msgf("Puts/sec: %v", float64(wc.stats[i].TotalPuts.Load()-perShardPrevTotalPuts[i])/float64(sleepDuration.Seconds()))
+					perShardPrevTotalGets[i] = total
+					perShardPrevTotalPuts[i] = wc.stats[i].TotalPuts.Load()
+
+					getP25, getP50, getP99 := wc.stats[i].LatencyTracker.GetLatencyPercentiles()
+					putP25, putP50, putP99 := wc.stats[i].LatencyTracker.PutLatencyPercentiles()
+
+					log.Info().Msgf("Get Count: %v", wc.stats[i].TotalGets.Load())
+					log.Info().Msgf("Put Count: %v", wc.stats[i].TotalPuts.Load())
+					log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99)
+					log.Info().Msgf("Put Latencies - P25: %v, P50: %v, P99: %v", putP25, putP50, putP99)
+
+				}
+				*/
+				log.Info().Msgf("GridSearchActive: %v", wc.predictor.GridSearchEstimator.IsGridSearchActive())
+			}
+		}()
+	}
+	return wc, nil
+}
+
+func (wc *WrapCache) PutLL(key string, value []byte, exptimeInMinutes uint16) error {
+
+	h32 := wc.Hash(key)
+	shardIdx := h32 % uint32(len(wc.shards))
+	start := time.Now()
+
+	result := filecache.ErrorPool.Get().(chan error)
+
+	wc.shards[shardIdx].WriteCh <- &filecache.WriteRequestV2{
+		Key:              key,
+		Value:            value,
+		ExptimeInMinutes: exptimeInMinutes,
+		Result:           result,
+	}
+
+	if h32%100 < 10 {
+		wc.stats[shardIdx].ShardWiseActiveEntries.Store(uint64(wc.shards[shardIdx].GetRingBufferActiveEntries()))
+	}
+
+	op := <-result
+	filecache.ErrorPool.Put(result)
+	wc.stats[shardIdx].TotalPuts.Add(1)
+	wc.stats[shardIdx].LatencyTracker.RecordPut(time.Since(start))
+	return op
+}
+
+func (wc *WrapCache) GetLL(key string) ([]byte, bool, bool) {
+	h32 := wc.Hash(key)
+	shardIdx := h32 % uint32(len(wc.shards))
+
+	start := time.Now()
+
+	found, value, _, expired, needsSlowPath := wc.shards[shardIdx].GetFastPath(key)
+
+	if !needsSlowPath {
+		if found && !expired {
+			wc.stats[shardIdx].Hits.Add(1)
+		} else if expired {
+			wc.stats[shardIdx].Expired.Add(1)
+		}
+
+		wc.stats[shardIdx].TotalGets.Add(1)
+		wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start))
+		return value, found, expired
+	}
+
+	result := filecache.ReadResultPool.Get().(chan filecache.ReadResultV2)
+
+	req := filecache.ReadRequestPool.Get().(*filecache.ReadRequestV2)
+	req.Key = key
+	req.Result = result
+
+	wc.shards[shardIdx].ReadCh <- req
+	op := <-result
+
+	filecache.ReadResultPool.Put(result)
+	filecache.ReadRequestPool.Put(req)
+
+	if op.Found && !op.Expired {
+		wc.stats[shardIdx].Hits.Add(1)
+	}
+	if op.Expired {
+		wc.stats[shardIdx].Expired.Add(1)
+	}
+	wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start))
+	wc.stats[shardIdx].TotalGets.Add(1)
+
+	return op.Data, op.Found, op.Expired
+}
+
+func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) error {
+
+	h32 := wc.Hash(key)
+	shardIdx := h32 % uint32(len(wc.shards))
+
+	start := time.Now()
+	defer func() {
+		wc.stats[shardIdx].LatencyTracker.RecordPut(time.Since(start))
+	}()
+
+	wc.shardLocks[shardIdx].Lock()
+	defer wc.shardLocks[shardIdx].Unlock()
+	wc.putLocked(shardIdx, h32, key, value, exptimeInMinutes)
+	return nil
+}
+
+func (wc *WrapCache) putLocked(shardIdx uint32, h32 uint32, key string, value []byte, exptimeInMinutes uint16) {
+	wc.shards[shardIdx].Put(key, value, exptimeInMinutes)
+	wc.stats[shardIdx].TotalPuts.Add(1)
+	if h32%100 < 10 {
+		wc.stats[shardIdx].ShardWiseActiveEntries.Store(uint64(wc.shards[shardIdx].GetRingBufferActiveEntries()))
+	}
+}
+
+func (wc *WrapCache) Get(key string) ([]byte, bool, bool) {
+	h32 := wc.Hash(key)
+	shardIdx := h32 % uint32(len(wc.shards))
+
+	start := time.Now()
+	defer func() {
+		wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start))
+	}()
+
+	var keyFound bool
+	var val []byte
+	var remainingTTL uint16
+	var expired bool
+	var shouldReWrite bool
+	if wc.shards[shardIdx].BatchReader != nil {
+		reqChan := make(chan filecache.ReadResultV2, 1)
+		wc.shards[shardIdx].BatchReader.Requests <- &filecache.ReadRequestV2{
+			Key:    key,
+			Result: reqChan,
+		}
+		result := <-reqChan
+
+		keyFound, val, remainingTTL, expired, shouldReWrite = result.Found, result.Data, result.TTL, result.Expired, result.ShouldRewrite
+	} else {
+		wc.shardLocks[shardIdx].RLock()
+		defer wc.shardLocks[shardIdx].RUnlock()
+		keyFound, val, remainingTTL, expired, shouldReWrite = wc.shards[shardIdx].Get(key)
+	}
+
+	if keyFound && !expired {
+		wc.stats[shardIdx].Hits.Add(1)
+	}
+	if expired {
+		wc.stats[shardIdx].Expired.Add(1)
+	}
+	wc.stats[shardIdx].TotalGets.Add(1)
+	if shouldReWrite {
+		wc.stats[shardIdx].ReWrites.Add(1)
+		wc.putLocked(shardIdx, h32, key, val, remainingTTL)
+	}
+	wc.predictor.Observe(float64(wc.stats[shardIdx].Hits.Load()) / float64(wc.stats[shardIdx].TotalGets.Load()))
+	return val, keyFound, expired
+}
+
+func (wc *WrapCache) Hash(key string) uint32 {
+	return uint32(xxhash.Sum64String(key) ^ Seed)
+}
+
+func (wc *WrapCache) GetShardCache(shardIdx int) *filecache.ShardCache {
+	return wc.shards[shardIdx]
+}
diff --git a/flashring/internal/cache/freecache.go b/flashring/internal/cache/freecache.go
new file mode 100644
index 00000000..df0f0f75
--- /dev/null
+++ b/flashring/internal/cache/freecache.go
@@ -0,0 +1,96 @@
+package internal
+
+import (
+	"runtime/debug"
+	"sync/atomic"
+	"time"
+
+	filecache "github.com/Meesho/BharatMLStack/flashring/internal/shard"
+	"github.com/coocood/freecache"
+	"github.com/rs/zerolog/log"
+)
+
+type Freecache struct {
+	cache *freecache.Cache
+	stats *CacheStats
+}
+
+func NewFreecache(config WrapCacheConfig, logStats bool) (*Freecache, error) {
+
+	cache := freecache.NewCache(int(config.FileSize))
+	debug.SetGCPercent(20)
+
+	fc := &Freecache{
+		cache: cache,
+		stats: &CacheStats{
+			Hits:                   atomic.Uint64{},
+			TotalGets:              atomic.Uint64{},
+			TotalPuts:              atomic.Uint64{},
+			ReWrites:               atomic.Uint64{},
+			Expired:                atomic.Uint64{},
+			ShardWiseActiveEntries: atomic.Uint64{},
+			LatencyTracker:         filecache.NewLatencyTracker(),
+		},
+	}
+
+	if logStats {
+		go func() {
+			sleepDuration := 10 * time.Second
+			var prevTotalGets, prevTotalPuts uint64
+			for {
+				time.Sleep(sleepDuration)
+
+				totalGets := fc.stats.TotalGets.Load()
+				totalPuts := fc.stats.TotalPuts.Load()
+				getsPerSec := float64(totalGets-prevTotalGets) / sleepDuration.Seconds()
+				putsPerSec := float64(totalPuts-prevTotalPuts) / sleepDuration.Seconds()
+
+				log.Info().Msgf("Shard %d HitRate: %v", 0, cache.HitRate())
+				log.Info().Msgf("Shard %d Expired: %v", 0, cache.ExpiredCount())
+				log.Info().Msgf("Shard %d Total: %v", 0, cache.EntryCount())
+				log.Info().Msgf("Gets/sec: %v", getsPerSec)
+				log.Info().Msgf("Puts/sec: %v", putsPerSec)
+
+				getP25, getP50, getP99 := fc.stats.LatencyTracker.GetLatencyPercentiles()
+				putP25, putP50, putP99 := fc.stats.LatencyTracker.PutLatencyPercentiles()
+
+				log.Info().Msgf("Get Count: %v", totalGets)
+				log.Info().Msgf("Put Count: %v", totalPuts)
+				log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99)
+				log.Info().Msgf("Put Latencies - P25: %v, P50: %v, P99: %v", putP25, putP50, putP99)
+
+				prevTotalGets = totalGets
+				prevTotalPuts = totalPuts
+			}
+		}()
+	}
+
+	return fc, nil
+
+}
+
+func (c *Freecache) Put(key string, value []byte, exptimeInMinutes uint16) error {
+	start := time.Now()
+	defer func() {
+		c.stats.LatencyTracker.RecordPut(time.Since(start))
+	}()
+
+	c.stats.TotalPuts.Add(1)
+	c.cache.Set([]byte(key), value, int(exptimeInMinutes)*60)
+	return nil
+}
+
+func (c *Freecache) Get(key string) ([]byte, bool, bool) {
+	start := time.Now()
+	defer func() {
+		c.stats.LatencyTracker.RecordGet(time.Since(start))
+	}()
+
+	c.stats.TotalGets.Add(1)
+	val, err := c.cache.Get([]byte(key))
+	if err != nil {
+		return nil, false, false
+	}
+	c.stats.Hits.Add(1)
+	return val, true, false
+}
diff --git a/flashring/internal/fs/README.md b/flashring/internal/fs/README.md
new file mode 100644
index 00000000..dac08884
--- /dev/null
+++ b/flashring/internal/fs/README.md
@@ -0,0 +1,144 @@
+# Memtable Performance Benchmark (DirectIO + Go)
+
+This benchmark evaluates a single-threaded, append-only, `O_DIRECT`-backed memtable implementation in Go. The design mimics ScyllaDB’s core-local memtables and flush logic, emphasizing high throughput and stable latencies.
+
+## 🔧 Benchmark Configuration
+
+- **CPU**: AMD Ryzen 7 9800X3D
+- **Memtable Write Size**: 16KB per record
+- **Concurrency**: Single-threaded (8 goroutines pipelined into one locked OS thread)
+- **Flush Trigger**: Memtable capacity exceeded
+- **IO Mode**: DirectIO (`O_DIRECT`), Append-only
+- **Benchmark Tool**: `go test -bench`
+
+---
+
+## 📊 Performance Overview (NO_DSYNC vs DSYNC)
+
+| Capacity | RPS (NO_DSYNC) | Latency (ns/op) | RPS (DSYNC) | Latency (ns/op) |
+|---------:|---------------:|----------------:|------------:|----------------:|
+| 64KB     |       785       |     1,273,903    |     482     |   2,073,246      |
+| 128KB    |      1,568      |       637,656    |     970     |   1,030,739      |
+| 256KB    |      3,214      |       311,103    |    1,934     |     517,106      |
+| 512KB    |      6,499      |       153,871    |    3,930     |     254,432      |
+| 1MB      |     12,769      |        78,317    |    7,659     |     130,561      |
+| 2MB      |     25,013      |        39,979    |   15,186     |      65,849      |
+| 4MB      |     46,907      |        21,319    |   24,932     |      40,110      |
+| 8MB      |     84,494      |        11,835    |   41,206     |      24,268      |
+| 16MB     |    138,896      |         7,200    |   50,840     |      19,670      |
+| 32MB     |    170,877      |         5,852    |   66,387     |      15,063      |
+| 64MB     |    213,214      |         4,690    |   73,646     |      13,579      |
+| 128MB    |    250,319      |         3,995    |   76,413     |      13,087      |
+| 256MB    |     88,229      |        11,334    |   76,672     |      13,043      |
+| 512MB    |     81,517      |        12,267    |   77,174     |      12,958      |
+| 1GB      |     83,717      |        11,945    |   82,203     |      12,165      |
+
+---
+
+## 📉 Throughput vs Latency (Log Scale)
+
+![Throughput vs Latency](./profile.png)
+
+> Left axis: Throughput in MB/s (log scale)  
+> Right axis: Latency in ns/op (log scale)  
+> X-axis: Memtable size (KB, log scale)
+
+---
+
+## 🔁 Flush Frequency Trend
+
+- Smaller memtables trigger frequent flushes, degrading both throughput and latency.
+- Flush frequency stabilizes beyond **8–16MB**, where throughput growth starts to plateau.
+
+---
+
+## 🔒 `runtime.LockOSThread()` Impact
+
+To ensure predictable syscall behavior with `O_DIRECT` (DirectIO) and aligned memory buffers, we benchmarked with and without `runtime.LockOSThread()`.
+
+| Capacity | RPS (No Lock) | Latency (ns/op) | RPS (LockOSThread) | Latency (ns/op) |
+|---------:|--------------:|----------------:|--------------------:|----------------:|
+| 128MB    | ~220,000      | ~5,500          | **250,319**         | **3,995**       |
+| 256MB    | ~85,000       | ~11,000         | **88,229**          | **11,334**      |
+| 1GB      | ~81,000       | ~12,000         | **83,717**          | **11,945**      |
+
+✅ **Locking OS threads**:
+- Reduces context-switching overhead
+- Ensures aligned buffers remain valid (important for `O_DIRECT`)
+- Prevents `EINVAL` during write() syscalls
+- Better latency consistency
+
+---
+
+## 🧠 Final Conclusions
+
+- **Memtable Size Matters**: Performance improves linearly with size up to 128MB. Beyond that, throughput plateaus.
+- **DSYNC vs NO_DSYNC**: DSYNC incurs 1.5–2x higher latency at small sizes but converges at 512MB+. Use DSYNC if durability is essential.
+- **DirectIO Requirements**: `runtime.LockOSThread()` is highly recommended for DMA-safe writes, especially in single-threaded core-local memtable designs.
+- **Flush Design**: Scylla-like batching improves throughput. Flushes can be run on the same core if they yield properly between IO calls.
+
+---
+
+## Raw Stats
+
+```bash
+Running tool: /usr/local/go/bin/go test -benchmem -run=^$ -bench ^BenchmarkMemtable_Write16KBWorkload$ github.com/Meesho/BharatMLStack/ssd-cache/internal/memtable
+
+goos: linux
+goarch: amd64
+pkg: github.com/Meesho/BharatMLStack/ssd-cache/internal/memtable
+cpu: AMD Ryzen 7 9800X3D 8-Core Processor           
+BenchmarkMemtable_Write16KBWorkload/64KB-NO-DSYNC-8         	     950	   1273903 ns/op	  15532032 file_size	       237.0 flushes	       195.8 flushes/sec	       785.0 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/128KB-NO-DSYNC-8        	    2079	    637656 ns/op	  33947648 file_size	       259.0 flushes	       195.4 flushes/sec	      1568 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/256KB-NO-DSYNC-8        	    4028	    311103 ns/op	  65798144 file_size	       251.0 flushes	       200.3 flushes/sec	      3214 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/512KB-NO-DSYNC-8        	    8194	    153871 ns/op	 134217728 file_size	       256.0 flushes	       203.0 flushes/sec	      6499 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/1024KB-NO-DSYNC-8       	   15468	     78317 ns/op	 252706816 file_size	       241.0 flushes	       198.9 flushes/sec	     12769 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/2048KB-NO-DSYNC-8       	   30043	     39979 ns/op	 490733568 file_size	       234.0 flushes	       194.8 flushes/sec	     25013 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/4096KB-NO-DSYNC-8       	   56930	     21319 ns/op	 931135488 file_size	       222.0 flushes	       182.9 flushes/sec	     46907 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/8192KB-NO-DSYNC-8       	  103630	     11835 ns/op	1694498816 file_size	       202.0 flushes	       164.7 flushes/sec	     84494 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/16384KB-NO-DSYNC-8      	  175530	      7200 ns/op	2868903936 file_size	       171.0 flushes	       135.3 flushes/sec	    138896 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/32768KB-NO-DSYNC-8      	  271888	      5852 ns/op	4429185024 file_size	       132.0 flushes	        82.96 flushes/sec	    170877 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/65536KB-NO-DSYNC-8      	  235149	      4690 ns/op	3825205248 file_size	        57.00 flushes	        51.68 flushes/sec	    213214 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/131072KB-NO-DSYNC-8     	  304314	      3995 ns/op	4966055936 file_size	        37.00 flushes	        30.43 flushes/sec	    250319 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/262144KB-NO-DSYNC-8     	  542956	     11334 ns/op	8858370048 file_size	        33.00 flushes	         5.362 flushes/sec	     88229 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/524288KB-NO-DSYNC-8     	  540237	     12267 ns/op	8589934592 file_size	        16.00 flushes	         2.414 flushes/sec	     81517 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/1048576KB-NO-DSYNC-8    	  555834	     11945 ns/op	8589934592 file_size	         8.000 flushes	         1.205 flushes/sec	     83717 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/64KB-DSYNC-8            	     591	   2073246 ns/op	   9633792 file_size	       147.0 flushes	       120.0 flushes/sec	       482.3 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/128KB-DSYNC-8           	    1215	   1030739 ns/op	  19791872 file_size	       151.0 flushes	       120.6 flushes/sec	       970.2 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/256KB-DSYNC-8           	    2455	    517106 ns/op	  40108032 file_size	       153.0 flushes	       120.5 flushes/sec	      1934 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/512KB-DSYNC-8           	    5034	    254432 ns/op	  82313216 file_size	       157.0 flushes	       122.6 flushes/sec	      3930 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/1024KB-DSYNC-8          	   10000	    130561 ns/op	 163577856 file_size	       156.0 flushes	       119.5 flushes/sec	      7659 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/2048KB-DSYNC-8          	   18921	     65849 ns/op	 308281344 file_size	       147.0 flushes	       118.0 flushes/sec	     15186 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/4096KB-DSYNC-8          	   30013	     40110 ns/op	 490733568 file_size	       117.0 flushes	        97.19 flushes/sec	     24932 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/8192KB-DSYNC-8          	   49298	     24268 ns/op	 805306368 file_size	        96.00 flushes	        80.24 flushes/sec	     41206 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/16384KB-DSYNC-8         	   66595	     19670 ns/op	1090519040 file_size	        65.00 flushes	        49.62 flushes/sec	     50840 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/32768KB-DSYNC-8         	   91797	     15063 ns/op	1476395008 file_size	        44.00 flushes	        31.82 flushes/sec	     66387 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/65536KB-DSYNC-8         	   97675	     13579 ns/op	1543503872 file_size	        23.00 flushes	        17.34 flushes/sec	     73646 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/131072KB-DSYNC-8        	   92379	     13087 ns/op	1476395008 file_size	        11.00 flushes	         9.099 flushes/sec	     76413 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/262144KB-DSYNC-8        	  561945	     13043 ns/op	9126805504 file_size	        34.00 flushes	         4.639 flushes/sec	     76672 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/524288KB-DSYNC-8        	  562118	     12958 ns/op	9126805504 file_size	        17.00 flushes	         2.334 flushes/sec	     77174 records/sec	       0 B/op	       0 allocs/op
+BenchmarkMemtable_Write16KBWorkload/1048576KB-DSYNC-8       	  559707	     12165 ns/op	8589934592 file_size	         8.000 flushes	         1.175 flushes/sec	     82203 records/sec	       0 B/op	       0 allocs/op
+PASS
+ok  	github.com/Meesho/BharatMLStack/ssd-cache/internal/memtable	78.589s
+```
+
+## 🧪 Design Inspiration
+
+This experiment was inspired by **ScyllaDB’s core-local architecture**:
+- Per-core memtables
+- Flush triggered by memory thresholds
+- IO parallelism via sharded threads
+
+This design brings similar performance characteristics to a Go-based system using low-level syscalls and memory alignment.
+
+---
+
+## 📂 Future Work
+
+- Add WAL benchmarking
+- Integrate `io_uring` for flush batching
+- Explore compression + zero-copy read path
+
+---
+
+Made with ❤️ by [BharatMLStack](https://github.com/Meesho/BharatMLStack)
diff --git a/flashring/internal/fs/aligned_page.go b/flashring/internal/fs/aligned_page.go
new file mode 100644
index 00000000..c499ae36
--- /dev/null
+++ b/flashring/internal/fs/aligned_page.go
@@ -0,0 +1,54 @@
+//go:build linux
+// +build linux
+
+package fs
+
+import (
+	"runtime/pprof"
+
+	"golang.org/x/sys/unix"
+)
+
+const (
+	PROT_READ   = unix.PROT_READ
+	PROT_WRITE  = unix.PROT_WRITE
+	MAP_PRIVATE = unix.MAP_PRIVATE
+	MAP_ANON    = unix.MAP_ANON
+)
+
+var mmapProf = pprof.NewProfile("mmap") // will show up in /debug/pprof/
+
+type AlignedPage struct {
+	Buf  []byte
+	mmap []byte
+}
+
+func NewAlignedPage(pageSize int) *AlignedPage {
+	b, err := unix.Mmap(-1, 0, pageSize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON)
+	if err != nil {
+		panic(err)
+	}
+	if pageSize > 0 {
+		mmapProf.Add(&b[0], pageSize) // attribute sz bytes to this callsite
+	}
+	return &AlignedPage{
+		Buf:  b,
+		mmap: b,
+	}
+}
+
+func Unmap(p *AlignedPage) error {
+	if len(p.mmap) > 0 {
+		mmapProf.Remove(&p.mmap[0]) // release from custom profile
+	}
+	if p.mmap != nil {
+		err := unix.Munmap(p.mmap)
+		if err != nil {
+			return err
+		}
+		p.mmap = nil
+	}
+	p.Buf = nil
+	p.mmap = nil
+	return nil
+}
diff --git a/flashring/internal/fs/file_bench_test.go b/flashring/internal/fs/file_bench_test.go
new file mode 100644
index 00000000..2d3da83a
--- /dev/null
+++ b/flashring/internal/fs/file_bench_test.go
@@ -0,0 +1,161 @@
+package fs
+
+import (
+	"path/filepath"
+	"testing"
+)
+
+func BenchmarkPwrite(b *testing.B) {
+	tmpDir := b.TempDir()
+	filename := filepath.Join(tmpDir, "bench_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024 * 1024, // 1GB
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		b.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Create aligned buffer for DirectIO
+	data := createAlignedBuffer(4096, 4096)
+	for i := 0; i < 4096; i++ {
+		data[i] = byte(i % 256)
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, err := raf.Pwrite(data)
+		if err != nil {
+			b.Fatalf("Pwrite failed: %v", err)
+		}
+	}
+}
+
+func BenchmarkPread(b *testing.B) {
+	tmpDir := b.TempDir()
+	filename := filepath.Join(tmpDir, "bench_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024 * 1024, // 1GB
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		b.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Pre-populate with data using aligned buffer
+	writeData := createAlignedBuffer(4096, 4096)
+	for i := 0; i < 4096; i++ {
+		writeData[i] = byte(i % 256)
+	}
+
+	for i := 0; i < 200000; i++ {
+		_, err := raf.Pwrite(writeData)
+		if err != nil {
+			b.Fatalf("Pwrite failed: %v", err)
+		}
+	}
+
+	readData := createAlignedBuffer(4096, 4096)
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		offset := int64((i % 200000) * 4096)
+		_, err := raf.Pread(offset, readData)
+		if err != nil {
+			b.Fatalf("Pread failed: %v", err)
+		}
+	}
+}
+
+// Benchmarks
+func BenchmarkWrapAppendFile_Pwrite(b *testing.B) {
+	tmpDir := b.TempDir()
+	filename := filepath.Join(tmpDir, "bench_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024 * 1024, // 1GB
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		b.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Create aligned buffer for DirectIO
+	data := createAlignedBuffer(4096, 4096)
+	for i := 0; i < 4096; i++ {
+		data[i] = byte(i % 256)
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, err := waf.Pwrite(data)
+		if err != nil {
+			b.Fatalf("Pwrite failed: %v", err)
+		}
+	}
+}
+
+func BenchmarkWrapAppendFile_Pread(b *testing.B) {
+	tmpDir := b.TempDir()
+	filename := filepath.Join(tmpDir, "bench_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024 * 1024, // 1GB
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		b.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Pre-populate with data using aligned buffer
+	writeData := createAlignedBuffer(4096, 4096)
+	for i := 0; i < 4096; i++ {
+		writeData[i] = byte(i % 256)
+	}
+
+	for i := 0; i < 200000; i++ {
+		_, err := waf.Pwrite(writeData)
+		if err != nil {
+			b.Fatalf("Pwrite failed: %v", err)
+		}
+	}
+
+	readData := createAlignedBuffer(4096, 4096)
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		offset := int64((i % 200000) * 4096)
+		_, err := waf.Pread(offset, readData)
+		if err != nil {
+			b.Fatalf("Pread failed: %v", err)
+		}
+	}
+}
diff --git a/flashring/internal/fs/fs.go b/flashring/internal/fs/fs.go
new file mode 100644
index 00000000..186e524e
--- /dev/null
+++ b/flashring/internal/fs/fs.go
@@ -0,0 +1,138 @@
+//go:build linux
+// +build linux
+
+package fs
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"syscall"
+	"unsafe"
+
+	"github.com/rs/zerolog/log"
+	"golang.org/x/sys/unix"
+)
+
+const (
+	O_DIRECT             = 0x4000
+	O_WRONLY             = syscall.O_WRONLY
+	O_RDONLY             = syscall.O_RDONLY
+	O_APPEND             = syscall.O_APPEND
+	O_CREAT              = syscall.O_CREAT
+	O_DSYNC              = syscall.O_DSYNC
+	FALLOC_FL_PUNCH_HOLE = unix.FALLOC_FL_PUNCH_HOLE
+	FALLOC_FL_KEEP_SIZE  = unix.FALLOC_FL_KEEP_SIZE
+	FILE_MODE            = 0644
+	BLOCK_SIZE           = 4096
+)
+
+var (
+	ErrBufNoAlign           = errors.New("buffer is not aligned to block size")
+	ErrFileSizeExceeded     = errors.New("file size exceeded. Please punch hole")
+	ErrFileOffsetOutOfRange = errors.New("file offset is out of range")
+	ErrOffsetNotAligned     = errors.New("offset is not aligned to block size")
+)
+
+type Stat struct {
+	WriteCount         int64
+	ReadCount          int64
+	PunchHoleCount     int64
+	CurrentLogicalSize int64
+}
+
+type FileConfig struct {
+	Filename          string
+	MaxFileSize       int64
+	FilePunchHoleSize int64
+	BlockSize         int
+}
+
+type File interface {
+	Pwrite(buf []byte) (currentPhysicalOffset int64, err error)
+	Pread(fileOffset int64, buf []byte) (n int32, err error)
+	TrimHead() (err error)
+	Close()
+}
+
+type Page interface {
+	Unmap() error
+}
+
+func createAppendOnlyWriteFileDescriptor(filename string) (int, *os.File, bool, error) {
+
+	// Open file with DIRECT_IO, WRITE_ONLY, CREAT flags
+	flags := O_DIRECT | O_WRONLY | O_CREAT | O_DSYNC
+	fd, err := syscall.Open(filename, flags, FILE_MODE)
+	if err != nil {
+		// If DIRECT_IO is not supported, fall back to regular flags
+		log.Warn().Msgf("DIRECT_IO not supported, falling back to regular flags: %v", err)
+		flags = O_WRONLY | O_CREAT | O_DSYNC
+		fd, err = syscall.Open(filename, flags, FILE_MODE)
+		if err != nil {
+			return 0, nil, false, err
+		}
+	}
+	file := os.NewFile(uintptr(fd), filename)
+	if file == nil {
+		return 0, nil, false, fmt.Errorf("failed to create file from fd")
+	}
+
+	return fd, file, true, nil
+}
+
+func createPreAllocatedWriteFileDescriptor(filename string, maxFileSize int64) (int, *os.File, bool, error) {
+	flags := O_DIRECT | O_WRONLY | O_CREAT | O_DSYNC
+	fd, err := syscall.Open(filename, flags, FILE_MODE)
+	if err != nil {
+		log.Warn().Msgf("DIRECT_IO not supported, falling back to regular flags: %v", err)
+		flags = O_WRONLY | O_CREAT | O_DSYNC
+		fd, err = syscall.Open(filename, flags, FILE_MODE)
+		if err != nil {
+			return 0, nil, false, err
+		}
+	}
+
+	// Preallocate file space
+	err = unix.Fallocate(fd, 0, 0, maxFileSize)
+	if err != nil {
+		log.Error().Err(err).Msg("Failed to fallocate file")
+		syscall.Close(fd)
+		return 0, nil, false, err
+	}
+
+	file := os.NewFile(uintptr(fd), filename)
+	if file == nil {
+		return 0, nil, false, fmt.Errorf("failed to create file from fd")
+	}
+
+	return fd, file, true, nil
+}
+
+func createReadFileDescriptor(filename string) (int, *os.File, bool, error) {
+	flags := O_DIRECT | O_RDONLY
+	fd, err := syscall.Open(filename, flags, 0)
+	if err != nil {
+		return 0, nil, false, err
+	}
+	file := os.NewFile(uintptr(fd), filename)
+	if file == nil {
+		return 0, nil, false, fmt.Errorf("failed to create file from fd")
+	}
+
+	return fd, file, true, nil
+}
+
+// isAligned checks if the buffer is aligned to the block size
+func isAlignedBuffer(buf []byte, alignment int) bool {
+	pt := uintptr(alignment)
+	if len(buf) == 0 {
+		return false
+	}
+	addr := uintptr(unsafe.Pointer(&buf[0]))
+	return addr%pt == 0
+}
+
+func isAlignedOffset(offset int64, alignment int) bool {
+	return offset%int64(alignment) == 0
+}
diff --git a/flashring/internal/fs/profile.png b/flashring/internal/fs/profile.png
new file mode 100644
index 00000000..ee759234
Binary files /dev/null and b/flashring/internal/fs/profile.png differ
diff --git a/flashring/internal/fs/rolling_appendonly_file.go b/flashring/internal/fs/rolling_appendonly_file.go
new file mode 100644
index 00000000..1e97b5c6
--- /dev/null
+++ b/flashring/internal/fs/rolling_appendonly_file.go
@@ -0,0 +1,124 @@
+//go:build linux
+// +build linux
+
+package fs
+
+import (
+	"os"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+)
+
+type RollingAppendFile struct {
+	WriteDirectIO         bool
+	ReadDirectIO          bool
+	blockSize             int
+	WriteFd               int      // write file descriptor
+	ReadFd                int      // read file descriptor
+	MaxFileSize           int64    // max file size in bytes
+	FilePunchHoleSize     int64    // file punch hole size in bytes
+	LogicalStartOffset    int64    // logical start offset in bytes
+	CurrentLogicalOffset  int64    // file current size in bytes
+	CurrentPhysicalOffset int64    // file current physical offset in bytes
+	WriteFile             *os.File // write file
+	ReadFile              *os.File // read file
+	Stat                  *Stat    // file statistics
+}
+
+func NewRollingAppendFile(config FileConfig) (*RollingAppendFile, error) {
+	filename := config.Filename
+	maxFileSize := config.MaxFileSize
+	filePunchHoleSize := config.FilePunchHoleSize
+
+	writeFd, writeFile, wDirectIO, err := createAppendOnlyWriteFileDescriptor(filename)
+	if err != nil {
+		return nil, err
+	}
+	readFd, readFile, rDirectIO, err := createReadFileDescriptor(filename)
+	if err != nil {
+		return nil, err
+	}
+	blockSize := config.BlockSize
+	if blockSize == 0 {
+		blockSize = BLOCK_SIZE
+	}
+	return &RollingAppendFile{
+		WriteDirectIO:         wDirectIO,
+		ReadDirectIO:          rDirectIO,
+		blockSize:             blockSize,
+		WriteFd:               writeFd,
+		ReadFd:                readFd,
+		WriteFile:             writeFile,
+		ReadFile:              readFile,
+		MaxFileSize:           maxFileSize,
+		FilePunchHoleSize:     filePunchHoleSize,
+		LogicalStartOffset:    0,
+		CurrentLogicalOffset:  0,
+		CurrentPhysicalOffset: 0,
+		Stat: &Stat{
+			WriteCount:         0,
+			ReadCount:          0,
+			PunchHoleCount:     0,
+			CurrentLogicalSize: 0,
+		},
+	}, nil
+}
+
+func (r *RollingAppendFile) Pwrite(buf []byte) (currentPhysicalOffset int64, err error) {
+	if r.CurrentLogicalOffset+int64(len(buf)) > r.MaxFileSize {
+		return 0, ErrFileSizeExceeded
+	}
+	if r.WriteDirectIO {
+		if !isAlignedBuffer(buf, r.blockSize) {
+			return 0, ErrBufNoAlign
+		}
+	}
+	n, err := syscall.Pwrite(r.WriteFd, buf, r.CurrentPhysicalOffset)
+	if err != nil {
+		return 0, err
+	}
+	r.CurrentPhysicalOffset += int64(n)
+	r.Stat.WriteCount++
+	return r.CurrentPhysicalOffset, nil
+}
+
+func (r *RollingAppendFile) Pread(fileOffset int64, buf []byte) (n int32, err error) {
+	if fileOffset < r.LogicalStartOffset || fileOffset+int64(len(buf)) > r.CurrentPhysicalOffset {
+		return 0, ErrFileOffsetOutOfRange
+	}
+	if r.ReadDirectIO {
+		if !isAlignedOffset(fileOffset, r.blockSize) {
+			return 0, ErrOffsetNotAligned
+		}
+		if !isAlignedBuffer(buf, r.blockSize) {
+			return 0, ErrBufNoAlign
+		}
+	}
+	syscall.Pread(r.ReadFd, buf, fileOffset)
+	r.Stat.ReadCount++
+	return int32(len(buf)), nil
+}
+
+func (r *RollingAppendFile) TrimHead() (err error) {
+	if r.WriteDirectIO {
+		if !isAlignedOffset(r.LogicalStartOffset, r.blockSize) {
+			return ErrOffsetNotAligned
+		}
+	}
+	err = unix.Fallocate(r.WriteFd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, r.LogicalStartOffset, int64(r.FilePunchHoleSize))
+	if err != nil {
+		return err
+	}
+	r.LogicalStartOffset += int64(r.FilePunchHoleSize)
+	r.CurrentLogicalOffset -= int64(r.FilePunchHoleSize)
+	r.Stat.PunchHoleCount++
+	return nil
+}
+
+func (r *RollingAppendFile) Close() {
+	syscall.Close(r.WriteFd)
+	syscall.Close(r.ReadFd)
+	os.Remove(r.WriteFile.Name())
+	os.Remove(r.ReadFile.Name())
+}
diff --git a/flashring/internal/fs/rolling_appendonly_file_test.go b/flashring/internal/fs/rolling_appendonly_file_test.go
new file mode 100644
index 00000000..c4afdd8c
--- /dev/null
+++ b/flashring/internal/fs/rolling_appendonly_file_test.go
@@ -0,0 +1,502 @@
+//go:build linux
+// +build linux
+
+package fs
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+	"unsafe"
+)
+
+// Helper function to create aligned buffers for DirectIO
+func createAlignedBuffer(size, alignment int) []byte {
+	// Allocate more memory than needed to ensure we can find an aligned address
+	buf := make([]byte, size+alignment)
+
+	// Find the aligned address
+	addr := uintptr(unsafe.Pointer(&buf[0]))
+	alignedAddr := (addr + uintptr(alignment-1)) &^ uintptr(alignment-1)
+
+	// Calculate the offset
+	offset := alignedAddr - addr
+
+	// Return the aligned slice
+	return buf[offset : offset+uintptr(size)]
+}
+
+func TestNewRollingAppendFile(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024, // 1MB
+		FilePunchHoleSize: 64 * 1024,   // 64KB
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Verify initial state
+	if raf.MaxFileSize != config.MaxFileSize {
+		t.Errorf("Expected MaxFileSize %d, got %d", config.MaxFileSize, raf.MaxFileSize)
+	}
+	if raf.FilePunchHoleSize != config.FilePunchHoleSize {
+		t.Errorf("Expected FilePunchHoleSize %d, got %d", config.FilePunchHoleSize, raf.FilePunchHoleSize)
+	}
+	if raf.blockSize != config.BlockSize {
+		t.Errorf("Expected BlockSize %d, got %d", config.BlockSize, raf.blockSize)
+	}
+	if raf.CurrentLogicalOffset != 0 {
+		t.Errorf("Expected CurrentLogicalOffset 0, got %d", raf.CurrentLogicalOffset)
+	}
+	if raf.CurrentPhysicalOffset != 0 {
+		t.Errorf("Expected CurrentPhysicalOffset 0, got %d", raf.CurrentPhysicalOffset)
+	}
+}
+
+func TestNewRollingAppendFile_DefaultBlockSize(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         0, // Should default to BLOCK_SIZE
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	if raf.blockSize != BLOCK_SIZE {
+		t.Errorf("Expected default BlockSize %d, got %d", BLOCK_SIZE, raf.blockSize)
+	}
+}
+
+func TestPwrite_Success(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Create aligned buffer
+	data := createAlignedBuffer(4096, 4096)
+	for i := range data {
+		data[i] = byte(i % 256)
+	}
+
+	offset, err := raf.Pwrite(data)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	if offset != int64(len(data)) {
+		t.Errorf("Expected offset %d, got %d", len(data), offset)
+	}
+
+	if raf.CurrentPhysicalOffset != int64(len(data)) {
+		t.Errorf("Expected CurrentPhysicalOffset %d, got %d", len(data), raf.CurrentPhysicalOffset)
+	}
+
+	if raf.Stat.WriteCount != 1 {
+		t.Errorf("Expected WriteCount 1, got %d", raf.Stat.WriteCount)
+	}
+}
+
+func TestPwrite_FileSizeExceeded(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024, // Small max size
+		FilePunchHoleSize: 512,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Try to write more than max file size
+	data := make([]byte, 2048)
+
+	_, err = raf.Pwrite(data)
+	if err != ErrFileSizeExceeded {
+		t.Errorf("Expected ErrFileSizeExceeded, got %v", err)
+	}
+}
+
+func TestPwrite_BufferNotAligned(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Only test if using DirectIO
+	if raf.WriteDirectIO {
+		// Create unaligned buffer
+		data := make([]byte, 4097) // Not aligned to 4096
+
+		_, err = raf.Pwrite(data)
+		if err != ErrBufNoAlign {
+			t.Errorf("Expected ErrBufNoAlign, got %v", err)
+		}
+	}
+}
+
+func TestPread_Success(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Write some data first
+	writeData := createAlignedBuffer(4096, 4096)
+	for i := range writeData {
+		writeData[i] = byte(i % 256)
+	}
+
+	_, err = raf.Pwrite(writeData)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	// Read the data back
+	readData := createAlignedBuffer(4096, 4096)
+	n, err := raf.Pread(0, readData)
+	if err != nil {
+		t.Fatalf("Pread failed: %v", err)
+	}
+
+	if n != int32(len(readData)) {
+		t.Errorf("Expected read length %d, got %d", len(readData), n)
+	}
+
+	// Verify data matches
+	for i := range readData {
+		if readData[i] != writeData[i] {
+			t.Errorf("Data mismatch at index %d: expected %d, got %d", i, writeData[i], readData[i])
+		}
+	}
+
+	if raf.Stat.ReadCount != 1 {
+		t.Errorf("Expected ReadCount 1, got %d", raf.Stat.ReadCount)
+	}
+}
+
+func TestPread_FileOffsetOutOfRange(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Try to read without writing anything
+	readData := createAlignedBuffer(4096, 4096)
+	_, err = raf.Pread(0, readData)
+	if err != ErrFileOffsetOutOfRange {
+		t.Errorf("Expected ErrFileOffsetOutOfRange, got %v", err)
+	}
+
+	// Write some data
+	writeData := createAlignedBuffer(4096, 4096)
+	_, err = raf.Pwrite(writeData)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	// Try to read beyond written data
+	_, err = raf.Pread(4096, readData)
+	if err != ErrFileOffsetOutOfRange {
+		t.Errorf("Expected ErrFileOffsetOutOfRange, got %v", err)
+	}
+}
+
+func TestPread_OffsetNotAligned(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Only test if using DirectIO
+	if raf.ReadDirectIO {
+		// Write some data first
+		writeData := createAlignedBuffer(8192, 4096)
+		_, err = raf.Pwrite(writeData)
+		if err != nil {
+			t.Fatalf("Pwrite failed: %v", err)
+		}
+
+		// Try to read from unaligned offset
+		readData := createAlignedBuffer(4096, 4096)
+		_, err = raf.Pread(100, readData) // Not aligned to 4096
+		if err != ErrOffsetNotAligned {
+			t.Errorf("Expected ErrOffsetNotAligned, got %v", err)
+		}
+	}
+}
+
+func TestTrimHead_Success(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 4096, // One block
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Write some data first
+	writeData := createAlignedBuffer(8192, 4096) // 2 blocks
+	_, err = raf.Pwrite(writeData)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	// Trim head
+	err = raf.TrimHead()
+	if err != nil {
+		t.Fatalf("TrimHead failed: %v", err)
+	}
+
+	// Verify state changes
+	if raf.LogicalStartOffset != int64(config.FilePunchHoleSize) {
+		t.Errorf("Expected LogicalStartOffset %d, got %d", config.FilePunchHoleSize, raf.LogicalStartOffset)
+	}
+
+	if raf.Stat.PunchHoleCount != 1 {
+		t.Errorf("Expected PunchHoleCount 1, got %d", raf.Stat.PunchHoleCount)
+	}
+}
+
+func TestIsAlignedOffset(t *testing.T) {
+	tests := []struct {
+		name      string
+		offset    int64
+		alignment int
+		expected  bool
+	}{
+		{"aligned_0", 0, 4096, true},
+		{"aligned_4096", 4096, 4096, true},
+		{"aligned_8192", 8192, 4096, true},
+		{"unaligned_100", 100, 4096, false},
+		{"unaligned_4097", 4097, 4096, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := isAlignedOffset(tt.offset, tt.alignment)
+			if result != tt.expected {
+				t.Errorf("isAlignedOffset(%d, %d) = %v, expected %v", tt.offset, tt.alignment, result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestMultipleOperations(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Write multiple blocks
+	for i := 0; i < 5; i++ {
+		data := createAlignedBuffer(4096, 4096)
+		for j := range data {
+			data[j] = byte((i*256 + j) % 256)
+		}
+
+		_, err = raf.Pwrite(data)
+		if err != nil {
+			t.Fatalf("Pwrite %d failed: %v", i, err)
+		}
+	}
+
+	// Verify total written
+	expectedPhysicalOffset := int64(5 * 4096)
+	if raf.CurrentPhysicalOffset != expectedPhysicalOffset {
+		t.Errorf("Expected CurrentPhysicalOffset %d, got %d", expectedPhysicalOffset, raf.CurrentPhysicalOffset)
+	}
+
+	// Read back data from different offsets
+	for i := 0; i < 5; i++ {
+		readData := createAlignedBuffer(4096, 4096)
+		_, err = raf.Pread(int64(i*4096), readData)
+		if err != nil {
+			t.Fatalf("Pread %d failed: %v", i, err)
+		}
+
+		// Verify data integrity
+		for j := range readData {
+			expected := byte((i*256 + j) % 256)
+			if readData[j] != expected {
+				t.Errorf("Data mismatch at block %d, index %d: expected %d, got %d", i, j, expected, readData[j])
+			}
+		}
+	}
+
+	// Verify statistics
+	if raf.Stat.WriteCount != 5 {
+		t.Errorf("Expected WriteCount 5, got %d", raf.Stat.WriteCount)
+	}
+	if raf.Stat.ReadCount != 5 {
+		t.Errorf("Expected ReadCount 5, got %d", raf.Stat.ReadCount)
+	}
+}
+
+func TestStatistics(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_rolling_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	raf, err := NewRollingAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create RollingAppendFile: %v", err)
+	}
+	defer cleanup(raf)
+
+	// Initial state
+	if raf.Stat.WriteCount != 0 {
+		t.Errorf("Expected initial WriteCount 0, got %d", raf.Stat.WriteCount)
+	}
+	if raf.Stat.ReadCount != 0 {
+		t.Errorf("Expected initial ReadCount 0, got %d", raf.Stat.ReadCount)
+	}
+	if raf.Stat.PunchHoleCount != 0 {
+		t.Errorf("Expected initial PunchHoleCount 0, got %d", raf.Stat.PunchHoleCount)
+	}
+
+	// Perform operations and verify statistics
+	data := createAlignedBuffer(4096, 4096)
+
+	// Write operation
+	_, err = raf.Pwrite(data)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+	if raf.Stat.WriteCount != 1 {
+		t.Errorf("Expected WriteCount 1, got %d", raf.Stat.WriteCount)
+	}
+
+	// Read operation
+	_, err = raf.Pread(0, data)
+	if err != nil {
+		t.Fatalf("Pread failed: %v", err)
+	}
+	if raf.Stat.ReadCount != 1 {
+		t.Errorf("Expected ReadCount 1, got %d", raf.Stat.ReadCount)
+	}
+
+	// Trim operation
+	err = raf.TrimHead()
+	if err != nil {
+		t.Fatalf("TrimHead failed: %v", err)
+	}
+	if raf.Stat.PunchHoleCount != 1 {
+		t.Errorf("Expected PunchHoleCount 1, got %d", raf.Stat.PunchHoleCount)
+	}
+}
+
+// Helper function to clean up resources
+func cleanup(raf *RollingAppendFile) {
+	if raf.WriteFile != nil {
+		raf.WriteFile.Close()
+	}
+	if raf.ReadFile != nil {
+		raf.ReadFile.Close()
+	}
+	if raf.WriteFile != nil {
+		os.Remove(raf.WriteFile.Name())
+	}
+}
diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go
new file mode 100644
index 00000000..fc91e006
--- /dev/null
+++ b/flashring/internal/fs/wrap_file.go
@@ -0,0 +1,174 @@
+//go:build linux
+// +build linux
+
+package fs
+
+import (
+	"os"
+	"syscall"
+
+	"golang.org/x/sys/unix"
+)
+
+type WrapAppendFile struct {
+	WriteDirectIO        bool
+	ReadDirectIO         bool
+	wrapped              bool
+	blockSize            int
+	WriteFd              int      // write file descriptor
+	ReadFd               int      // read file descriptor
+	MaxFileSize          int64    // max file size in bytes
+	FilePunchHoleSize    int64    // file punch hole size in bytes
+	PhysicalStartOffset  int64    // physical start offset in bytes
+	LogicalCurrentOffset int64    // file current size in bytes
+	PhysicalWriteOffset  int64    // file current physical offset in bytes
+	WriteFile            *os.File // write file
+	ReadFile             *os.File // read file
+	Stat                 *Stat    // file statistics
+}
+
+func NewWrapAppendFile(config FileConfig) (*WrapAppendFile, error) {
+	filename := config.Filename
+	maxFileSize := config.MaxFileSize
+	filePunchHoleSize := config.FilePunchHoleSize
+
+	writeFd, writeFile, wDirectIO, err := createPreAllocatedWriteFileDescriptor(filename, maxFileSize)
+	if err != nil {
+		return nil, err
+	}
+	readFd, readFile, rDirectIO, err := createReadFileDescriptor(filename)
+	if err != nil {
+		return nil, err
+	}
+	blockSize := config.BlockSize
+	if blockSize == 0 {
+		blockSize = BLOCK_SIZE
+	}
+	return &WrapAppendFile{
+		WriteDirectIO:        wDirectIO,
+		ReadDirectIO:         rDirectIO,
+		blockSize:            blockSize,
+		WriteFd:              writeFd,
+		ReadFd:               readFd,
+		WriteFile:            writeFile,
+		ReadFile:             readFile,
+		MaxFileSize:          maxFileSize,
+		FilePunchHoleSize:    filePunchHoleSize,
+		PhysicalStartOffset:  0,
+		LogicalCurrentOffset: 0,
+		PhysicalWriteOffset:  0,
+		Stat: &Stat{
+			WriteCount:         0,
+			ReadCount:          0,
+			PunchHoleCount:     0,
+			CurrentLogicalSize: 0,
+		},
+	}, nil
+}
+
+func (r *WrapAppendFile) Pwrite(buf []byte) (currentPhysicalOffset int64, err error) {
+	if r.WriteDirectIO {
+		if !isAlignedBuffer(buf, r.blockSize) {
+			return 0, ErrBufNoAlign
+		}
+	}
+	n, err := syscall.Pwrite(r.WriteFd, buf, r.PhysicalWriteOffset)
+	if err != nil {
+		return 0, err
+	}
+	r.PhysicalWriteOffset += int64(n)
+	if r.PhysicalWriteOffset >= r.MaxFileSize {
+		r.wrapped = true
+		r.PhysicalWriteOffset = r.PhysicalStartOffset
+	}
+	r.LogicalCurrentOffset += int64(n)
+	r.Stat.WriteCount++
+	return r.PhysicalWriteOffset, nil
+}
+
+func (r *WrapAppendFile) TrimHeadIfNeeded() bool {
+	if r.wrapped && r.PhysicalWriteOffset == r.PhysicalStartOffset {
+		return true
+	}
+	return false
+}
+
+func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) {
+	if r.ReadDirectIO {
+		if !isAlignedOffset(fileOffset, r.blockSize) {
+			return 0, ErrOffsetNotAligned
+		}
+		if !isAlignedBuffer(buf, r.blockSize) {
+			return 0, ErrBufNoAlign
+		}
+	}
+
+	// Validate read window depending on wrap state
+	readEnd := fileOffset + int64(len(buf))
+	valid := false
+
+	if !r.wrapped {
+		// Single valid region: [PhysicalStartOffset, PhysicalWriteOffset)
+		valid = fileOffset >= r.PhysicalStartOffset && readEnd <= r.PhysicalWriteOffset
+	} else {
+		// Two valid regions:
+		// 1. [PhysicalStartOffset, MaxFileSize)
+		// 2. [0, PhysicalWriteOffset)
+		fileOffset = fileOffset % r.MaxFileSize
+		readEnd = readEnd % r.MaxFileSize
+		if fileOffset >= r.PhysicalStartOffset {
+			valid = readEnd <= r.MaxFileSize
+		} else {
+			valid = readEnd <= r.PhysicalWriteOffset
+		}
+	}
+	if !valid {
+		return 0, ErrFileOffsetOutOfRange
+	}
+
+	n, err := syscall.Pread(r.ReadFd, buf, fileOffset)
+	// flags := unix.RWF_HIPRI // optionally: | unix.RWF_NOWAIT
+	// n, err := preadv2(r.ReadFd, buf, fileOffset, flags)
+	if err != nil {
+		return 0, err
+	}
+	r.Stat.ReadCount++
+	return int32(n), nil
+}
+
+func (r *WrapAppendFile) TrimHead() (err error) {
+	if r.WriteDirectIO {
+		if !isAlignedOffset(r.PhysicalStartOffset, r.blockSize) {
+			return ErrOffsetNotAligned
+		}
+	}
+	err = unix.Fallocate(r.WriteFd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, r.PhysicalStartOffset, int64(r.FilePunchHoleSize))
+	if err != nil {
+		return err
+	}
+	r.PhysicalStartOffset += int64(r.FilePunchHoleSize)
+	if r.PhysicalStartOffset >= r.MaxFileSize {
+		r.PhysicalStartOffset = 0
+	}
+	r.Stat.PunchHoleCount++
+	return nil
+}
+
+func (r *WrapAppendFile) Close() {
+	syscall.Close(r.WriteFd)
+	syscall.Close(r.ReadFd)
+	os.Remove(r.WriteFile.Name())
+	os.Remove(r.ReadFile.Name())
+}
+
+func preadv2(fd int, buf []byte, off int64, flags int) (int, error) {
+	if len(buf) == 0 {
+		return 0, nil
+	}
+	n, err := unix.Preadv2(fd, [][]byte{buf}, off, flags)
+	// Kernel or FS may not support preadv2/flags; fall back
+	if err == unix.ENOSYS || err == unix.EOPNOTSUPP || err == unix.EINVAL {
+		return unix.Pread(fd, buf, off)
+	}
+	return n, err
+}
diff --git a/flashring/internal/fs/wrap_file_test.go b/flashring/internal/fs/wrap_file_test.go
new file mode 100644
index 00000000..c0fa975d
--- /dev/null
+++ b/flashring/internal/fs/wrap_file_test.go
@@ -0,0 +1,792 @@
+//go:build linux
+// +build linux
+
+package fs
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestNewWrapAppendFile(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024, // 1MB
+		FilePunchHoleSize: 64 * 1024,   // 64KB
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Verify initial state
+	if waf.MaxFileSize != config.MaxFileSize {
+		t.Errorf("Expected MaxFileSize %d, got %d", config.MaxFileSize, waf.MaxFileSize)
+	}
+	if waf.FilePunchHoleSize != config.FilePunchHoleSize {
+		t.Errorf("Expected FilePunchHoleSize %d, got %d", config.FilePunchHoleSize, waf.FilePunchHoleSize)
+	}
+	if waf.blockSize != config.BlockSize {
+		t.Errorf("Expected BlockSize %d, got %d", config.BlockSize, waf.blockSize)
+	}
+	if waf.LogicalCurrentOffset != 0 {
+		t.Errorf("Expected LogicalCurrentOffset 0, got %d", waf.LogicalCurrentOffset)
+	}
+	if waf.PhysicalWriteOffset != 0 {
+		t.Errorf("Expected PhysicalWriteOffset 0, got %d", waf.PhysicalWriteOffset)
+	}
+	if waf.PhysicalStartOffset != 0 {
+		t.Errorf("Expected PhysicalStartOffset 0, got %d", waf.PhysicalStartOffset)
+	}
+	if waf.wrapped {
+		t.Errorf("Expected wrapped to be false initially")
+	}
+}
+
+func TestNewWrapAppendFile_DefaultBlockSize(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         0, // Should default to BLOCK_SIZE
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	if waf.blockSize != BLOCK_SIZE {
+		t.Errorf("Expected default BlockSize %d, got %d", BLOCK_SIZE, waf.blockSize)
+	}
+}
+
+func TestWrapAppendFile_Pwrite_Success(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Create aligned buffer
+	data := createAlignedBuffer(4096, 4096)
+	for i := range data {
+		data[i] = byte(i % 256)
+	}
+
+	offset, err := waf.Pwrite(data)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	if offset != int64(len(data)) {
+		t.Errorf("Expected offset %d, got %d", len(data), offset)
+	}
+
+	if waf.PhysicalWriteOffset != int64(len(data)) {
+		t.Errorf("Expected PhysicalWriteOffset %d, got %d", len(data), waf.PhysicalWriteOffset)
+	}
+
+	if waf.LogicalCurrentOffset != int64(len(data)) {
+		t.Errorf("Expected LogicalCurrentOffset %d, got %d", len(data), waf.LogicalCurrentOffset)
+	}
+
+	if waf.Stat.WriteCount != 1 {
+		t.Errorf("Expected WriteCount 1, got %d", waf.Stat.WriteCount)
+	}
+
+	if waf.wrapped {
+		t.Errorf("Expected wrapped to be false")
+	}
+}
+
+func TestPwrite_WrapAround(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       8192, // Small max size for easy wrapping
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Write first block
+	data1 := createAlignedBuffer(4096, 4096)
+	for i := range data1 {
+		data1[i] = byte(1)
+	}
+
+	_, err = waf.Pwrite(data1)
+	if err != nil {
+		t.Fatalf("First Pwrite failed: %v", err)
+	}
+
+	if waf.wrapped {
+		t.Errorf("Should not be wrapped after first write")
+	}
+
+	// Write second block - should trigger wrap
+	data2 := createAlignedBuffer(4096, 4096)
+	for i := range data2 {
+		data2[i] = byte(2)
+	}
+
+	offset, err := waf.Pwrite(data2)
+	if err != nil {
+		t.Fatalf("Second Pwrite failed: %v", err)
+	}
+
+	// After wrapping, should be at PhysicalStartOffset
+	if !waf.wrapped {
+		t.Errorf("Should be wrapped after exceeding MaxFileSize")
+	}
+
+	if waf.PhysicalWriteOffset != waf.PhysicalStartOffset {
+		t.Errorf("Expected PhysicalWriteOffset %d after wrap, got %d", waf.PhysicalStartOffset, waf.PhysicalWriteOffset)
+	}
+
+	if offset != waf.PhysicalStartOffset {
+		t.Errorf("Expected return offset %d after wrap, got %d", waf.PhysicalStartOffset, offset)
+	}
+
+	if waf.LogicalCurrentOffset != int64(8192) {
+		t.Errorf("Expected LogicalCurrentOffset %d, got %d", 8192, waf.LogicalCurrentOffset)
+	}
+}
+
+func TestWrapAppendFile_Pwrite_BufferNotAligned(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Only test if using DirectIO
+	if waf.WriteDirectIO {
+		// Create unaligned buffer
+		data := make([]byte, 4097) // Not aligned to 4096
+
+		_, err = waf.Pwrite(data)
+		if err != ErrBufNoAlign {
+			t.Errorf("Expected ErrBufNoAlign, got %v", err)
+		}
+	}
+}
+
+func TestPread_Success_NoWrap(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Write some data first
+	writeData := createAlignedBuffer(4096, 4096)
+	for i := range writeData {
+		writeData[i] = byte(i % 256)
+	}
+
+	_, err = waf.Pwrite(writeData)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	// Read the data back
+	readData := createAlignedBuffer(4096, 4096)
+	n, err := waf.Pread(0, readData)
+	if err != nil {
+		t.Fatalf("Pread failed: %v", err)
+	}
+
+	if n != int32(len(readData)) {
+		t.Errorf("Expected read length %d, got %d", len(readData), n)
+	}
+
+	// Verify data matches
+	for i := range readData {
+		if readData[i] != writeData[i] {
+			t.Errorf("Data mismatch at index %d: expected %d, got %d", i, writeData[i], readData[i])
+		}
+	}
+
+	if waf.Stat.ReadCount != 1 {
+		t.Errorf("Expected ReadCount 1, got %d", waf.Stat.ReadCount)
+	}
+}
+
+func TestPread_Success_WithWrap(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       8192, // Small for easy wrapping
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Fill the file to cause wrapping
+	data1 := createAlignedBuffer(4096, 4096)
+	for i := range data1 {
+		data1[i] = byte(1)
+	}
+	_, err = waf.Pwrite(data1)
+	if err != nil {
+		t.Fatalf("First Pwrite failed: %v", err)
+	}
+
+	data2 := createAlignedBuffer(4096, 4096)
+	for i := range data2 {
+		data2[i] = byte(2)
+	}
+	_, err = waf.Pwrite(data2)
+	if err != nil {
+		t.Fatalf("Second Pwrite failed: %v", err)
+	}
+
+	// Now write more to wrap around
+	data3 := createAlignedBuffer(4096, 4096)
+	for i := range data3 {
+		data3[i] = byte(3)
+	}
+	_, err = waf.Pwrite(data3)
+	if err != nil {
+		t.Fatalf("Third Pwrite failed: %v", err)
+	}
+
+	if !waf.wrapped {
+		t.Errorf("Expected wrapped to be true")
+	}
+
+	// Read from valid regions after wrap
+	// Region 1: [PhysicalStartOffset, MaxFileSize) - should contain data2
+	readData := createAlignedBuffer(4096, 4096)
+	n, err := waf.Pread(4096, readData)
+	if err != nil {
+		t.Fatalf("Pread from high region failed: %v", err)
+	}
+	if n != 4096 {
+		t.Errorf("Expected read length 4096, got %d", n)
+	}
+
+	// Region 2: [0, PhysicalWriteOffset) - should contain data3
+	readData2 := createAlignedBuffer(4096, 4096)
+	n, err = waf.Pread(0, readData2)
+	if err != nil {
+		t.Fatalf("Pread from low region failed: %v", err)
+	}
+	if n != 4096 {
+		t.Errorf("Expected read length 4096, got %d", n)
+	}
+
+	// Verify data3 in wrapped position
+	for i := range readData2 {
+		if readData2[i] != byte(3) {
+			t.Errorf("Data mismatch in wrapped region at index %d: expected %d, got %d", i, 3, readData2[i])
+		}
+	}
+}
+
+func TestPread_FileOffsetOutOfRange_NoWrap(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Try to read without writing anything
+	readData := createAlignedBuffer(4096, 4096)
+	_, err = waf.Pread(0, readData)
+	if err != ErrFileOffsetOutOfRange {
+		t.Errorf("Expected ErrFileOffsetOutOfRange, got %v", err)
+	}
+
+	// Write some data
+	writeData := createAlignedBuffer(4096, 4096)
+	_, err = waf.Pwrite(writeData)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	// Try to read beyond written data
+	_, err = waf.Pread(4096, readData)
+	if err != ErrFileOffsetOutOfRange {
+		t.Errorf("Expected ErrFileOffsetOutOfRange, got %v", err)
+	}
+}
+
+func TestPread_FileOffsetOutOfRange_WithWrap(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       8192,
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Cause wrapping
+	for i := 0; i < 3; i++ {
+		data := createAlignedBuffer(4096, 4096)
+		_, err = waf.Pwrite(data)
+		if err != nil {
+			t.Fatalf("Pwrite %d failed: %v", i, err)
+		}
+	}
+
+	if !waf.wrapped {
+		t.Errorf("Expected wrapped to be true")
+	}
+
+	// Try to read from invalid gap between PhysicalWriteOffset and PhysicalStartOffset
+	// After 3 writes with wrapping, valid regions are [PhysicalStartOffset, MaxFileSize) and [0, PhysicalWriteOffset)
+	// Try reading from an aligned offset that should be invalid
+	readData := createAlignedBuffer(4096, 4096)
+
+	// Try reading from aligned offset that's out of valid range
+	// Since PhysicalStartOffset=0 after auto-trim and PhysicalWriteOffset=4096,
+	// reading from offset 8192 should be out of range (beyond MaxFileSize for wrapped file)
+	_, err = waf.Pread(8192, readData) // Should be out of range - beyond MaxFileSize
+	if err != ErrFileOffsetOutOfRange {
+		t.Errorf("Expected ErrFileOffsetOutOfRange for gap read, got %v", err)
+	}
+}
+
+func TestWrapAppendFile_Pread_OffsetNotAligned(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Only test if using DirectIO
+	if waf.ReadDirectIO {
+		// Write some data first
+		writeData := createAlignedBuffer(8192, 4096)
+		_, err = waf.Pwrite(writeData)
+		if err != nil {
+			t.Fatalf("Pwrite failed: %v", err)
+		}
+
+		// Try to read from unaligned offset
+		readData := createAlignedBuffer(4096, 4096)
+		_, err = waf.Pread(100, readData) // Not aligned to 4096
+		if err != ErrOffsetNotAligned {
+			t.Errorf("Expected ErrOffsetNotAligned, got %v", err)
+		}
+	}
+}
+
+func TestPread_BufferNotAligned(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Only test if using DirectIO
+	if waf.ReadDirectIO {
+		// Write some data first
+		writeData := createAlignedBuffer(4096, 4096)
+		_, err = waf.Pwrite(writeData)
+		if err != nil {
+			t.Fatalf("Pwrite failed: %v", err)
+		}
+
+		// Try to read with unaligned buffer
+		readData := make([]byte, 4097) // Not aligned
+		_, err = waf.Pread(0, readData)
+		if err != ErrBufNoAlign {
+			t.Errorf("Expected ErrBufNoAlign, got %v", err)
+		}
+	}
+}
+
+func TestWrapAppendFile_TrimHead_Success(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 4096, // One block
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Write some data first
+	writeData := createAlignedBuffer(8192, 4096) // 2 blocks
+	_, err = waf.Pwrite(writeData)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+
+	initialStartOffset := waf.PhysicalStartOffset
+
+	// Trim head
+	err = waf.TrimHead()
+	if err != nil {
+		t.Fatalf("TrimHead failed: %v", err)
+	}
+
+	// Verify state changes
+	expectedStartOffset := initialStartOffset + int64(config.FilePunchHoleSize)
+	if waf.PhysicalStartOffset != expectedStartOffset {
+		t.Errorf("Expected PhysicalStartOffset %d, got %d", expectedStartOffset, waf.PhysicalStartOffset)
+	}
+
+	if waf.Stat.PunchHoleCount != 1 {
+		t.Errorf("Expected PunchHoleCount 1, got %d", waf.Stat.PunchHoleCount)
+	}
+}
+
+func TestTrimHead_WrapAround(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       8192,
+		FilePunchHoleSize: 8192, // Same as max file size
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Set PhysicalStartOffset to near end
+	waf.PhysicalStartOffset = 4096
+
+	// Trim head - should wrap around to 0
+	err = waf.TrimHead()
+	if err != nil {
+		t.Fatalf("TrimHead failed: %v", err)
+	}
+
+	// Should wrap to 0 since 4096 + 8192 >= 8192
+	if waf.PhysicalStartOffset != 0 {
+		t.Errorf("Expected PhysicalStartOffset to wrap to 0, got %d", waf.PhysicalStartOffset)
+	}
+}
+
+func TestTrimHead_OffsetNotAligned(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Only test if using DirectIO
+	if waf.WriteDirectIO {
+		// Set unaligned PhysicalStartOffset
+		waf.PhysicalStartOffset = 100
+
+		err = waf.TrimHead()
+		if err != ErrOffsetNotAligned {
+			t.Errorf("Expected ErrOffsetNotAligned, got %v", err)
+		}
+	}
+}
+
+func TestPwrite_AutoTrimAfterWrap(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       8192,
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Write to cause wrap
+	for i := 0; i < 2; i++ {
+		data := createAlignedBuffer(4096, 4096)
+		_, err = waf.Pwrite(data)
+		if err != nil {
+			t.Fatalf("Pwrite %d failed: %v", i, err)
+		}
+	}
+
+	if !waf.wrapped {
+		t.Errorf("Expected wrapped to be true")
+	}
+
+	initialPunchHoleCount := waf.Stat.PunchHoleCount
+
+	// Write again - should trigger auto trim since wrapped && PhysicalWriteOffset == PhysicalStartOffset
+	data := createAlignedBuffer(4096, 4096)
+	_, err = waf.Pwrite(data)
+	if err != nil {
+		t.Fatalf("Auto-trim Pwrite failed: %v", err)
+	}
+
+	// Should have called TrimHead automatically
+	if waf.Stat.PunchHoleCount <= initialPunchHoleCount {
+		t.Errorf("Expected PunchHoleCount to increase due to auto-trim, got %d", waf.Stat.PunchHoleCount)
+	}
+}
+
+func TestClose(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 64 * 1024,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+
+	// Verify file exists
+	if _, err := os.Stat(filename); os.IsNotExist(err) {
+		t.Errorf("File should exist before Close")
+	}
+
+	// Close and verify cleanup
+	waf.Close()
+
+	// File should be removed
+	if _, err := os.Stat(filename); !os.IsNotExist(err) {
+		t.Errorf("File should be removed after Close")
+	}
+}
+
+func TestWrapAppendFile_MultipleOperations(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       16384, // 4 blocks
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Write multiple blocks to test wrap behavior
+	for i := 0; i < 6; i++ { // More than max file size / block size
+		data := createAlignedBuffer(4096, 4096)
+		for j := range data {
+			data[j] = byte((i*256 + j) % 256)
+		}
+
+		_, err = waf.Pwrite(data)
+		if err != nil {
+			t.Fatalf("Pwrite %d failed: %v", i, err)
+		}
+	}
+
+	// Should be wrapped
+	if !waf.wrapped {
+		t.Errorf("Expected wrapped to be true after writing 6 blocks")
+	}
+
+	// Verify logical offset continues to grow
+	expectedLogicalOffset := int64(6 * 4096)
+	if waf.LogicalCurrentOffset != expectedLogicalOffset {
+		t.Errorf("Expected LogicalCurrentOffset %d, got %d", expectedLogicalOffset, waf.LogicalCurrentOffset)
+	}
+
+	// Verify statistics
+	if waf.Stat.WriteCount != 6 {
+		t.Errorf("Expected WriteCount 6, got %d", waf.Stat.WriteCount)
+	}
+}
+
+func TestWrapAppendFile_Statistics(t *testing.T) {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_wrap_file.dat")
+
+	config := FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024,
+		FilePunchHoleSize: 4096,
+		BlockSize:         4096,
+	}
+
+	waf, err := NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create WrapAppendFile: %v", err)
+	}
+	defer cleanupWrapFile(waf)
+
+	// Initial state
+	if waf.Stat.WriteCount != 0 {
+		t.Errorf("Expected initial WriteCount 0, got %d", waf.Stat.WriteCount)
+	}
+	if waf.Stat.ReadCount != 0 {
+		t.Errorf("Expected initial ReadCount 0, got %d", waf.Stat.ReadCount)
+	}
+	if waf.Stat.PunchHoleCount != 0 {
+		t.Errorf("Expected initial PunchHoleCount 0, got %d", waf.Stat.PunchHoleCount)
+	}
+
+	// Perform operations and verify statistics
+	data := createAlignedBuffer(4096, 4096)
+
+	// Write operation
+	_, err = waf.Pwrite(data)
+	if err != nil {
+		t.Fatalf("Pwrite failed: %v", err)
+	}
+	if waf.Stat.WriteCount != 1 {
+		t.Errorf("Expected WriteCount 1, got %d", waf.Stat.WriteCount)
+	}
+
+	// Read operation
+	_, err = waf.Pread(0, data)
+	if err != nil {
+		t.Fatalf("Pread failed: %v", err)
+	}
+	if waf.Stat.ReadCount != 1 {
+		t.Errorf("Expected ReadCount 1, got %d", waf.Stat.ReadCount)
+	}
+
+	// Trim operation
+	err = waf.TrimHead()
+	if err != nil {
+		t.Fatalf("TrimHead failed: %v", err)
+	}
+	if waf.Stat.PunchHoleCount != 1 {
+		t.Errorf("Expected PunchHoleCount 1, got %d", waf.Stat.PunchHoleCount)
+	}
+}
+
+// Helper function to clean up resources for WrapAppendFile
+func cleanupWrapFile(waf *WrapAppendFile) {
+	if waf.WriteFile != nil {
+		waf.WriteFile.Close()
+	}
+	if waf.ReadFile != nil {
+		waf.ReadFile.Close()
+	}
+	if waf.WriteFile != nil {
+		os.Remove(waf.WriteFile.Name())
+	}
+}
diff --git a/flashring/internal/indices/constants.go b/flashring/internal/indices/constants.go
new file mode 100644
index 00000000..7062bcde
--- /dev/null
+++ b/flashring/internal/indices/constants.go
@@ -0,0 +1,84 @@
+package indices
+
+const (
+	LENGTH_MASK      = (1 << 16) - 1
+	LAST_ACCESS_MASK = (1 << 24) - 1
+	FREQ_MASK        = (1 << 24) - 1
+	H10_MASK         = (1 << 10) - 1
+	EXPTIME_MASK     = (1 << 22) - 1
+	SLICE_POS_MASK   = (1 << 14) - 1
+	ROUND_MASK       = (1 << 4) - 1
+	ROUTE_MASK       = (1 << 24) - 1
+	MEM_ID_MASK      = (1 << 32) - 1
+	OFFSET_MASK      = (1 << 32) - 1
+	ROUND_SHIFT      = 60
+	ROUTE_SHIFT      = 36
+	SLICE_POS_SHIFT  = 22
+	EXPTIME_SHIFT    = 0
+	SET_BIT_0        = 1 << 0
+	SET_BIT_1        = 1 << 1
+	SET_BIT_2        = 1 << 2
+	SET_BIT_3        = 1 << 3
+	SET_BIT_4        = 1 << 4
+	SET_BIT_5        = 1 << 5
+	SET_BIT_6        = 1 << 6
+	SET_BIT_7        = 1 << 7
+	SET_BIT_8        = 1 << 8
+	SET_BIT_9        = 1 << 9
+	SET_BIT_10       = 1 << 10
+	SET_BIT_11       = 1 << 11
+	SET_BIT_12       = 1 << 12
+	SET_BIT_13       = 1 << 13
+	SET_BIT_14       = 1 << 14
+	SET_BIT_15       = 1 << 15
+	SET_BIT_16       = 1 << 16
+	SET_BIT_17       = 1 << 17
+	SET_BIT_18       = 1 << 18
+	SET_BIT_19       = 1 << 19
+	SET_BIT_20       = 1 << 20
+	SET_BIT_21       = 1 << 21
+	SET_BIT_22       = 1 << 22
+	SET_BIT_23       = 1 << 23
+	SET_BIT_24       = 1 << 24
+	SET_BIT_25       = 1 << 25
+	SET_BIT_26       = 1 << 26
+	SET_BIT_27       = 1 << 27
+	SET_BIT_28       = 1 << 28
+	SET_BIT_29       = 1 << 29
+	SET_BIT_30       = 1 << 30
+	SET_BIT_31       = 1 << 31
+	SET_BIT_32       = 1 << 32
+	SET_BIT_33       = 1 << 33
+	SET_BIT_34       = 1 << 34
+	SET_BIT_35       = 1 << 35
+	SET_BIT_36       = 1 << 36
+	SET_BIT_37       = 1 << 37
+	SET_BIT_38       = 1 << 38
+	SET_BIT_39       = 1 << 39
+	SET_BIT_40       = 1 << 40
+	SET_BIT_41       = 1 << 41
+	SET_BIT_42       = 1 << 42
+	SET_BIT_43       = 1 << 43
+	SET_BIT_44       = 1 << 44
+	SET_BIT_45       = 1 << 45
+	SET_BIT_46       = 1 << 46
+	SET_BIT_47       = 1 << 47
+	SET_BIT_48       = 1 << 48
+	SET_BIT_49       = 1 << 49
+	SET_BIT_50       = 1 << 50
+	SET_BIT_51       = 1 << 51
+	SET_BIT_52       = 1 << 52
+	SET_BIT_53       = 1 << 53
+	SET_BIT_54       = 1 << 54
+	SET_BIT_55       = 1 << 55
+	SET_BIT_56       = 1 << 56
+	SET_BIT_57       = 1 << 57
+	SET_BIT_58       = 1 << 58
+	SET_BIT_59       = 1 << 59
+	SET_BIT_60       = 1 << 60
+	SET_BIT_61       = 1 << 61
+	SET_BIT_62       = 1 << 62
+	SET_BIT_63       = 1 << 63
+)
+
+var ()
diff --git a/flashring/internal/indices/delete_manager.go b/flashring/internal/indices/delete_manager.go
new file mode 100644
index 00000000..da454722
--- /dev/null
+++ b/flashring/internal/indices/delete_manager.go
@@ -0,0 +1,76 @@
+package indices
+
+import (
+	"fmt"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+	"github.com/rs/zerolog/log"
+)
+
+type DeleteManager struct {
+	memtableData        map[uint32]int
+	toBeDeletedMemId    uint32
+	keyIndex            *KeyIndex
+	wrapFile            *fs.WrapAppendFile
+	deleteInProgress    bool
+	deleteAmortizedStep int
+	deleteCount         int
+}
+
+func NewDeleteManager(keyIndex *KeyIndex, wrapFile *fs.WrapAppendFile, deleteAmortizedStep int) *DeleteManager {
+	return &DeleteManager{
+		memtableData:        make(map[uint32]int),
+		toBeDeletedMemId:    0,
+		keyIndex:            keyIndex,
+		wrapFile:            wrapFile,
+		deleteInProgress:    false,
+		deleteAmortizedStep: deleteAmortizedStep,
+	}
+}
+
+func (dm *DeleteManager) IncMemtableKeyCount(memId uint32) {
+	dm.memtableData[memId]++
+}
+
+func (dm *DeleteManager) ExecuteDeleteIfNeeded() error {
+	if dm.deleteInProgress {
+		memtableId, count := dm.keyIndex.Delete(dm.deleteCount)
+		if count == -1 {
+			return fmt.Errorf("delete failed")
+		}
+		if memtableId != dm.toBeDeletedMemId {
+			dm.memtableData[dm.toBeDeletedMemId] = dm.memtableData[dm.toBeDeletedMemId] - count
+			log.Debug().Msgf("memtableId: %d, toBeDeletedMemId: %d", memtableId, dm.toBeDeletedMemId)
+			if dm.memtableData[dm.toBeDeletedMemId] != 0 {
+				return fmt.Errorf("memtableData[dm.toBeDeletedMemId] != 0")
+			}
+			delete(dm.memtableData, dm.toBeDeletedMemId)
+			dm.toBeDeletedMemId = memtableId
+			dm.deleteInProgress = false
+			dm.deleteCount = 0
+			return nil
+		} else {
+			dm.memtableData[memtableId] -= count
+			//log.Debug().Msgf("memtableData[%d] = %d", memtableId, dm.memtableData[memtableId])
+		}
+		return nil
+	}
+
+	trimNeeded := dm.wrapFile.TrimHeadIfNeeded()
+	nextAddNeedsDelete := dm.keyIndex.GetRB().NextAddNeedsDelete()
+
+	if trimNeeded || nextAddNeedsDelete {
+		dm.deleteInProgress = true
+		dm.deleteCount = int(dm.memtableData[dm.toBeDeletedMemId] / dm.deleteAmortizedStep)
+		memIdAtHead, err := dm.keyIndex.PeekMemIdAtHead()
+		if err != nil {
+			return err
+		}
+		if memIdAtHead != dm.toBeDeletedMemId {
+			return fmt.Errorf("memIdAtHead: %d, toBeDeletedMemId: %d", memIdAtHead, dm.toBeDeletedMemId)
+		}
+		dm.wrapFile.TrimHead()
+		return nil
+	}
+	return nil
+}
diff --git a/flashring/internal/indices/encoder.go b/flashring/internal/indices/encoder.go
new file mode 100644
index 00000000..d4e952da
--- /dev/null
+++ b/flashring/internal/indices/encoder.go
@@ -0,0 +1,85 @@
+package indices
+
+/*
+-----------
+uint64
+-----------
+round 4 bits
+route 24 bits
+slice pos 14 bits
+exp in minutes 22 bits
+---------
+uint64
+---------
+length 16 bits
+access 24 bits
+freq 24 bits
+-------------
+uint64
+-------------
+memId 32 bits
+offset 32 bits
+*/
+func encode(length uint16, memId, offset, lastAccess, freq uint32, exptime uint64, round, route, slicePos int, entry *Entry) {
+
+	d1 := uint64(round&ROUND_MASK) << 60
+	d1 |= uint64(route&ROUTE_MASK) << 36
+	d1 |= uint64(slicePos&SLICE_POS_MASK) << 22
+	d1 |= uint64(exptime & EXPTIME_MASK)
+
+	d2 := uint64(length&LENGTH_MASK) << 48
+	d2 |= uint64(lastAccess&LAST_ACCESS_MASK) << 24
+	d2 |= uint64(freq & FREQ_MASK)
+
+	d3 := uint64(memId&MEM_ID_MASK) << 32
+	d3 |= uint64(offset & OFFSET_MASK)
+
+	ByteOrder.PutUint64(entry[:8], d1)
+	ByteOrder.PutUint64(entry[8:16], d2)
+	ByteOrder.PutUint64(entry[16:24], d3)
+}
+
+func encodeD2(length uint16, lastAccess, freq uint32, entry *Entry) {
+	d2 := uint64(length&LENGTH_MASK) << 48
+	d2 |= uint64(lastAccess&LAST_ACCESS_MASK) << 24
+	d2 |= uint64(freq & FREQ_MASK)
+	ByteOrder.PutUint64(entry[8:16], d2)
+}
+
+func extract(entry *Entry) (length uint16, memId, offset, lastAccess, freq uint32, exptime uint64, round, route, slicePos int) {
+	d1 := ByteOrder.Uint64(entry[:8])
+	d2 := ByteOrder.Uint64(entry[8:16])
+	d3 := ByteOrder.Uint64(entry[16:24])
+
+	round = int(d1>>60) & ROUND_MASK
+	route = int(d1>>36) & ROUTE_MASK
+	slicePos = int(d1>>22) & SLICE_POS_MASK
+	exptime = d1 & EXPTIME_MASK
+
+	length = uint16(d2>>48) & LENGTH_MASK
+	lastAccess = uint32(d2>>24) & LAST_ACCESS_MASK
+	freq = uint32(d2) & FREQ_MASK
+
+	memId = uint32(d3>>32) & MEM_ID_MASK
+	offset = uint32(d3) & OFFSET_MASK
+	return
+}
+
+func extractD1(entry *Entry) (round, route, slicePos int) {
+	d1 := ByteOrder.Uint64(entry[:8])
+	round = int(d1>>60) & ROUND_MASK
+	route = int(d1>>36) & ROUTE_MASK
+	slicePos = int(d1>>22) & SLICE_POS_MASK
+	return
+}
+
+func extractD3(entry *Entry) (memId, offset uint32) {
+	d3 := ByteOrder.Uint64(entry[16:24])
+	memId = uint32(d3>>32) & MEM_ID_MASK
+	offset = uint32(d3) & OFFSET_MASK
+	return
+}
+
+func extractMemId(entry *Entry) (memId uint32) {
+	return ByteOrder.Uint32(entry[8:12])
+}
diff --git a/flashring/internal/indices/flat_bitmap.go b/flashring/internal/indices/flat_bitmap.go
new file mode 100644
index 00000000..61000e4c
--- /dev/null
+++ b/flashring/internal/indices/flat_bitmap.go
@@ -0,0 +1,242 @@
+package indices
+
+import (
+	"encoding/binary"
+)
+
+const (
+	_64_BITS_COUNT = (1 << 18) // 2^24/64 as we are using uint64
+)
+
+var bitIndex = [64]uint64{
+	SET_BIT_0, SET_BIT_1, SET_BIT_2, SET_BIT_3, SET_BIT_4, SET_BIT_5, SET_BIT_6, SET_BIT_7,
+	SET_BIT_8, SET_BIT_9, SET_BIT_10, SET_BIT_11, SET_BIT_12, SET_BIT_13, SET_BIT_14, SET_BIT_15,
+	SET_BIT_16, SET_BIT_17, SET_BIT_18, SET_BIT_19, SET_BIT_20, SET_BIT_21, SET_BIT_22, SET_BIT_23,
+	SET_BIT_24, SET_BIT_25, SET_BIT_26, SET_BIT_27, SET_BIT_28, SET_BIT_29, SET_BIT_30, SET_BIT_31,
+	SET_BIT_32, SET_BIT_33, SET_BIT_34, SET_BIT_35, SET_BIT_36, SET_BIT_37, SET_BIT_38, SET_BIT_39,
+	SET_BIT_40, SET_BIT_41, SET_BIT_42, SET_BIT_43, SET_BIT_44, SET_BIT_45, SET_BIT_46, SET_BIT_47,
+	SET_BIT_48, SET_BIT_49, SET_BIT_50, SET_BIT_51, SET_BIT_52, SET_BIT_53, SET_BIT_54, SET_BIT_55,
+	SET_BIT_56, SET_BIT_57, SET_BIT_58, SET_BIT_59, SET_BIT_60, SET_BIT_61, SET_BIT_62, SET_BIT_63,
+}
+
+type FlatBitmap struct {
+	bitmap     [_64_BITS_COUNT]uint64
+	valueSlice [_64_BITS_COUNT][]Entry12
+}
+
+func NewFlatBitmap() *FlatBitmap {
+	return &FlatBitmap{}
+}
+
+// Entry12 is a packed 12-byte entry: [8-byte tag][4-byte idx] in little-endian.
+type Entry12 [12]byte
+
+// buildTag packs last28bits (28 bits) and h2 (up to 34 bits) into a 64-bit tag:
+// tag = (last28bits & 0x0FFFFFFF) << 34 | (h2 & ((1<<34)-1))
+func buildTag(last28bits, h2 uint64) uint64 {
+	const mask28 = 0x0FFFFFFF
+	const mask34 = (uint64(1) << 34) - 1
+	return ((last28bits & mask28) << 34) | (h2 & mask34)
+}
+
+func putEntry(e *Entry12, tag uint64, idx uint32) {
+	binary.LittleEndian.PutUint64(e[0:8], tag)
+	binary.LittleEndian.PutUint32(e[8:12], idx)
+}
+
+func getTag(e *Entry12) uint64 {
+	return binary.LittleEndian.Uint64(e[0:8])
+}
+
+func getIdx(e *Entry12) uint32 {
+	return binary.LittleEndian.Uint32(e[8:12])
+}
+
+func zeroEntry(e *Entry12) {
+	for i := range e {
+		e[i] = 0
+	}
+}
+
+// FlatBitmapStats contains aggregated statistics for a FlatBitmap instance.
+type FlatBitmapStats struct {
+	BucketsUsed           uint32 // number of buckets with at least one bit set
+	BucketsWithOverflow   uint32 // buckets whose slice length > 64
+	TotalEntries          uint64 // total present entries (tag != 0)
+	PrimaryEntries        uint64 // entries present in primary region (first 64)
+	OverflowEntries       uint64 // entries present in overflow region (index >= 64)
+	ReusableOverflowSlots uint64 // zeroed overflow slots available for reuse
+
+	AvgValueSliceLen float64 // average slice length among used buckets
+	MaxValueSliceLen int     // maximum slice length among used buckets
+	AvgOverflowLen   float64 // average overflow length among buckets that have overflow
+
+	TotalAllocatedBytes uint64 // bytes allocated for value slices (len * 12)
+}
+
+// Stats computes aggregated statistics by scanning buckets and their slices.
+// This is O(number of buckets + total slice length) and intended for diagnostics.
+func (fb *FlatBitmap) Stats() FlatBitmapStats {
+	var st FlatBitmapStats
+	var sumLen uint64
+	var sumOverflowLen uint64
+
+	for pos := 0; pos < _64_BITS_COUNT; pos++ {
+		if fb.bitmap[pos] == 0 {
+			continue
+		}
+		st.BucketsUsed++
+		sl := fb.valueSlice[pos]
+		l := len(sl)
+		if l == 0 {
+			// Should not normally happen for a used bucket, but guard anyway
+			continue
+		}
+		sumLen += uint64(l)
+		if l > st.MaxValueSliceLen {
+			st.MaxValueSliceLen = l
+		}
+		st.TotalAllocatedBytes += uint64(l * 12)
+
+		// Primary region present entries
+		primMax := l
+		if primMax > 64 {
+			primMax = 64
+		}
+		for i := 0; i < primMax; i++ {
+			if getTag(&sl[i]) != 0 {
+				st.PrimaryEntries++
+				st.TotalEntries++
+			}
+		}
+
+		// Overflow region stats
+		if l > 64 {
+			st.BucketsWithOverflow++
+			overLen := l - 64
+			sumOverflowLen += uint64(overLen)
+			for i := 64; i < l; i++ {
+				if getTag(&sl[i]) != 0 {
+					st.OverflowEntries++
+					st.TotalEntries++
+				} else {
+					st.ReusableOverflowSlots++
+				}
+			}
+		}
+	}
+
+	if st.BucketsUsed > 0 {
+		st.AvgValueSliceLen = float64(sumLen) / float64(st.BucketsUsed)
+	}
+	if st.BucketsWithOverflow > 0 {
+		st.AvgOverflowLen = float64(sumOverflowLen) / float64(st.BucketsWithOverflow)
+	}
+	return st
+}
+
+func (fb *FlatBitmap) Set(next24bits, last28bits, h34 uint64, idx uint32) int {
+	pos := int((next24bits >> 6) & 0x3FFFF)
+	bitPos := next24bits & 0x3F
+	qTag := buildTag(last28bits, h34)
+	if fb.bitmap[pos] == 0 {
+		fb.valueSlice[pos] = make([]Entry12, 64)
+		fb.bitmap[pos] |= bitIndex[bitPos]
+		putEntry(&fb.valueSlice[pos][bitPos], qTag, idx)
+		return int(bitPos)
+	} else if fb.bitmap[pos]&bitIndex[bitPos] == 0 {
+		fb.bitmap[pos] |= bitIndex[bitPos]
+		putEntry(&fb.valueSlice[pos][bitPos], qTag, idx)
+		return int(bitPos)
+	} else {
+		// First check the initial position for existing key
+		if getTag(&fb.valueSlice[pos][bitPos]) == qTag {
+			putEntry(&fb.valueSlice[pos][bitPos], qTag, idx)
+			return int(bitPos)
+		}
+
+		// Then check collision list starting from index 64
+		i := 64
+		firstZeroIdx := -1
+		for i < len(fb.valueSlice[pos]) {
+			if getTag(&fb.valueSlice[pos][i]) == qTag {
+				putEntry(&fb.valueSlice[pos][i], qTag, idx)
+				return int(i)
+			} else if getTag(&fb.valueSlice[pos][i]) == 0 && firstZeroIdx == -1 {
+				firstZeroIdx = i
+			}
+			i++
+		}
+		if firstZeroIdx != -1 {
+			putEntry(&fb.valueSlice[pos][firstZeroIdx], qTag, idx)
+			return int(firstZeroIdx)
+		} else {
+			fb.valueSlice[pos] = append(fb.valueSlice[pos], Entry12{})
+			putEntry(&fb.valueSlice[pos][len(fb.valueSlice[pos])-1], qTag, idx)
+			return int(len(fb.valueSlice[pos]) - 1)
+		}
+	}
+}
+
+func (fb *FlatBitmap) Get(next24bits, last28bits, h34 uint64) (uint32, int, bool) {
+	pos := int((next24bits >> 6) & 0x3FFFF)
+	bitPos := next24bits & 0x3F
+	if fb.bitmap[pos] == 0 || fb.bitmap[pos]&bitIndex[bitPos] == 0 {
+		return 0, -1, false
+	}
+	if fb.bitmap[pos]&bitIndex[bitPos] == bitIndex[bitPos] {
+		qTag := buildTag(last28bits, h34)
+		if getTag(&fb.valueSlice[pos][bitPos]) == qTag {
+			return getIdx(&fb.valueSlice[pos][bitPos]), int(bitPos), true
+		}
+		i := 64
+		for i < len(fb.valueSlice[pos]) {
+			if getTag(&fb.valueSlice[pos][i]) == qTag {
+				return getIdx(&fb.valueSlice[pos][i]), int(i), true
+			}
+			i++
+		}
+		return 0, -1, false
+	}
+	return 0, -1, false
+}
+
+func (fb *FlatBitmap) Remove(next24bits, last28bits, h34 uint64) (uint32, bool) {
+	pos := int((next24bits >> 6) & 0x3FFFF)
+	bitPos := next24bits & 0x3F
+	if fb.bitmap[pos] == 0 || fb.bitmap[pos]&bitIndex[bitPos] == 0 {
+		return 0, false
+	}
+	if fb.bitmap[pos]&bitIndex[bitPos] == bitIndex[bitPos] {
+		qTag := buildTag(last28bits, h34)
+		if getTag(&fb.valueSlice[pos][bitPos]) == qTag {
+			idx := getIdx(&fb.valueSlice[pos][bitPos])
+			zeroEntry(&fb.valueSlice[pos][bitPos])
+			return idx, true
+		}
+		i := 64
+		for i < len(fb.valueSlice[pos]) {
+			if getTag(&fb.valueSlice[pos][i]) == qTag {
+				idx := getIdx(&fb.valueSlice[pos][i])
+				zeroEntry(&fb.valueSlice[pos][i])
+				return idx, true
+			}
+			i++
+		}
+	}
+	return 0, false
+}
+
+func (fb *FlatBitmap) RemoveV2(next24bits, slicePos int) (uint32, bool) {
+	pos := int((next24bits >> 6) & 0x3FFFF)
+	bitPos := next24bits & 0x3F
+	if fb.bitmap[pos] == 0 || fb.bitmap[pos]&bitIndex[bitPos] == 0 {
+		return 0, false
+	}
+	if fb.bitmap[pos]&bitIndex[bitPos] == bitIndex[bitPos] {
+		rbIdx := getIdx(&fb.valueSlice[pos][slicePos])
+		zeroEntry(&fb.valueSlice[pos][slicePos])
+		return uint32(rbIdx), true
+	}
+	return 0, false
+}
diff --git a/flashring/internal/indices/flat_bitmap_bench_test.go b/flashring/internal/indices/flat_bitmap_bench_test.go
new file mode 100644
index 00000000..2c93d7d9
--- /dev/null
+++ b/flashring/internal/indices/flat_bitmap_bench_test.go
@@ -0,0 +1 @@
+package indices
diff --git a/flashring/internal/indices/flat_bitmap_test.go b/flashring/internal/indices/flat_bitmap_test.go
new file mode 100644
index 00000000..2c93d7d9
--- /dev/null
+++ b/flashring/internal/indices/flat_bitmap_test.go
@@ -0,0 +1 @@
+package indices
diff --git a/flashring/internal/indices/key_index.go b/flashring/internal/indices/key_index.go
new file mode 100644
index 00000000..3828397d
--- /dev/null
+++ b/flashring/internal/indices/key_index.go
@@ -0,0 +1,120 @@
+package indices
+
+import (
+	"errors"
+	"time"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/maths"
+)
+
+var (
+	ErrGettingHeadEntry = errors.New("getting head entry failed")
+)
+
+type KeyIndex struct {
+	rm      *RoundMap
+	rb      *RingBuffer
+	mc      *maths.MorrisLogCounter
+	startAt int64
+}
+
+func NewKeyIndex(rounds int, rbInitial, rbMax, deleteAmortizedStep int) *KeyIndex {
+	if ByteOrder == nil {
+		loadByteOrder()
+	}
+	return &KeyIndex{
+		rm:      NewRoundMap(rounds),
+		rb:      NewRingBuffer(rbInitial, rbMax),
+		mc:      maths.New(10),
+		startAt: time.Now().Unix(),
+	}
+}
+
+func (ki *KeyIndex) Put(key string, length uint16, memId, offset uint32, exptime uint64) {
+	lastAccess := ki.GenerateLastAccess()
+	freq := uint32(1)
+	h64 := Hash64(key)
+	h34 := Hash34(key)
+	entry, idx, _ := ki.rb.GetEntry()
+	round, next24bits, slicePos := ki.rm.Add(key, uint32(idx), h64, h34)
+	encode(length, memId, offset, lastAccess, freq, exptime, round, next24bits, slicePos, entry)
+}
+
+func (ki *KeyIndex) GenerateLastAccess() uint32 {
+	return uint32(time.Now().Unix()-ki.startAt) / 60
+}
+
+func (ki *KeyIndex) Get(key string) (uint32, uint16, uint32, uint32, uint64, uint64, uint32, bool) {
+	h64 := Hash64(key)
+	h34 := Hash34(key)
+	idx, slicePos, found := ki.rm.Get(h64, h34)
+	if !found {
+		return 0, 0, 0, 0, 0, 0, 0, false // TODO: return error
+	}
+	entry, ok := ki.rb.Get(int(idx))
+	if !ok {
+		return 0, 0, 0, 0, 0, 0, 0, false // TODO: return error
+	}
+	length, memId, offset, lastAccessAt, freq, exptime, _, _, gotSlicePos := extract(entry)
+	if gotSlicePos != slicePos {
+		return 0, 0, 0, 0, 0, 0, 0, false // TODO: return error
+	}
+	lastAccess := ki.GenerateLastAccess()
+	freq, _ = ki.mc.Inc(freq)
+	encodeD2(length, lastAccess, freq, entry)
+	lastAccess = ki.GenerateLastAccess() - lastAccessAt
+	return memId, length, offset, lastAccess, ki.mc.Value(freq), exptime, uint32(idx), true
+}
+
+func (ki *KeyIndex) Delete(nKeys int) (uint32, int) {
+	for i := 0; i < nKeys; i++ {
+		deleted, next := ki.rb.Delete()
+		if deleted == nil {
+			return 0, -1
+		}
+		round, route, slicePos := extractD1(deleted)
+		ki.rm.RemoveV2(round, route, slicePos)
+		delMemId := extractMemId(deleted)
+		nextMemId := extractMemId(next)
+		if nextMemId == delMemId+1 {
+			return nextMemId, i + 1
+		} else if nextMemId == delMemId && i == nKeys-1 {
+			return delMemId, i + 1
+		} else if nextMemId == delMemId {
+			continue
+		} else {
+			return 0, -1
+		}
+	}
+	return 0, -1
+}
+
+func (ki *KeyIndex) GetRB() *RingBuffer {
+	return ki.rb
+}
+
+func (ki *KeyIndex) PeekMemIdAtHead() (uint32, error) {
+	entry, ok := ki.rb.Get(ki.rb.head)
+	if !ok {
+		return 0, ErrGettingHeadEntry
+	}
+	memId, _ := extractD3(entry)
+	return memId, nil
+}
+
+// Debug methods to expose ring buffer state
+func (ki *KeyIndex) GetRingBufferNextIndex() int {
+	return ki.rb.nextIndex
+}
+
+func (ki *KeyIndex) GetRingBufferSize() int {
+	return ki.rb.size
+}
+
+func (ki *KeyIndex) GetRingBufferCapacity() int {
+	return ki.rb.capacity
+}
+
+func (ki *KeyIndex) GetRingBufferActiveEntries() int {
+	return ki.rb.ActiveEntries()
+}
diff --git a/flashring/internal/indices/key_index_test.go b/flashring/internal/indices/key_index_test.go
new file mode 100644
index 00000000..2c93d7d9
--- /dev/null
+++ b/flashring/internal/indices/key_index_test.go
@@ -0,0 +1 @@
+package indices
diff --git a/flashring/internal/indices/rb.go b/flashring/internal/indices/rb.go
new file mode 100644
index 00000000..d91862ac
--- /dev/null
+++ b/flashring/internal/indices/rb.go
@@ -0,0 +1,90 @@
+package indices
+
+// Entry represents a 32-byte value. Adjust fields as needed.
+type Entry [24]byte
+
+// RingBuffer is a fixed-size circular queue that wraps around when full.
+// It maintains a sliding window of the most recent entries. Add returns an
+// absolute index which can be used with Get.
+type RingBuffer struct {
+	buf       []Entry
+	head      int
+	tail      int
+	size      int
+	nextIndex int
+	capacity  int // Fixed capacity (initial = max)
+	wrapped   bool
+}
+
+// NewRingBuffer creates a ring buffer with the given initial and maximum
+// capacity. Since we use a fixed-size buffer, initial and max should be the same.
+func NewRingBuffer(initial, max int) *RingBuffer {
+	if initial <= 0 || initial > max {
+		panic("invalid capacity")
+	}
+	// Use max capacity for fixed-size buffer (initial = max in practice)
+	capacity := max
+	return &RingBuffer{
+		buf:      make([]Entry, capacity),
+		capacity: capacity,
+		wrapped:  false,
+	}
+}
+
+// Add inserts e into the buffer and returns its absolute index. When the buffer
+// is full it wraps around and overwrites the oldest entry.
+func (rb *RingBuffer) Add(e *Entry) int {
+	// Store the entry at current tail position
+	rb.buf[rb.nextIndex] = *e
+	idx := rb.nextIndex
+	rb.nextIndex = (rb.nextIndex + 1) % rb.capacity
+	if rb.nextIndex == rb.head {
+		rb.head = (rb.head + 1) % rb.capacity
+	}
+
+	return idx
+}
+
+func (rb *RingBuffer) NextAddNeedsDelete() bool {
+	return rb.nextIndex == rb.head && rb.wrapped
+}
+
+func (rb *RingBuffer) GetEntry() (*Entry, int, bool) {
+	idx := rb.nextIndex
+	rb.nextIndex = (rb.nextIndex + 1) % rb.capacity
+	shouldDelete := false
+	if rb.nextIndex == rb.head {
+		// rb.head = (rb.head + 1) % rb.capacity
+		rb.wrapped = true
+		shouldDelete = true
+
+	}
+
+	return &rb.buf[idx], idx, shouldDelete
+}
+
+// Get retrieves an entry by its absolute index. The boolean return is false if
+// the index is out of range (either overwritten or not yet added).
+func (rb *RingBuffer) Get(index int) (*Entry, bool) {
+	// Calculate the valid window based on current state
+	if index > rb.capacity {
+		return nil, false
+	}
+	return &rb.buf[index], true
+}
+
+// Delete removes the oldest entry from the buffer if it is not empty.
+// For a fixed-size ring buffer, this only decreases size if not at capacity.
+func (rb *RingBuffer) Delete() (*Entry, *Entry) {
+	deleted := rb.buf[rb.head]
+	rb.head = (rb.head + 1) % rb.capacity
+	return &deleted, &rb.buf[rb.head]
+}
+
+// TailIndex returns the absolute index that will be assigned to the next Add.
+func (rb *RingBuffer) TailIndex() int {
+	return rb.nextIndex
+}
+func (rb *RingBuffer) ActiveEntries() int {
+	return (rb.nextIndex - rb.head + rb.capacity) % rb.capacity
+}
diff --git a/flashring/internal/indices/rb_bench_test.go b/flashring/internal/indices/rb_bench_test.go
new file mode 100644
index 00000000..566975a9
--- /dev/null
+++ b/flashring/internal/indices/rb_bench_test.go
@@ -0,0 +1,22 @@
+package indices
+
+import (
+	"testing"
+)
+
+// BenchmarkRingBufferPush50M benchmarks pushing 50 million elements to the ring buffer
+func BenchmarkRingBufferPush50M(b *testing.B) {
+	rb := NewRingBuffer(1000, 50_000_000)
+
+	b.ResetTimer()
+	b.Run("Add", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			rb.Add(&Entry{})
+		}
+	})
+	b.Run("Get", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			rb.Get(i)
+		}
+	})
+}
diff --git a/flashring/internal/indices/round_map.go b/flashring/internal/indices/round_map.go
new file mode 100644
index 00000000..de995300
--- /dev/null
+++ b/flashring/internal/indices/round_map.go
@@ -0,0 +1,75 @@
+package indices
+
+import (
+	"github.com/cespare/xxhash/v2"
+	"github.com/zeebo/xxh3"
+)
+
+const (
+	_LO_28BIT_IN_32BIT = (1 << 28) - 1
+	_LO_20BIT_IN_32BIT = (1 << 20) - 1
+	_LO_12BIT_IN_32BIT = (1 << 12) - 1
+	_LO_24BIT_IN_32BIT = (1 << 24) - 1
+	_LO_28BIT_IN_64BIT = (1 << 28) - 1
+	_LO_6BIT_IN_32BIT  = (1 << 6) - 1
+	_LO_9BIT_IN_32BIT  = (1 << 9) - 1
+	_LO_3BIT_IN_32BIT  = (1 << 3) - 1
+	_LO_54BIT_IN_64BIT = (1 << 54) - 1
+	_LO_34BIT_IN_64BIT = (1 << 34) - 1
+)
+
+func Hash34(data string) uint64 {
+	return uint64(xxh3.HashString(data) & _LO_34BIT_IN_64BIT) // mask 10 bits
+}
+
+func Hash64(data string) uint64 {
+	return xxhash.Sum64String(data)
+}
+
+type RoundMap struct {
+	bitmaps []*FlatBitmap
+}
+
+func NewRoundMap(numRounds int) *RoundMap {
+	bitmaps := make([]*FlatBitmap, numRounds)
+	for i := 0; i < numRounds; i++ {
+		bitmaps[i] = NewFlatBitmap()
+	}
+	return &RoundMap{
+		bitmaps: bitmaps,
+	}
+}
+
+func (rm *RoundMap) Add(key string, idx uint32, h64, h10 uint64) (int, int, int) {
+	first12bits, next24bits, last28bits := extractHashSegments(h64) // Bits 27–0
+
+	round := first12bits % uint64(len(rm.bitmaps))
+	slicePos := rm.bitmaps[round].Set(uint64(next24bits), uint64(last28bits), h10, idx)
+	return int(round), int(next24bits), slicePos
+}
+
+func extractHashSegments(h64 uint64) (uint64, uint64, uint64) {
+	first12bits := (h64 >> 52) & _LO_12BIT_IN_32BIT // Bits 63–52
+	next24bits := (h64 >> 28) & _LO_24BIT_IN_32BIT  // Bits 51–28
+	last28bits := h64 & _LO_28BIT_IN_32BIT
+	return first12bits, next24bits, last28bits
+}
+
+func (rm *RoundMap) Get(h64, h10 uint64) (uint32, int, bool) {
+	first12bits, next24bits, last28bits := extractHashSegments(h64) // Bits 27–0
+
+	round := first12bits % uint64(len(rm.bitmaps))
+	return rm.bitmaps[round].Get(uint64(next24bits), uint64(last28bits), h10)
+}
+
+func (rm *RoundMap) Remove(h64, h10 uint64) (uint32, bool) {
+
+	first12bits, next24bits, last28bits := extractHashSegments(h64) // Bits 27–0
+
+	round := first12bits % uint64(len(rm.bitmaps))
+	return rm.bitmaps[round].Remove(uint64(next24bits), uint64(last28bits), h10)
+}
+
+func (rm *RoundMap) RemoveV2(round, next24bits, slicePos int) (uint32, bool) {
+	return rm.bitmaps[round].RemoveV2(next24bits, slicePos)
+}
diff --git a/flashring/internal/indices/round_map_bench_test.go b/flashring/internal/indices/round_map_bench_test.go
new file mode 100644
index 00000000..2c93d7d9
--- /dev/null
+++ b/flashring/internal/indices/round_map_bench_test.go
@@ -0,0 +1 @@
+package indices
diff --git a/flashring/internal/indices/round_map_test.go b/flashring/internal/indices/round_map_test.go
new file mode 100644
index 00000000..2c93d7d9
--- /dev/null
+++ b/flashring/internal/indices/round_map_test.go
@@ -0,0 +1 @@
+package indices
diff --git a/flashring/internal/indices/system.go b/flashring/internal/indices/system.go
new file mode 100644
index 00000000..8b949f05
--- /dev/null
+++ b/flashring/internal/indices/system.go
@@ -0,0 +1,50 @@
+package indices
+
+import (
+	"encoding/binary"
+	"unsafe"
+)
+
+var ByteOrder *CustomByteOrder
+
+type CustomByteOrder struct {
+	binary.ByteOrder
+}
+
+func loadByteOrder() {
+	buf := [2]byte{}
+	*(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)
+
+	switch buf {
+	case [2]byte{0xCD, 0xAB}:
+		ByteOrder = &CustomByteOrder{binary.LittleEndian}
+	case [2]byte{0xAB, 0xCD}:
+		ByteOrder = &CustomByteOrder{binary.BigEndian}
+	default:
+		panic("Could not determine endianness.")
+	}
+}
+
+func (c *CustomByteOrder) PutInt64(b []byte, v int64) {
+	c.PutUint64(b, uint64(v))
+}
+
+func (c *CustomByteOrder) Int64(b []byte) int64 {
+	return int64(c.Uint64(b))
+}
+
+func (c *CustomByteOrder) PutInt32(b []byte, v int32) {
+	c.PutUint32(b, uint32(v))
+}
+
+func (c *CustomByteOrder) Int32(b []byte) int32 {
+	return int32(c.Uint32(b))
+}
+
+func (c *CustomByteOrder) PutUint32(b []byte, v uint32) {
+	c.ByteOrder.PutUint32(b, v)
+}
+
+func (c *CustomByteOrder) Uint32(b []byte) uint32 {
+	return c.ByteOrder.Uint32(b)
+}
diff --git a/flashring/internal/indicesV2/constant.go b/flashring/internal/indicesV2/constant.go
new file mode 100644
index 00000000..ad467899
--- /dev/null
+++ b/flashring/internal/indicesV2/constant.go
@@ -0,0 +1,22 @@
+package indicesv2
+
+const (
+
+	//[0]uint64
+	LENGTH_MASK        = (1 << 16) - 1
+	DELTA_EXPTIME_MASK = (1 << 16) - 1
+	LAST_ACCESS_MASK   = (1 << 16) - 1
+	FREQ_MASK          = (1 << 16) - 1
+
+	//[1]uint64
+	MEM_ID_MASK = (1 << 32) - 1
+	OFFSET_MASK = (1 << 32) - 1
+
+	LENGTH_SHIFT        = 48
+	DELTA_EXPTIME_SHIFT = 32
+	LAST_ACCESS_SHIFT   = 16
+	FREQ_SHIFT          = 0
+
+	MEM_ID_SHIFT = 32
+	OFFSET_SHIFT = 0
+)
diff --git a/flashring/internal/indicesV2/delete_manager.go b/flashring/internal/indicesV2/delete_manager.go
new file mode 100644
index 00000000..6b218915
--- /dev/null
+++ b/flashring/internal/indicesV2/delete_manager.go
@@ -0,0 +1,76 @@
+package indicesv2
+
+import (
+	"fmt"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+	"github.com/rs/zerolog/log"
+)
+
+type DeleteManager struct {
+	memtableData        map[uint32]int
+	toBeDeletedMemId    uint32
+	keyIndex            *Index
+	wrapFile            *fs.WrapAppendFile
+	deleteInProgress    bool
+	deleteAmortizedStep int
+	deleteCount         int
+}
+
+func NewDeleteManager(keyIndex *Index, wrapFile *fs.WrapAppendFile, deleteAmortizedStep int) *DeleteManager {
+	return &DeleteManager{
+		memtableData:        make(map[uint32]int),
+		toBeDeletedMemId:    0,
+		keyIndex:            keyIndex,
+		wrapFile:            wrapFile,
+		deleteInProgress:    false,
+		deleteAmortizedStep: deleteAmortizedStep,
+	}
+}
+
+func (dm *DeleteManager) IncMemtableKeyCount(memId uint32) {
+	dm.memtableData[memId]++
+}
+
+func (dm *DeleteManager) ExecuteDeleteIfNeeded() error {
+	if dm.deleteInProgress {
+		memtableId, count := dm.keyIndex.Delete(dm.deleteCount)
+		if count == -1 {
+			return fmt.Errorf("delete failed")
+		}
+		if memtableId != dm.toBeDeletedMemId {
+			dm.memtableData[dm.toBeDeletedMemId] = dm.memtableData[dm.toBeDeletedMemId] - count
+			log.Debug().Msgf("memtableId: %d, toBeDeletedMemId: %d", memtableId, dm.toBeDeletedMemId)
+			if dm.memtableData[dm.toBeDeletedMemId] != 0 {
+				return fmt.Errorf("memtableData[dm.toBeDeletedMemId] != 0")
+			}
+			delete(dm.memtableData, dm.toBeDeletedMemId)
+			dm.toBeDeletedMemId = memtableId
+			dm.deleteInProgress = false
+			dm.deleteCount = 0
+			return nil
+		} else {
+			dm.memtableData[memtableId] -= count
+			//log.Debug().Msgf("memtableData[%d] = %d", memtableId, dm.memtableData[memtableId])
+		}
+		return nil
+	}
+
+	trimNeeded := dm.wrapFile.TrimHeadIfNeeded()
+	nextAddNeedsDelete := dm.keyIndex.GetRB().NextAddNeedsDelete()
+
+	if trimNeeded || nextAddNeedsDelete {
+		dm.deleteInProgress = true
+		dm.deleteCount = int(dm.memtableData[dm.toBeDeletedMemId] / dm.deleteAmortizedStep)
+		memIdAtHead, err := dm.keyIndex.PeekMemIdAtHead()
+		if err != nil {
+			return err
+		}
+		if memIdAtHead != dm.toBeDeletedMemId {
+			return fmt.Errorf("memIdAtHead: %d, toBeDeletedMemId: %d", memIdAtHead, dm.toBeDeletedMemId)
+		}
+		dm.wrapFile.TrimHead()
+		return nil
+	}
+	return nil
+}
diff --git a/flashring/internal/indicesV2/encoder.go b/flashring/internal/indicesV2/encoder.go
new file mode 100644
index 00000000..3ccf986a
--- /dev/null
+++ b/flashring/internal/indicesV2/encoder.go
@@ -0,0 +1,53 @@
+package indicesv2
+
+func encode(key string, length, deltaExptime, lastAccess, freq uint16, memId, offset uint32, entry *Entry) {
+
+	d1 := uint64(length&LENGTH_MASK) << LENGTH_SHIFT
+	d1 |= uint64(deltaExptime&DELTA_EXPTIME_MASK) << DELTA_EXPTIME_SHIFT
+	d1 |= uint64(lastAccess&LAST_ACCESS_MASK) << LAST_ACCESS_SHIFT
+	d1 |= uint64(freq&FREQ_MASK) << FREQ_SHIFT
+
+	ByteOrder.PutUint64(entry[:8], d1)
+
+	d2 := uint64(memId&MEM_ID_MASK) << MEM_ID_SHIFT
+	d2 |= uint64(offset&OFFSET_MASK) << OFFSET_SHIFT
+
+	ByteOrder.PutUint64(entry[8:16], d2)
+}
+
+func decode(entry *Entry) (length, deltaExptime, lastAccess, freq uint16, memId, offset uint32) {
+	d1 := ByteOrder.Uint64(entry[:8])
+	d2 := ByteOrder.Uint64(entry[8:16])
+
+	length = uint16(d1>>LENGTH_SHIFT) & LENGTH_MASK
+	deltaExptime = uint16(d1>>DELTA_EXPTIME_SHIFT) & DELTA_EXPTIME_MASK
+	lastAccess = uint16(d1>>LAST_ACCESS_SHIFT) & LAST_ACCESS_MASK
+	freq = uint16(d1>>FREQ_SHIFT) & FREQ_MASK
+
+	memId = uint32(d2>>MEM_ID_SHIFT) & MEM_ID_MASK
+	offset = uint32(d2>>OFFSET_SHIFT) & OFFSET_MASK
+
+	return length, deltaExptime, lastAccess, freq, memId, offset
+}
+
+func decodeLastAccessNFreq(entry *Entry) (lastAccess, freq uint16) {
+	d1 := ByteOrder.Uint64(entry[:8])
+	lastAccess = uint16(d1>>LAST_ACCESS_SHIFT) & LAST_ACCESS_MASK
+	freq = uint16(d1>>FREQ_SHIFT) & FREQ_MASK
+
+	return lastAccess, freq
+}
+
+func encodeLastAccessNFreq(lastAccess, freq uint16, entry *Entry) {
+	d1 := uint64(lastAccess&LAST_ACCESS_MASK) << LAST_ACCESS_SHIFT
+	d1 |= uint64(freq&FREQ_MASK) << FREQ_SHIFT
+
+	ByteOrder.PutUint64(entry[:8], d1)
+}
+
+func decodeMemIdOffset(entry *Entry) (memId, offset uint32) {
+	d2 := ByteOrder.Uint64(entry[8:16])
+	memId = uint32(d2>>MEM_ID_SHIFT) & MEM_ID_MASK
+	offset = uint32(d2>>OFFSET_SHIFT) & OFFSET_MASK
+	return memId, offset
+}
diff --git a/flashring/internal/indicesV2/index.go b/flashring/internal/indicesV2/index.go
new file mode 100644
index 00000000..0b803f56
--- /dev/null
+++ b/flashring/internal/indicesV2/index.go
@@ -0,0 +1,125 @@
+package indicesv2
+
+import (
+	"errors"
+	"time"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/maths"
+)
+
+var ErrGettingHeadEntry = errors.New("getting head entry failed")
+
+type Status int
+
+const (
+	StatusOK Status = iota
+	StatusNotFound
+	StatusExpired
+)
+
+type Index struct {
+	rm       map[string]int
+	rb       *RingBuffer
+	mc       *maths.MorrisLogCounter
+	startAt  int64
+	hashBits int
+}
+
+func NewIndex(hashBits int, rbInitial, rbMax, deleteAmortizedStep int) *Index {
+	if ByteOrder == nil {
+		loadByteOrder()
+	}
+	rm := make(map[string]int)
+	return &Index{
+		rm:       rm,
+		rb:       NewRingBuffer(rbInitial, rbMax),
+		mc:       maths.New(12),
+		startAt:  time.Now().Unix(),
+		hashBits: hashBits,
+	}
+}
+
+func (i *Index) Put(key string, length, ttlInMinutes uint16, memId, offset uint32) {
+	if _, ok := i.rm[key]; ok {
+		idx := i.rm[key]
+		entry, _ := i.rb.Get(idx)
+		length, delta, lastAccess, freq, _, _ := decode(entry)
+		idx, _ = i.rb.PutInNextFreeSlot(func(entry *Entry) string {
+			encode(key, length, delta, lastAccess, freq, memId, offset, entry)
+			return key
+		})
+		i.rm[key] = idx
+		return
+	}
+	lastAccess := i.generateLastAccess()
+	freq := uint16(1)
+	expiryAt := (time.Now().Unix() / 60) + int64(ttlInMinutes)
+	delta := uint16(expiryAt - (i.startAt / 60))
+	idx, _ := i.rb.PutInNextFreeSlot(func(entry *Entry) string {
+		encode(key, length, delta, lastAccess, freq, memId, offset, entry)
+		return key
+	})
+	i.rm[key] = idx
+}
+
+func (i *Index) Get(key string) (length, lastAccess, remainingTTL uint16, freq uint64, memId, offset uint32, status Status) {
+	if idx, ok := i.rm[key]; ok {
+		entry, _ := i.rb.Get(idx)
+		length, deltaExptime, lastAccess, freq, memId, offset := decode(entry)
+		exptime := int(deltaExptime) + int(i.startAt/60)
+		currentTime := int(time.Now().Unix() / 60)
+		remainingTTL := exptime - currentTime
+		if remainingTTL <= 0 {
+			return 0, 0, 0, 0, 0, 0, StatusExpired
+		}
+		lastAccess = i.generateLastAccess()
+		freq = i.incrFreq(freq)
+		encodeLastAccessNFreq(lastAccess, freq, entry)
+		return length, lastAccess, uint16(remainingTTL), i.mc.Value(uint32(freq)), memId, offset, StatusOK
+	}
+	return 0, 0, 0, 0, 0, 0, StatusNotFound
+}
+
+func (ix *Index) Delete(count int) (uint32, int) {
+	for i := 0; i < count; i++ {
+		deleted, deletedKey, next, _ := ix.rb.Delete()
+		if deleted == nil {
+			return 0, -1
+		}
+		delMemId, _ := decodeMemIdOffset(deleted)
+		delete(ix.rm, deletedKey)
+		nextMemId, _ := decodeMemIdOffset(next)
+		if nextMemId == delMemId+1 {
+			return nextMemId, i + 1
+		} else if nextMemId == delMemId && i == count-1 {
+			return delMemId, i + 1
+		} else if nextMemId == delMemId {
+			continue
+		} else {
+			return 0, -1
+		}
+	}
+	return 0, -1
+}
+
+func (ki *Index) GetRB() *RingBuffer {
+	return ki.rb
+}
+
+func (ki *Index) PeekMemIdAtHead() (uint32, error) {
+	entry, ok := ki.rb.Get(ki.rb.head)
+	if !ok {
+		return 0, ErrGettingHeadEntry
+	}
+	memId, _ := decodeMemIdOffset(entry)
+	return memId, nil
+}
+
+func (i *Index) generateLastAccess() uint16 {
+	return uint16((time.Now().Unix() - i.startAt) / 60)
+}
+
+func (i *Index) incrFreq(freq uint16) uint16 {
+	newFreq, _ := i.mc.Inc(uint32(freq))
+	return uint16(newFreq)
+}
diff --git a/flashring/internal/indicesV2/index_test.go b/flashring/internal/indicesV2/index_test.go
new file mode 100644
index 00000000..5915691b
--- /dev/null
+++ b/flashring/internal/indicesV2/index_test.go
@@ -0,0 +1,135 @@
+package indicesv2
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestIndexAddRbMax(t *testing.T) {
+	loadByteOrder()
+
+	// Use equal initial and max capacity for the fixed-size ring buffer.
+	rbMax := 1000_000
+	rbInitial := rbMax
+	hashBits := 16
+	idx := NewIndex(hashBits, rbInitial, rbMax, 1)
+
+	// Insert exactly rbMax distinct keys
+	for i := 0; i < rbMax; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120) // ensure no expiry during test
+		memID := uint32(1000 + i)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	// All keys should be present in the reverse map
+	if got := len(idx.rm); got != rbMax {
+		t.Fatalf("expected %d keys in index map, got %d", rbMax, got)
+	}
+
+	// After filling to capacity, next add should require delete (ring wrapped)
+	if !idx.rb.NextAddNeedsDelete() {
+		t.Fatalf("expected ring buffer to report NextAddNeedsDelete == true after %d inserts", rbMax)
+	}
+
+	// Verify we can Get every inserted key and fields match
+	for i := 0; i < rbMax; i++ {
+		key := fmt.Sprintf("k%d", i)
+		expLength := uint16(100 + i)
+		expMemID := uint32(1000 + i)
+		expOffset := uint32(2000 + i)
+
+		length, _, _, _, memID, offset, status := idx.Get(key)
+		if status != StatusOK {
+			t.Fatalf("Get(%q) status = %v, want %v", key, status, StatusOK)
+		}
+		if length != expLength {
+			t.Fatalf("Get(%q) length = %d, want %d", key, length, expLength)
+		}
+		if memID != expMemID {
+			t.Fatalf("Get(%q) memID = %d, want %d", key, memID, expMemID)
+		}
+		if offset != expOffset {
+			t.Fatalf("Get(%q) offset = %d, want %d", key, offset, expOffset)
+		}
+	}
+}
+
+func TestIndexDeleteAndGet(t *testing.T) {
+	loadByteOrder()
+
+	// Keep this small and fast
+	rbMax := 99
+	rbInitial := rbMax
+	hashBits := 16
+	idx := NewIndex(hashBits, rbInitial, rbMax, 1)
+
+	// Insert exactly rbMax distinct keys in order
+	for i := 0; i < 33; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(1)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	for i := 33; i < 66; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(2)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+	for i := 66; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(3)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	if len(idx.rm) != rbMax {
+		t.Fatalf("expected %d keys after fill, got %d", rbMax, len(idx.rm))
+	}
+
+	// Ensure buffer is in the full state (next add would need delete)
+	if !idx.rb.NextAddNeedsDelete() {
+		t.Fatalf("expected NextAddNeedsDelete() to be true after fill")
+	}
+
+	for i := 0; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if st != StatusOK {
+			t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusOK)
+		}
+	}
+	// Delete oldest entries one-by-one and verify via Get
+	toDelete := 33
+	idx.Delete(toDelete)
+
+	if len(idx.rm) != rbMax-toDelete {
+		t.Fatalf("expected map size %d after deletes, got %d", rbMax-toDelete, len(idx.rm))
+	}
+
+	for i := 0; i < toDelete; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if st != StatusNotFound {
+			t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusNotFound)
+		}
+	}
+
+	for i := toDelete; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if st != StatusOK {
+			t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusOK)
+		}
+	}
+}
diff --git a/flashring/internal/indicesV2/rb.go b/flashring/internal/indicesV2/rb.go
new file mode 100644
index 00000000..7394e289
--- /dev/null
+++ b/flashring/internal/indicesV2/rb.go
@@ -0,0 +1,95 @@
+package indicesv2
+
+// Entry represents a 32-byte value. Adjust fields as needed.
+type Entry [16]byte
+
+// RingBuffer is a fixed-size circular queue that wraps around when full.
+// It maintains a sliding window of the most recent entries. Add returns an
+// absolute index which can be used with Get.
+type RingBuffer struct {
+	buf       []Entry
+	keyTable  []string
+	head      int
+	tail      int
+	size      int
+	nextIndex int
+	capacity  int // Fixed capacity (initial = max)
+	wrapped   bool
+}
+
+// NewRingBuffer creates a ring buffer with the given initial and maximum
+// capacity. Since we use a fixed-size buffer, initial and max should be the same.
+func NewRingBuffer(initial, max int) *RingBuffer {
+	if initial <= 0 || initial > max {
+		panic("invalid capacity")
+	}
+	// Use max capacity for fixed-size buffer (initial = max in practice)
+	capacity := max
+	return &RingBuffer{
+		buf:      make([]Entry, capacity),
+		keyTable: make([]string, capacity),
+		capacity: capacity,
+		wrapped:  false,
+	}
+}
+
+// Add inserts e into the buffer and returns its absolute index. When the buffer
+// is full it wraps around and overwrites the oldest entry.
+func (rb *RingBuffer) Add(e *Entry) int {
+	// Store the entry at current tail position
+	rb.buf[rb.nextIndex] = *e
+	idx := rb.nextIndex
+	rb.nextIndex = (rb.nextIndex + 1) % rb.capacity
+	if rb.nextIndex == rb.head {
+		rb.head = (rb.head + 1) % rb.capacity
+	}
+
+	return idx
+}
+
+func (rb *RingBuffer) NextAddNeedsDelete() bool {
+	return rb.nextIndex == rb.head && rb.wrapped
+}
+
+func (rb *RingBuffer) PutInNextFreeSlot(putFunc func(*Entry) string) (int, bool) {
+	idx := rb.nextIndex
+	rb.nextIndex = (rb.nextIndex + 1) % rb.capacity
+	shouldDelete := false
+	if rb.nextIndex == rb.head {
+		// rb.head = (rb.head + 1) % rb.capacity
+		rb.wrapped = true
+		shouldDelete = true
+
+	}
+	key := putFunc(&rb.buf[idx])
+	rb.keyTable[idx] = key
+
+	return idx, shouldDelete
+}
+
+// Get retrieves an entry by its absolute index. The boolean return is false if
+// the index is out of range (either overwritten or not yet added).
+func (rb *RingBuffer) Get(index int) (*Entry, bool) {
+	// Calculate the valid window based on current state
+	if index > rb.capacity {
+		return nil, false
+	}
+	return &rb.buf[index], true
+}
+
+// Delete removes the oldest entry from the buffer if it is not empty.
+// For a fixed-size ring buffer, this only decreases size if not at capacity.
+func (rb *RingBuffer) Delete() (*Entry, string, *Entry, string) {
+	deleted := rb.buf[rb.head]
+	deletedKey := rb.keyTable[rb.head]
+	rb.head = (rb.head + 1) % rb.capacity
+	return &deleted, deletedKey, &rb.buf[rb.head], rb.keyTable[rb.head]
+}
+
+// TailIndex returns the absolute index that will be assigned to the next Add.
+func (rb *RingBuffer) TailIndex() int {
+	return rb.nextIndex
+}
+func (rb *RingBuffer) ActiveEntries() int {
+	return (rb.nextIndex - rb.head + rb.capacity) % rb.capacity
+}
diff --git a/flashring/internal/indicesV2/rb_bench_test.go b/flashring/internal/indicesV2/rb_bench_test.go
new file mode 100644
index 00000000..0baeece0
--- /dev/null
+++ b/flashring/internal/indicesV2/rb_bench_test.go
@@ -0,0 +1,22 @@
+package indicesv2
+
+import (
+	"testing"
+)
+
+// BenchmarkRingBufferPush50M benchmarks pushing 50 million elements to the ring buffer
+func BenchmarkRingBufferPush50M(b *testing.B) {
+	rb := NewRingBuffer(1000, 50_000_000)
+
+	b.ResetTimer()
+	b.Run("Add", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			rb.Add(&Entry{})
+		}
+	})
+	b.Run("Get", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			rb.Get(i)
+		}
+	})
+}
diff --git a/flashring/internal/indicesV2/system.go b/flashring/internal/indicesV2/system.go
new file mode 100644
index 00000000..a5368576
--- /dev/null
+++ b/flashring/internal/indicesV2/system.go
@@ -0,0 +1,50 @@
+package indicesv2
+
+import (
+	"encoding/binary"
+	"unsafe"
+)
+
+var ByteOrder *CustomByteOrder
+
+type CustomByteOrder struct {
+	binary.ByteOrder
+}
+
+func loadByteOrder() {
+	buf := [2]byte{}
+	*(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)
+
+	switch buf {
+	case [2]byte{0xCD, 0xAB}:
+		ByteOrder = &CustomByteOrder{binary.LittleEndian}
+	case [2]byte{0xAB, 0xCD}:
+		ByteOrder = &CustomByteOrder{binary.BigEndian}
+	default:
+		panic("Could not determine endianness.")
+	}
+}
+
+func (c *CustomByteOrder) PutInt64(b []byte, v int64) {
+	c.PutUint64(b, uint64(v))
+}
+
+func (c *CustomByteOrder) Int64(b []byte) int64 {
+	return int64(c.Uint64(b))
+}
+
+func (c *CustomByteOrder) PutInt32(b []byte, v int32) {
+	c.PutUint32(b, uint32(v))
+}
+
+func (c *CustomByteOrder) Int32(b []byte) int32 {
+	return int32(c.Uint32(b))
+}
+
+func (c *CustomByteOrder) PutUint32(b []byte, v uint32) {
+	c.ByteOrder.PutUint32(b, v)
+}
+
+func (c *CustomByteOrder) Uint32(b []byte) uint32 {
+	return c.ByteOrder.Uint32(b)
+}
diff --git a/flashring/internal/indicesV3/constant.go b/flashring/internal/indicesV3/constant.go
new file mode 100644
index 00000000..2abcacff
--- /dev/null
+++ b/flashring/internal/indicesV3/constant.go
@@ -0,0 +1,24 @@
+package indicesv2
+
+const (
+
+	//[0]uint64
+	LENGTH_MASK        = (1 << 16) - 1
+	DELTA_EXPTIME_MASK = (1 << 16) - 1
+	LAST_ACCESS_MASK   = (1 << 16) - 1
+	FREQ_MASK          = (1 << 16) - 1
+	PREV_MASK          = (1 << 32) - 1
+	NEXT_MASK          = (1 << 32) - 1
+
+	//[1]uint64
+	MEM_ID_MASK = (1 << 32) - 1
+	OFFSET_MASK = (1 << 32) - 1
+
+	LENGTH_SHIFT        = 48
+	DELTA_EXPTIME_SHIFT = 32
+	LAST_ACCESS_SHIFT   = 16
+	FREQ_SHIFT          = 0
+
+	MEM_ID_SHIFT = 32
+	OFFSET_SHIFT = 0
+)
diff --git a/flashring/internal/indicesV3/delete_manager.go b/flashring/internal/indicesV3/delete_manager.go
new file mode 100644
index 00000000..6b218915
--- /dev/null
+++ b/flashring/internal/indicesV3/delete_manager.go
@@ -0,0 +1,76 @@
+package indicesv2
+
+import (
+	"fmt"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+	"github.com/rs/zerolog/log"
+)
+
+type DeleteManager struct {
+	memtableData        map[uint32]int
+	toBeDeletedMemId    uint32
+	keyIndex            *Index
+	wrapFile            *fs.WrapAppendFile
+	deleteInProgress    bool
+	deleteAmortizedStep int
+	deleteCount         int
+}
+
+func NewDeleteManager(keyIndex *Index, wrapFile *fs.WrapAppendFile, deleteAmortizedStep int) *DeleteManager {
+	return &DeleteManager{
+		memtableData:        make(map[uint32]int),
+		toBeDeletedMemId:    0,
+		keyIndex:            keyIndex,
+		wrapFile:            wrapFile,
+		deleteInProgress:    false,
+		deleteAmortizedStep: deleteAmortizedStep,
+	}
+}
+
+func (dm *DeleteManager) IncMemtableKeyCount(memId uint32) {
+	dm.memtableData[memId]++
+}
+
+func (dm *DeleteManager) ExecuteDeleteIfNeeded() error {
+	if dm.deleteInProgress {
+		memtableId, count := dm.keyIndex.Delete(dm.deleteCount)
+		if count == -1 {
+			return fmt.Errorf("delete failed")
+		}
+		if memtableId != dm.toBeDeletedMemId {
+			dm.memtableData[dm.toBeDeletedMemId] = dm.memtableData[dm.toBeDeletedMemId] - count
+			log.Debug().Msgf("memtableId: %d, toBeDeletedMemId: %d", memtableId, dm.toBeDeletedMemId)
+			if dm.memtableData[dm.toBeDeletedMemId] != 0 {
+				return fmt.Errorf("memtableData[dm.toBeDeletedMemId] != 0")
+			}
+			delete(dm.memtableData, dm.toBeDeletedMemId)
+			dm.toBeDeletedMemId = memtableId
+			dm.deleteInProgress = false
+			dm.deleteCount = 0
+			return nil
+		} else {
+			dm.memtableData[memtableId] -= count
+			//log.Debug().Msgf("memtableData[%d] = %d", memtableId, dm.memtableData[memtableId])
+		}
+		return nil
+	}
+
+	trimNeeded := dm.wrapFile.TrimHeadIfNeeded()
+	nextAddNeedsDelete := dm.keyIndex.GetRB().NextAddNeedsDelete()
+
+	if trimNeeded || nextAddNeedsDelete {
+		dm.deleteInProgress = true
+		dm.deleteCount = int(dm.memtableData[dm.toBeDeletedMemId] / dm.deleteAmortizedStep)
+		memIdAtHead, err := dm.keyIndex.PeekMemIdAtHead()
+		if err != nil {
+			return err
+		}
+		if memIdAtHead != dm.toBeDeletedMemId {
+			return fmt.Errorf("memIdAtHead: %d, toBeDeletedMemId: %d", memIdAtHead, dm.toBeDeletedMemId)
+		}
+		dm.wrapFile.TrimHead()
+		return nil
+	}
+	return nil
+}
diff --git a/flashring/internal/indicesV3/encoder.go b/flashring/internal/indicesV3/encoder.go
new file mode 100644
index 00000000..6db19207
--- /dev/null
+++ b/flashring/internal/indicesV3/encoder.go
@@ -0,0 +1,82 @@
+package indicesv2
+
+func encode(key string, length, deltaExptime, lastAccess, freq uint16, memId, offset uint32, entry *Entry) {
+
+	d1 := uint64(length&LENGTH_MASK) << LENGTH_SHIFT
+	d1 |= uint64(deltaExptime&DELTA_EXPTIME_MASK) << DELTA_EXPTIME_SHIFT
+	d1 |= uint64(lastAccess&LAST_ACCESS_MASK) << LAST_ACCESS_SHIFT
+	d1 |= uint64(freq&FREQ_MASK) << FREQ_SHIFT
+
+	ByteOrder.PutUint64(entry[:8], d1)
+
+	d2 := uint64(memId&MEM_ID_MASK) << MEM_ID_SHIFT
+	d2 |= uint64(offset&OFFSET_MASK) << OFFSET_SHIFT
+
+	ByteOrder.PutUint64(entry[8:16], d2)
+}
+
+func encodeHashNextPrev(hhi, hlo uint64, prev, next int32, entry *HashNextPrev) {
+	entry[0] = hhi
+	entry[1] = hlo
+	entry[2] = uint64(uint32(prev))<<32 | uint64(uint32(next))
+}
+
+func encodeUpdatePrev(prev int32, entry *HashNextPrev) {
+	next := entry[2] & NEXT_MASK
+	entry[2] = uint64(uint32(prev))<<32 | next
+}
+
+func encodeUpdateNext(next int32, entry *HashNextPrev) {
+	prev := (entry[2] >> 32) & PREV_MASK
+	entry[2] = uint64(uint32(prev))<<32 | uint64(uint32(next))
+}
+
+func decodeNext(entry *HashNextPrev) int32 {
+	return int32(uint32(entry[2] & NEXT_MASK))
+}
+
+func decodePrev(entry *HashNextPrev) int32 {
+	return int32(uint32(entry[2]>>32) & PREV_MASK)
+}
+
+func decodeHashLo(entry *HashNextPrev) uint64 {
+	return entry[1]
+}
+
+func decode(entry *Entry) (length, deltaExptime, lastAccess, freq uint16, memId, offset uint32) {
+	d1 := ByteOrder.Uint64(entry[:8])
+	d2 := ByteOrder.Uint64(entry[8:16])
+
+	length = uint16(d1>>LENGTH_SHIFT) & LENGTH_MASK
+	deltaExptime = uint16(d1>>DELTA_EXPTIME_SHIFT) & DELTA_EXPTIME_MASK
+	lastAccess = uint16(d1>>LAST_ACCESS_SHIFT) & LAST_ACCESS_MASK
+	freq = uint16(d1>>FREQ_SHIFT) & FREQ_MASK
+
+	memId = uint32(d2>>MEM_ID_SHIFT) & MEM_ID_MASK
+	offset = uint32(d2>>OFFSET_SHIFT) & OFFSET_MASK
+
+	return length, deltaExptime, lastAccess, freq, memId, offset
+}
+
+func decodeLastAccessNFreq(entry *Entry) (lastAccess, freq uint16) {
+	d1 := ByteOrder.Uint64(entry[:8])
+	lastAccess = uint16(d1>>LAST_ACCESS_SHIFT) & LAST_ACCESS_MASK
+	freq = uint16(d1>>FREQ_SHIFT) & FREQ_MASK
+
+	return lastAccess, freq
+}
+
+func encodeLastAccessNFreq(lastAccess, freq uint16, entry *Entry) {
+	d1 := ByteOrder.Uint64(entry[:8])
+	d1 |= uint64(lastAccess&LAST_ACCESS_MASK) << LAST_ACCESS_SHIFT
+	d1 |= uint64(freq&FREQ_MASK) << FREQ_SHIFT
+
+	ByteOrder.PutUint64(entry[:8], d1)
+}
+
+func decodeMemIdOffset(entry *Entry) (memId, offset uint32) {
+	d2 := ByteOrder.Uint64(entry[8:16])
+	memId = uint32(d2>>MEM_ID_SHIFT) & MEM_ID_MASK
+	offset = uint32(d2>>OFFSET_SHIFT) & OFFSET_MASK
+	return memId, offset
+}
diff --git a/flashring/internal/indicesV3/index.go b/flashring/internal/indicesV3/index.go
new file mode 100644
index 00000000..29261585
--- /dev/null
+++ b/flashring/internal/indicesV3/index.go
@@ -0,0 +1,167 @@
+package indicesv2
+
+import (
+	"errors"
+	"sync"
+	"time"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/maths"
+	"github.com/cespare/xxhash/v2"
+	"github.com/rs/zerolog/log"
+	"github.com/zeebo/xxh3"
+)
+
+var ErrGettingHeadEntry = errors.New("getting head entry failed")
+
+type Status int
+
+const (
+	StatusOK Status = iota
+	StatusNotFound
+	StatusExpired
+)
+
+type Index struct {
+	rm       sync.Map
+	rb       *RingBuffer
+	mc       *maths.MorrisLogCounter
+	startAt  int64
+	hashBits int
+}
+
+func NewIndex(hashBits int, rbInitial, rbMax, deleteAmortizedStep int) *Index {
+	if ByteOrder == nil {
+		loadByteOrder()
+	}
+	// rm := make(map[uint64]int)
+	return &Index{
+		rm:       sync.Map{},
+		rb:       NewRingBuffer(rbInitial, rbMax),
+		mc:       maths.New(12),
+		startAt:  time.Now().Unix(),
+		hashBits: hashBits,
+	}
+}
+
+func (i *Index) Put(key string, length, ttlInMinutes uint16, memId, offset uint32) {
+	hhi, hlo := hash128(key)
+	entry, hashNextPrev, idx, _ := i.rb.GetNextFreeSlot()
+	lastAccess := i.generateLastAccess()
+	freq := uint16(1)
+	expiryAt := (time.Now().Unix() / 60) + int64(ttlInMinutes)
+	delta := uint16(expiryAt - (i.startAt / 60))
+	encode(key, length, delta, lastAccess, freq, memId, offset, entry)
+
+	if headIdx, ok := i.rm.Load(hlo); !ok {
+		encodeHashNextPrev(hhi, hlo, -1, -1, hashNextPrev)
+		i.rm.Store(hlo, idx)
+		return
+	} else {
+		_, headHashNextPrev, _ := i.rb.Get(int(headIdx.(int)))
+		encodeUpdatePrev(int32(idx), headHashNextPrev)
+		encodeHashNextPrev(hhi, hlo, -1, int32(headIdx.(int)), hashNextPrev)
+		i.rm.Store(hlo, idx)
+		return
+	}
+
+}
+
+func (i *Index) Get(key string) (length, lastAccess, remainingTTL uint16, freq uint64, memId, offset uint32, status Status) {
+	hhi, hlo := hash128(key)
+	if idx, ok := i.rm.Load(hlo); ok {
+		entry, hashNextPrev, _ := i.rb.Get(int(idx.(int)))
+		for {
+			if isHashMatch(hhi, hlo, hashNextPrev) {
+				length, deltaExptime, lastAccess, freq, memId, offset := decode(entry)
+				exptime := int(deltaExptime) + int(i.startAt/60)
+				currentTime := int(time.Now().Unix() / 60)
+				remainingTTL := exptime - currentTime
+				if remainingTTL <= 0 {
+					return 0, 0, 0, 0, 0, 0, StatusExpired
+				}
+				lastAccess = i.generateLastAccess()
+				freq = i.incrFreq(freq)
+				encodeLastAccessNFreq(lastAccess, freq, entry)
+				return length, lastAccess, uint16(remainingTTL), i.mc.Value(uint32(freq)), memId, offset, StatusOK
+			}
+			if hasNext(hashNextPrev) {
+				idx = int(decodeNext(hashNextPrev))
+			} else {
+				return 0, 0, 0, 0, 0, 0, StatusNotFound
+			}
+		}
+
+	}
+	return 0, 0, 0, 0, 0, 0, StatusNotFound
+}
+
+func (ix *Index) Delete(count int) (uint32, int) {
+	for i := 0; i < count; i++ {
+		deleted, deletedHashNextPrev, deletedIdx, next := ix.rb.Delete()
+		if deleted == nil {
+			return 0, -1
+		}
+		delMemId, _ := decodeMemIdOffset(deleted)
+		deletedHlo := decodeHashLo(deletedHashNextPrev)
+		mapIdx, ok := ix.rm.Load(deletedHlo)
+		if ok && mapIdx.(int) == deletedIdx {
+			ix.rm.Delete(deletedHlo)
+		} else if ok && hasPrev(deletedHashNextPrev) {
+			prevIdx := decodePrev(deletedHashNextPrev)
+			_, hashNextPrev, _ := ix.rb.Get(int(prevIdx))
+			encodeUpdateNext(-1, hashNextPrev)
+		} else {
+			log.Warn().Msgf("broken link. Entry in RB but cannot be linked to map. deletedIdx: %d", deletedIdx)
+		}
+
+		nextMemId, _ := decodeMemIdOffset(next)
+		if nextMemId == delMemId+1 {
+			return nextMemId, i + 1
+		} else if nextMemId == delMemId && i == count-1 {
+			return delMemId, i + 1
+		} else if nextMemId == delMemId {
+			continue
+		} else {
+			return 0, -1
+		}
+	}
+	return 0, -1
+}
+
+func (ki *Index) GetRB() *RingBuffer {
+	return ki.rb
+}
+
+func (ki *Index) PeekMemIdAtHead() (uint32, error) {
+	entry, _, ok := ki.rb.Get(ki.rb.head)
+	if !ok {
+		return 0, ErrGettingHeadEntry
+	}
+	memId, _ := decodeMemIdOffset(entry)
+	return memId, nil
+}
+
+func (i *Index) generateLastAccess() uint16 {
+	return uint16((time.Now().Unix() - i.startAt) / 60)
+}
+
+func (i *Index) incrFreq(freq uint16) uint16 {
+	newFreq, _ := i.mc.Inc(uint32(freq))
+	return uint16(newFreq)
+}
+
+func hash128(key string) (uint64, uint64) {
+	return xxhash.Sum64String(key), xxh3.HashString(key)
+}
+
+func isHashMatch(hhi, hlo uint64, entry *HashNextPrev) bool {
+	return entry[0] == hhi && entry[1] == hlo
+}
+
+func hasNext(entry *HashNextPrev) bool {
+	return int32(entry[2]&NEXT_MASK) != -1
+}
+
+func hasPrev(entry *HashNextPrev) bool {
+	return int32((entry[2]>>32)&PREV_MASK) != -1
+}
diff --git a/flashring/internal/indicesV3/index_test.go b/flashring/internal/indicesV3/index_test.go
new file mode 100644
index 00000000..3eecea9d
--- /dev/null
+++ b/flashring/internal/indicesV3/index_test.go
@@ -0,0 +1,224 @@
+package indicesv2
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestIndexAddRbMax(t *testing.T) {
+	loadByteOrder()
+
+	// Use equal initial and max capacity for the fixed-size ring buffer.
+	rbMax := 1000_000
+	rbInitial := rbMax
+	hashBits := 16
+	idx := NewIndex(hashBits, rbInitial, rbMax, 1)
+
+	// Insert exactly rbMax distinct keys
+	for i := 0; i < rbMax; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120) // ensure no expiry during test
+		memID := uint32(1000 + i)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	// All keys should be present in the reverse map
+	if got := len(idx.rm); got != rbMax {
+		t.Fatalf("expected %d keys in index map, got %d", rbMax, got)
+	}
+
+	// After filling to capacity, next add should require delete (ring wrapped)
+	if !idx.rb.NextAddNeedsDelete() {
+		t.Fatalf("expected ring buffer to report NextAddNeedsDelete == true after %d inserts", rbMax)
+	}
+
+	// Verify we can Get every inserted key and fields match
+	for i := 0; i < rbMax; i++ {
+		key := fmt.Sprintf("k%d", i)
+		expLength := uint16(100 + i)
+		expMemID := uint32(1000 + i)
+		expOffset := uint32(2000 + i)
+
+		length, _, _, _, memID, offset, status := idx.Get(key)
+		if status != StatusOK {
+			t.Fatalf("Get(%q) status = %v, want %v", key, status, StatusOK)
+		}
+		if length != expLength {
+			t.Fatalf("Get(%q) length = %d, want %d", key, length, expLength)
+		}
+		if memID != expMemID {
+			t.Fatalf("Get(%q) memID = %d, want %d", key, memID, expMemID)
+		}
+		if offset != expOffset {
+			t.Fatalf("Get(%q) offset = %d, want %d", key, offset, expOffset)
+		}
+	}
+}
+
+func TestIndexDeleteAndGet(t *testing.T) {
+	loadByteOrder()
+
+	// Keep this small and fast
+	rbMax := 99
+	rbInitial := rbMax
+	hashBits := 16
+	idx := NewIndex(hashBits, rbInitial, rbMax, 1)
+
+	// Insert exactly rbMax distinct keys in order
+	for i := 0; i < 33; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(1)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	for i := 33; i < 66; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(2)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+	for i := 66; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(3)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	if len(idx.rm) != rbMax {
+		t.Fatalf("expected %d keys after fill, got %d", rbMax, len(idx.rm))
+	}
+
+	// Ensure buffer is in the full state (next add would need delete)
+	if !idx.rb.NextAddNeedsDelete() {
+		t.Fatalf("expected NextAddNeedsDelete() to be true after fill")
+	}
+
+	for i := 0; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if st != StatusOK {
+			t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusOK)
+		}
+	}
+	// Delete oldest entries one-by-one and verify via Get
+	toDelete := 33
+	idx.Delete(toDelete)
+
+	if len(idx.rm) != rbMax-toDelete {
+		t.Fatalf("expected map size %d after deletes, got %d", rbMax-toDelete, len(idx.rm))
+	}
+
+	for i := 0; i < toDelete; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if st != StatusNotFound {
+			t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusNotFound)
+		}
+	}
+
+	for i := toDelete; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if st != StatusOK {
+			t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusOK)
+		}
+	}
+}
+
+func TestIndexDeleteAndGetOverlappingHash(t *testing.T) {
+	loadByteOrder()
+
+	// Keep this small and fast
+	rbMax := 99
+	rbInitial := rbMax
+	hashBits := 16
+	idx := NewIndex(hashBits, rbInitial, rbMax, 1)
+
+	// Insert exactly rbMax distinct keys in order
+	for i := 0; i < 33; i++ {
+		key := fmt.Sprintf("k%d", i%33)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(1)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	for i := 33; i < 66; i++ {
+		key := fmt.Sprintf("k%d", i%33)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(2)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+	for i := 66; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		length := uint16(100 + i)
+		ttlMinutes := uint16(120)
+		memID := uint32(3)
+		offset := uint32(2000 + i)
+		idx.Put(key, length, ttlMinutes, memID, offset)
+	}
+
+	if len(idx.rm) != 2*rbMax/3 {
+		t.Fatalf("expected %d keys after fill, got %d", 2*rbMax/3, len(idx.rm))
+	}
+
+	// Ensure buffer is in the full state (next add would need delete)
+	if !idx.rb.NextAddNeedsDelete() {
+		t.Fatalf("expected NextAddNeedsDelete() to be true after fill")
+	}
+
+	for i := 0; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if i >= 0 && i < 33 || i >= 66 && i < 99 {
+			if st != StatusOK {
+				t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusOK)
+			}
+		} else {
+			if st != StatusNotFound {
+				t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusNotFound)
+			}
+		}
+	}
+	// Delete oldest entries one-by-one and verify via Get
+	toDelete := 33
+	idx.Delete(toDelete)
+
+	if len(idx.rm) != rbMax-toDelete {
+		t.Fatalf("expected map size %d after deletes, got %d", rbMax-toDelete, len(idx.rm))
+	}
+
+	for i := 0; i < toDelete; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if st != StatusOK {
+			t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusOK)
+		}
+	}
+
+	for i := toDelete; i < 99; i++ {
+		key := fmt.Sprintf("k%d", i)
+		_, _, _, _, _, _, st := idx.Get(key)
+		if i >= 0 && i < 33 || i >= 66 && i < 99 {
+			if st != StatusOK {
+				t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusOK)
+			}
+		} else {
+			if st != StatusNotFound {
+				t.Fatalf("Get(%q) status=%v, want %v", key, st, StatusNotFound)
+			}
+		}
+	}
+}
diff --git a/flashring/internal/indicesV3/rb.go b/flashring/internal/indicesV3/rb.go
new file mode 100644
index 00000000..10850bb3
--- /dev/null
+++ b/flashring/internal/indicesV3/rb.go
@@ -0,0 +1,94 @@
+package indicesv2
+
+// Entry represents a 32-byte value. Adjust fields as needed.
+type Entry [16]byte
+type HashNextPrev [3]uint64
+
+// RingBuffer is a fixed-size circular queue that wraps around when full.
+// It maintains a sliding window of the most recent entries. Add returns an
+// absolute index which can be used with Get.
+type RingBuffer struct {
+	buf       []Entry
+	hashTable []HashNextPrev
+	head      int
+	tail      int
+	size      int
+	nextIndex int
+	capacity  int // Fixed capacity (initial = max)
+	wrapped   bool
+}
+
+// NewRingBuffer creates a ring buffer with the given initial and maximum
+// capacity. Since we use a fixed-size buffer, initial and max should be the same.
+func NewRingBuffer(initial, max int) *RingBuffer {
+	if initial <= 0 || initial > max {
+		panic("invalid capacity")
+	}
+	// Use max capacity for fixed-size buffer (initial = max in practice)
+	capacity := max
+	return &RingBuffer{
+		buf:       make([]Entry, capacity),
+		hashTable: make([]HashNextPrev, capacity),
+		capacity:  capacity,
+		wrapped:   false,
+	}
+}
+
+// Add inserts e into the buffer and returns its absolute index. When the buffer
+// is full it wraps around and overwrites the oldest entry.
+func (rb *RingBuffer) Add(e *Entry) int {
+	// Store the entry at current tail position
+	rb.buf[rb.nextIndex] = *e
+	idx := rb.nextIndex
+	rb.nextIndex = (rb.nextIndex + 1) % rb.capacity
+	if rb.nextIndex == rb.head {
+		rb.head = (rb.head + 1) % rb.capacity
+	}
+
+	return idx
+}
+
+func (rb *RingBuffer) NextAddNeedsDelete() bool {
+	return rb.nextIndex == rb.head && rb.wrapped
+}
+
+func (rb *RingBuffer) GetNextFreeSlot() (*Entry, *HashNextPrev, int, bool) {
+	idx := rb.nextIndex
+	rb.nextIndex = (rb.nextIndex + 1) % rb.capacity
+	shouldDelete := false
+	if rb.nextIndex == rb.head {
+		// rb.head = (rb.head + 1) % rb.capacity
+		rb.wrapped = true
+		shouldDelete = true
+
+	}
+	return &rb.buf[idx], &rb.hashTable[idx], idx, shouldDelete
+}
+
+// Get retrieves an entry by its absolute index. The boolean return is false if
+// the index is out of range (either overwritten or not yet added).
+func (rb *RingBuffer) Get(index int) (*Entry, *HashNextPrev, bool) {
+	// Calculate the valid window based on current state
+	if index > rb.capacity {
+		return nil, nil, false
+	}
+	return &rb.buf[index], &rb.hashTable[index], true
+}
+
+// Delete removes the oldest entry from the buffer if it is not empty.
+// For a fixed-size ring buffer, this only decreases size if not at capacity.
+func (rb *RingBuffer) Delete() (*Entry, *HashNextPrev, int, *Entry) {
+	deletedIdx := rb.head
+	deleted := rb.buf[rb.head]
+	deletedHashNextPrev := rb.hashTable[rb.head]
+	rb.head = (rb.head + 1) % rb.capacity
+	return &deleted, &deletedHashNextPrev, deletedIdx, &rb.buf[rb.head]
+}
+
+// TailIndex returns the absolute index that will be assigned to the next Add.
+func (rb *RingBuffer) TailIndex() int {
+	return rb.nextIndex
+}
+func (rb *RingBuffer) ActiveEntries() int {
+	return (rb.nextIndex - rb.head + rb.capacity) % rb.capacity
+}
diff --git a/flashring/internal/indicesV3/rb_bench_test.go b/flashring/internal/indicesV3/rb_bench_test.go
new file mode 100644
index 00000000..0baeece0
--- /dev/null
+++ b/flashring/internal/indicesV3/rb_bench_test.go
@@ -0,0 +1,22 @@
+package indicesv2
+
+import (
+	"testing"
+)
+
+// BenchmarkRingBufferPush50M benchmarks pushing 50 million elements to the ring buffer
+func BenchmarkRingBufferPush50M(b *testing.B) {
+	rb := NewRingBuffer(1000, 50_000_000)
+
+	b.ResetTimer()
+	b.Run("Add", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			rb.Add(&Entry{})
+		}
+	})
+	b.Run("Get", func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			rb.Get(i)
+		}
+	})
+}
diff --git a/flashring/internal/indicesV3/system.go b/flashring/internal/indicesV3/system.go
new file mode 100644
index 00000000..a5368576
--- /dev/null
+++ b/flashring/internal/indicesV3/system.go
@@ -0,0 +1,50 @@
+package indicesv2
+
+import (
+	"encoding/binary"
+	"unsafe"
+)
+
+var ByteOrder *CustomByteOrder
+
+type CustomByteOrder struct {
+	binary.ByteOrder
+}
+
+func loadByteOrder() {
+	buf := [2]byte{}
+	*(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)
+
+	switch buf {
+	case [2]byte{0xCD, 0xAB}:
+		ByteOrder = &CustomByteOrder{binary.LittleEndian}
+	case [2]byte{0xAB, 0xCD}:
+		ByteOrder = &CustomByteOrder{binary.BigEndian}
+	default:
+		panic("Could not determine endianness.")
+	}
+}
+
+func (c *CustomByteOrder) PutInt64(b []byte, v int64) {
+	c.PutUint64(b, uint64(v))
+}
+
+func (c *CustomByteOrder) Int64(b []byte) int64 {
+	return int64(c.Uint64(b))
+}
+
+func (c *CustomByteOrder) PutInt32(b []byte, v int32) {
+	c.PutUint32(b, uint32(v))
+}
+
+func (c *CustomByteOrder) Int32(b []byte) int32 {
+	return int32(c.Uint32(b))
+}
+
+func (c *CustomByteOrder) PutUint32(b []byte, v uint32) {
+	c.ByteOrder.PutUint32(b, v)
+}
+
+func (c *CustomByteOrder) Uint32(b []byte) uint32 {
+	return c.ByteOrder.Uint32(b)
+}
diff --git a/flashring/internal/maths/estimator.go b/flashring/internal/maths/estimator.go
new file mode 100644
index 00000000..f477d96e
--- /dev/null
+++ b/flashring/internal/maths/estimator.go
@@ -0,0 +1,178 @@
+// Package estimator implements online adaptive grid search for tuning
+// weights (wFreq, wLA) to optimize cache rewrite decisions based on hit ratio.
+package maths
+
+import (
+	"math"
+	"time"
+)
+
+const (
+	missBaseline = float64(1e-9)
+)
+
+type WeightTuple struct {
+	WFreq float64
+	WLA   float64
+}
+
+type Stats struct {
+	HitRate float64 // averaged hit rate over time window
+	Trials  int
+}
+
+type GridSearchEstimator struct {
+	Tuples         []WeightTuple
+	InitialTuples  []WeightTuple
+	bestTuple      WeightTuple
+	TupleStats     map[WeightTuple]*Stats
+	CurrIndex      int
+	StartTime      time.Time
+	Duration       time.Duration
+	LiveEstimator  *Estimator
+	stopGridSearch bool
+	bestHitRate    float64
+	epsilon        float64
+}
+
+type Estimator struct {
+	WFreq float64
+	WLA   float64
+}
+
+func NewGridSearchEstimator(duration time.Duration, initialTuples []WeightTuple, estimator *Estimator, epsilon float64) *GridSearchEstimator {
+	return &GridSearchEstimator{
+		Tuples:         initialTuples,
+		InitialTuples:  initialTuples,
+		bestTuple:      initialTuples[0],
+		TupleStats:     make(map[WeightTuple]*Stats),
+		CurrIndex:      0,
+		StartTime:      time.Now(),
+		Duration:       duration,
+		LiveEstimator:  estimator,
+		bestHitRate:    0,
+		stopGridSearch: false,
+		epsilon:        epsilon,
+	}
+}
+
+func (e *Estimator) CalculateRewriteScore(freq uint64, lastAccess uint64, keyMemId, activeMemId, maxMemTableCount uint32) float32 {
+	overWriteRisk := (activeMemId - keyMemId + maxMemTableCount) % maxMemTableCount
+	overWriteRiskScore := float32(overWriteRisk) / float32(maxMemTableCount)
+
+	fScore := 1 - math.Exp(-e.WFreq*float64(freq))
+	laScore := math.Exp(-e.WLA * float64(lastAccess))
+	return float32(fScore+laScore) * overWriteRiskScore
+}
+
+func (g *GridSearchEstimator) RecordHitRate(hitRate float64) {
+	if g.stopGridSearch {
+		tuple := g.bestTuple
+		if _, ok := g.TupleStats[tuple]; !ok {
+			g.TupleStats[tuple] = &Stats{}
+		}
+		stat := g.TupleStats[tuple]
+		stat.HitRate = (stat.HitRate*float64(stat.Trials) + hitRate) / float64(stat.Trials+1)
+		stat.Trials++
+		if stat.HitRate < g.bestHitRate*0.9 {
+			g.RestartGridSearch()
+		}
+		return
+	}
+	tuple := g.Tuples[g.CurrIndex]
+	if _, ok := g.TupleStats[tuple]; !ok {
+		g.TupleStats[tuple] = &Stats{}
+	}
+	stat := g.TupleStats[tuple]
+	stat.HitRate = (stat.HitRate*float64(stat.Trials) + hitRate) / float64(stat.Trials+1)
+	stat.Trials++
+
+	if time.Since(g.StartTime) < g.Duration {
+		return
+	}
+	// Advance to next tuple
+	g.CurrIndex = (g.CurrIndex + 1) % len(g.Tuples)
+	if g.CurrIndex == 0 {
+		ok := g.RefineGridAroundBest(2, 0.001)
+		if !ok {
+			g.stopGridSearch = true
+			return
+		}
+	}
+	g.StartTime = time.Now()
+
+	// Update live estimator
+	next := g.Tuples[g.CurrIndex]
+	g.LiveEstimator.WFreq = next.WFreq
+	g.LiveEstimator.WLA = next.WLA
+}
+
+func (g *GridSearchEstimator) BestTuple() WeightTuple {
+
+	best := WeightTuple{}
+	bestScore := -1.0
+
+	for _, tup := range g.Tuples {
+		stat := g.TupleStats[tup]
+		if stat == nil || stat.Trials < 3 {
+			continue
+		}
+		if stat.HitRate > bestScore {
+			bestScore = stat.HitRate
+			best = tup
+		}
+	}
+
+	return best
+}
+
+func (g *GridSearchEstimator) GenerateRefinedGrid(base WeightTuple, steps int, delta float64) ([]WeightTuple, bool) {
+	refined := make([]WeightTuple, 0, (2*steps+1)*(2*steps+1))
+	for i := -steps; i <= steps; i++ {
+		for j := -steps; j <= steps; j++ {
+			wf := base.WFreq + float64(i)*delta
+			la := base.WLA + float64(j)*delta
+			if math.Abs(wf-base.WFreq) < g.epsilon && math.Abs(la-base.WLA) < g.epsilon {
+				return refined, false
+			}
+			if wf > 0 && la > 0 {
+				refined = append(refined, WeightTuple{wf, la})
+			}
+		}
+	}
+	return refined, true
+}
+
+func (g *GridSearchEstimator) RefineGridAroundBest(steps int, delta float64) bool {
+	best := g.BestTuple()
+	refined, ok := g.GenerateRefinedGrid(best, steps, delta)
+	if !ok {
+		g.LiveEstimator.WFreq = best.WFreq
+		g.LiveEstimator.WLA = best.WLA
+		g.bestHitRate = g.TupleStats[best].HitRate
+		g.bestTuple = best
+		return false
+	}
+	g.Tuples = refined
+	g.CurrIndex = 0
+	g.TupleStats = make(map[WeightTuple]*Stats)
+	g.LiveEstimator.WFreq = g.Tuples[0].WFreq
+	g.LiveEstimator.WLA = g.Tuples[0].WLA
+	g.StartTime = time.Now()
+	return true
+}
+
+func (g *GridSearchEstimator) RestartGridSearch() {
+	g.stopGridSearch = false
+	g.Tuples = g.InitialTuples
+	g.CurrIndex = 0
+	g.TupleStats = make(map[WeightTuple]*Stats)
+	g.LiveEstimator.WFreq = g.Tuples[0].WFreq
+	g.LiveEstimator.WLA = g.Tuples[0].WLA
+	g.StartTime = time.Now()
+	g.bestHitRate = 0
+}
+
+func (g *GridSearchEstimator) IsGridSearchActive() bool {
+	return !g.stopGridSearch
+}
diff --git a/flashring/internal/maths/freq.go b/flashring/internal/maths/freq.go
new file mode 100644
index 00000000..3471c123
--- /dev/null
+++ b/flashring/internal/maths/freq.go
@@ -0,0 +1,140 @@
+// freq.go
+package maths
+
+/*
+Package maths implements a **decimal Morris‑style probabilistic counter**
+compressed into a single `uint32`.
+
+------------------------------------------------------------------------
+How the algorithm works
+------------------------------------------------------------------------
+1. **Layout (24 bits)**
+| exponent : 20 bits | mantissa : 4 bits |
+e m (0‑9)
+The counter encodes ≈ `m · 10ᵉ`.
+* Mantissa cycles at 10 (`mOverflow = 10`).
+* `expClamp` chosen at construction bounds the maximum exponent.
+
+2. **Increment rule**
+On each logical “event”, we increment the *stored* value only with
+probability **1 / 10ᵉ** (Bernoulli trial).
+- A 32‑bit xorshift PRNG generates `rand32()`.
+- We pre‑compute `th[e] = ⌊2³² / 10ᵉ⌋`.
+- `rand32() < th[e]` ⇢ *hit* ⇒ advance mantissa (`m++`).
+- When `m == 10` we reset `m = 0` and bump the exponent
+  (until `expClamp`, then we saturate).
+
+This is the classic idea introduced by **Robert Morris** for “counting
+large numbers of events in small registers” :contentReference[oaicite:0]{index=0}.
+
+3. **Decoding**
+To retrieve an approximate frequency, multiply
+`m · 10ᵉ` (done with a tiny `pow10` table).
+
+4. **Error guarantees**
+For mantissa 0‑9 the standard deviation of the estimate is
+`σ ≈ √m · 10ᵉ`, so the relative error is ≤ `1/√m`
+(≤ 33 % worst‑case, ≤ 10 % once `m ≥ 10`).
+Such accuracy is typical for Morris‑style counters used in
+streaming & LFU/TinyLFU cache admission :contentReference[oaicite:1]{index=1}.
+
+5. **Complexity & footprint**
+* **State per key:** 4 bytes.
+* **Increment:** ~7 integer ops for PRNG + 1 compare + a few bit‑ops
+  ⇒ ~5‑7 ns on modern CPUs (counter update is usually cheaper than the
+  surrounding map/slice access).
+* **No floating‑point or division** in the hot path; thresholds
+  are prepared once in `New`.
+
+------------------------------------------------------------------------
+References
+------------------------------------------------------------------------
+* R. Morris. “Counting large numbers of events in small registers.”
+*Communications of the ACM*, 21(10): 840‑842, 1978. :contentReference[oaicite:2]{index=2}
+* P. Flajolet. “Approximate Counting: A Detailed Analysis.” *BIT* 25, 1985. :contentReference[oaicite:3]{index=3}
+* G. Gundersen, “Approximate Counting with Morris’s Algorithm,” blog post, 2019. :contentReference[oaicite:4]{index=4}
+*/
+
+// 4‑bit mantissa (0‑9).  20‑bit exponent (0 … expClamp).
+const (
+	mBits     = 4
+	eBits     = 24 - mBits
+	mMask     = (1 << mBits) - 1 // 0xF
+	eShift    = mBits
+	mOverflow = 10 // mantissa cycles at 10
+)
+
+// ----------- fast RNG (xorshift32) --------
+var rng uint32 = 0x7263b8e4 // non‑zero seed
+
+type MorrisLogCounter struct {
+	th       []uint32 // thresholds   th[e] = floor(2^32 / 10^e)
+	pow10    []uint64 // pow10[e] = 10^e
+	expClamp uint32
+	rng      uint32
+}
+
+// New prepares tables for a desired exponent ceiling.
+// expClamp must fit in the 20‑bit exponent field.
+func New(expClamp uint32) *MorrisLogCounter {
+	if expClamp >= 1<<eBits {
+		panic("expClamp exceeds 20‑bit exponent capacity")
+	}
+
+	th := make([]uint32, expClamp+1)
+	pow10 := make([]uint64, expClamp+1)
+
+	var p10 uint64 = 1 // 10^0
+	max32 := uint64(^uint32(0))
+	for e := uint32(0); e <= expClamp; e++ {
+		if e > 0 {
+			p10 *= 10
+		}
+		pow10[e] = p10
+		th[e] = uint32(max32 / p10) // floor(2^32 / 10^e)
+	}
+
+	return &MorrisLogCounter{
+		th:       th,
+		pow10:    pow10,
+		expClamp: expClamp,
+		rng:      rng,
+	}
+}
+
+func (c *MorrisLogCounter) Inc(v uint32) (uint32, bool) {
+	m := v & mMask   // mantissa
+	e := v >> eShift // exponent (0 … expClamp)
+
+	// 1 / 10^e probability check
+	if c.rand32() >= c.th[e] {
+		return v, false // miss
+	}
+
+	// hit
+	m++
+	if m == mOverflow {
+		m = 0
+		if e < c.expClamp {
+			e++
+		} else { // saturated at top state
+			m = mOverflow - 1
+		}
+	}
+	return (e << eShift) | m, true
+}
+
+func (c *MorrisLogCounter) Value(v uint32) uint64 {
+	m := uint64(v & mMask)
+	e := v >> eShift
+	return m * c.pow10[e]
+}
+
+func (c *MorrisLogCounter) rand32() uint32 {
+	r := rng
+	r ^= r << 13
+	r ^= r >> 17
+	r ^= r << 5
+	rng = r
+	return r
+}
diff --git a/flashring/internal/maths/freq_test.go b/flashring/internal/maths/freq_test.go
new file mode 100644
index 00000000..4eae335f
--- /dev/null
+++ b/flashring/internal/maths/freq_test.go
@@ -0,0 +1,402 @@
+package maths
+
+import (
+	"testing"
+)
+
+func TestNew(t *testing.T) {
+	tests := []struct {
+		name     string
+		expClamp uint32
+		wantErr  bool
+	}{
+		{
+			name:     "valid small expClamp",
+			expClamp: 5,
+			wantErr:  false,
+		},
+		{
+			name:     "valid zero expClamp",
+			expClamp: 0,
+			wantErr:  false,
+		},
+		{
+			name:     "valid medium expClamp",
+			expClamp: 15, // smaller reasonable test value
+			wantErr:  false,
+		},
+		{
+			name:     "invalid expClamp exceeds 20-bit",
+			expClamp: 1 << eBits, // exceeds 20-bit capacity
+			wantErr:  true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			defer func() {
+				if r := recover(); (r != nil) != tt.wantErr {
+					t.Errorf("New() panic = %v, wantErr %v", r != nil, tt.wantErr)
+				}
+			}()
+
+			counter := New(tt.expClamp)
+			if !tt.wantErr {
+				if counter == nil {
+					t.Error("New() returned nil for valid input")
+					return
+				}
+				if counter.expClamp != tt.expClamp {
+					t.Errorf("New() expClamp = %v, want %v", counter.expClamp, tt.expClamp)
+				}
+				if len(counter.th) != int(tt.expClamp+1) {
+					t.Errorf("New() threshold table length = %v, want %v", len(counter.th), tt.expClamp+1)
+				}
+				if len(counter.pow10) != int(tt.expClamp+1) {
+					t.Errorf("New() pow10 table length = %v, want %v", len(counter.pow10), tt.expClamp+1)
+				}
+			}
+		})
+	}
+}
+
+func TestPow10Table(t *testing.T) {
+	counter := New(5)
+
+	expected := []uint64{1, 10, 100, 1000, 10000, 100000}
+	for i, exp := range expected {
+		if counter.pow10[i] != exp {
+			t.Errorf("pow10[%d] = %v, want %v", i, counter.pow10[i], exp)
+		}
+	}
+}
+
+func TestThresholdTable(t *testing.T) {
+	counter := New(3)
+
+	// th[e] should equal floor(2^32 / 10^e)
+	max32 := uint64(^uint32(0)) // 2^32 - 1
+
+	for e := uint32(0); e <= 3; e++ {
+		var pow10e uint64 = 1
+		for i := uint32(0); i < e; i++ {
+			pow10e *= 10
+		}
+		expected := uint32(max32 / pow10e)
+		if counter.th[e] != expected {
+			t.Errorf("th[%d] = %v, want %v", e, counter.th[e], expected)
+		}
+	}
+}
+
+func TestValue(t *testing.T) {
+	counter := New(5)
+
+	tests := []struct {
+		name     string
+		v        uint32
+		expected uint64
+	}{
+		{
+			name:     "mantissa 0, exponent 0",
+			v:        0, // m=0, e=0
+			expected: 0,
+		},
+		{
+			name:     "mantissa 5, exponent 0",
+			v:        5, // m=5, e=0
+			expected: 5,
+		},
+		{
+			name:     "mantissa 3, exponent 1",
+			v:        (1 << eShift) | 3, // m=3, e=1
+			expected: 30,
+		},
+		{
+			name:     "mantissa 7, exponent 2",
+			v:        (2 << eShift) | 7, // m=7, e=2
+			expected: 700,
+		},
+		{
+			name:     "mantissa 9, exponent 3",
+			v:        (3 << eShift) | 9, // m=9, e=3
+			expected: 9000,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := counter.Value(tt.v)
+			if result != tt.expected {
+				t.Errorf("Value(%v) = %v, want %v", tt.v, result, tt.expected)
+			}
+		})
+	}
+}
+
+func TestIncBasicBehavior(t *testing.T) {
+	counter := New(5)
+
+	// Test mantissa increment when increment succeeds
+	// We'll force hits by setting a predictable RNG state
+	originalRng := rng
+	defer func() { rng = originalRng }()
+
+	// Set RNG to always return 0 (guaranteed hit)
+	rng = 0
+
+	v := uint32(5) // m=5, e=0
+	newV, hit := counter.Inc(v)
+
+	if !hit {
+		t.Error("Inc() should have hit with RNG=0")
+	}
+
+	expectedM := uint32(6)
+	expectedE := uint32(0)
+	expectedV := (expectedE << eShift) | expectedM
+
+	if newV != expectedV {
+		t.Errorf("Inc(%v) = %v, want %v", v, newV, expectedV)
+	}
+}
+
+func TestIncMantissaOverflow(t *testing.T) {
+	counter := New(5)
+
+	// Force hits by setting RNG to 0
+	originalRng := rng
+	defer func() { rng = originalRng }()
+	rng = 0
+
+	// Test mantissa overflow: m=9 -> m=0, e++
+	v := uint32(9) // m=9, e=0
+	newV, hit := counter.Inc(v)
+
+	if !hit {
+		t.Error("Inc() should have hit with RNG=0")
+	}
+
+	expectedM := uint32(0)
+	expectedE := uint32(1)
+	expectedV := (expectedE << eShift) | expectedM
+
+	if newV != expectedV {
+		t.Errorf("Inc(%v) = %v, want %v (m=0, e=1)", v, newV, expectedV)
+	}
+}
+
+func TestIncExponentSaturation(t *testing.T) {
+	counter := New(2) // expClamp = 2
+
+	// Force hits by setting RNG to 0
+	originalRng := rng
+	defer func() { rng = originalRng }()
+	rng = 0
+
+	// Test saturation at expClamp: m=9, e=expClamp
+	v := (uint32(2) << eShift) | 9 // m=9, e=2 (at expClamp)
+	newV, hit := counter.Inc(v)
+
+	if !hit {
+		t.Error("Inc() should have hit with RNG=0")
+	}
+
+	// Should saturate at m=9, e=2 (not overflow)
+	expectedM := uint32(9) // mOverflow - 1
+	expectedE := uint32(2) // stays at expClamp
+	expectedV := (expectedE << eShift) | expectedM
+
+	if newV != expectedV {
+		t.Errorf("Inc(%v) = %v, want %v (saturated)", v, newV, expectedV)
+	}
+}
+
+func TestIncMissBehavior(t *testing.T) {
+	counter := New(5)
+
+	originalRng := rng
+	defer func() { rng = originalRng }()
+
+	// Use a higher exponent where th[e] is smaller and easier to exceed
+	// th[3] = 4294967 (from debug output)
+	v := uint32((3 << eShift) | 5) // m=5, e=3
+
+	// Find an RNG value that will cause rand32() to return >= th[3]
+	// We'll try a few seeds until we find one that causes a miss
+	missFound := false
+	for seed := uint32(0xFFFFFF00); seed != 0; seed++ {
+		rng = seed
+		testRand := counter.rand32()
+		if testRand >= counter.th[3] {
+			// Reset and use this seed
+			rng = seed
+			newV, hit := counter.Inc(v)
+
+			if !hit && newV == v {
+				missFound = true
+				break
+			}
+		}
+	}
+
+	if !missFound {
+		t.Skip("Could not find RNG seed that causes miss - test may be flaky")
+	}
+}
+
+func TestIncStatisticalBehavior(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping statistical test in short mode")
+	}
+
+	counter := New(10)
+
+	// Reset RNG to ensure reproducible but varied sequence
+	originalRng := rng
+	defer func() { rng = originalRng }()
+	rng = 12345
+
+	// Test with e=0 (should hit approximately 100% of the time)
+	v := uint32(5) // m=5, e=0
+	hits := 0
+	trials := 1000
+
+	for i := 0; i < trials; i++ {
+		_, hit := counter.Inc(v)
+		if hit {
+			hits++
+		}
+	}
+
+	// With e=0, probability should be close to 1.0
+	hitRate := float64(hits) / float64(trials)
+	if hitRate < 0.95 { // Allow some variance due to PRNG
+		t.Errorf("Hit rate for e=0 = %v, want > 0.95", hitRate)
+	}
+
+	// Test with e=1 (should hit approximately 10% of the time)
+	v = (1 << eShift) | 5 // m=5, e=1
+	hits = 0
+
+	for i := 0; i < trials; i++ {
+		_, hit := counter.Inc(v)
+		if hit {
+			hits++
+		}
+	}
+
+	hitRate = float64(hits) / float64(trials)
+	// Allow reasonable variance: 0.05 to 0.15 for 10% expected
+	if hitRate < 0.05 || hitRate > 0.15 {
+		t.Errorf("Hit rate for e=1 = %v, want ~0.10 (0.05-0.15)", hitRate)
+	}
+}
+
+func TestIntegrationCountingApproximation(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test in short mode")
+	}
+
+	counter := New(10)
+
+	// Reset RNG to ensure reproducible results
+	originalRng := rng
+	defer func() { rng = originalRng }()
+	rng = 98765
+
+	// Simulate counting events - start with higher initial state
+	v := uint32(5) // start with m=5, e=0 to avoid edge cases
+	actualIncrements := 0
+
+	// Perform many logical increments
+	for i := 0; i < 10000; i++ {
+		newV, hit := counter.Inc(v)
+		if hit {
+			v = newV
+			actualIncrements++
+		}
+	}
+
+	// Get the approximate count
+	approxCount := counter.Value(v)
+
+	// Since we started with m=5, the base count is 5
+	// The approximation should account for this
+	if actualIncrements == 0 && approxCount == 5 {
+		// If no actual increments happened, approxCount should still be the initial value
+		return
+	}
+
+	// The approximation should be reasonable
+	// Given the probabilistic nature, we expect some error
+	if actualIncrements > 0 && approxCount > 0 {
+		ratio := float64(approxCount) / float64(actualIncrements+5) // +5 for initial value
+
+		// The ratio should be reasonably close to 1.0
+		// Morris counters can have significant variance, so we allow a wide range
+		if ratio < 0.1 || ratio > 10.0 {
+			t.Errorf("Approximation ratio = %v, actualIncrements = %v, approxCount = %v",
+				ratio, actualIncrements, approxCount)
+		}
+	}
+}
+
+func TestBitPacking(t *testing.T) {
+	// Test that mantissa and exponent are properly packed/unpacked
+	counter := New(5)
+
+	tests := []struct {
+		mantissa uint32
+		exponent uint32
+	}{
+		{0, 0},
+		{9, 0},
+		{0, 5},
+		{7, 3},
+		{15, 2}, // This tests mantissa > 9 (should mask to 4 bits)
+	}
+
+	for _, tt := range tests {
+		v := (tt.exponent << eShift) | (tt.mantissa & mMask)
+
+		extractedM := v & mMask
+		extractedE := v >> eShift
+
+		expectedM := tt.mantissa & mMask // masked to 4 bits
+
+		if extractedM != expectedM {
+			t.Errorf("Mantissa packing: got %v, want %v", extractedM, expectedM)
+		}
+		if extractedE != tt.exponent {
+			t.Errorf("Exponent packing: got %v, want %v", extractedE, tt.exponent)
+		}
+
+		// Test Value() decoding
+		decoded := counter.Value(v)
+		expected := uint64(expectedM) * counter.pow10[tt.exponent]
+		if decoded != expected {
+			t.Errorf("Value() = %v, want %v", decoded, expected)
+		}
+	}
+}
+
+func BenchmarkInc(b *testing.B) {
+	counter := New(10)
+	v := uint32(123)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		v, _ = counter.Inc(v)
+	}
+}
+
+func BenchmarkValue(b *testing.B) {
+	counter := New(10)
+	v := uint32(123)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_ = counter.Value(v)
+	}
+}
diff --git a/flashring/internal/maths/predictor.go b/flashring/internal/maths/predictor.go
new file mode 100644
index 00000000..edf3b128
--- /dev/null
+++ b/flashring/internal/maths/predictor.go
@@ -0,0 +1,58 @@
+package maths
+
+import "time"
+
+type Params struct {
+	Freq        uint64
+	LastAccess  uint64
+	KeyMemId    uint32
+	ActiveMemId uint32
+}
+type Predictor struct {
+	Estimator             *Estimator
+	GridSearchEstimator   *GridSearchEstimator
+	ReWriteScoreThreshold float32
+	MaxMemTableCount      uint32
+	hitRateCh             chan float64
+}
+
+type PredictorConfig struct {
+	ReWriteScoreThreshold float32
+	Weights               []WeightTuple
+	SampleDuration        time.Duration
+	MaxMemTableCount      uint32
+	GridSearchEpsilon     float64
+}
+
+func NewPredictor(config PredictorConfig) *Predictor {
+	estimator := &Estimator{
+		WFreq: config.Weights[0].WFreq,
+		WLA:   config.Weights[0].WLA,
+	}
+	gridSearchEstimator := NewGridSearchEstimator(config.SampleDuration, config.Weights, estimator, config.GridSearchEpsilon)
+	p := &Predictor{
+		Estimator:             estimator,
+		GridSearchEstimator:   gridSearchEstimator,
+		ReWriteScoreThreshold: config.ReWriteScoreThreshold,
+		MaxMemTableCount:      config.MaxMemTableCount,
+		hitRateCh:             make(chan float64, 1024),
+	}
+	go func() {
+		for hitRate := range p.hitRateCh {
+			p.GridSearchEstimator.RecordHitRate(hitRate)
+		}
+	}()
+	return p
+}
+
+func (p *Predictor) Predict(freq uint64, lastAccess uint64, keyMemId uint32, activeMemId uint32) bool {
+	score := p.Estimator.CalculateRewriteScore(freq, lastAccess, keyMemId, activeMemId, p.MaxMemTableCount)
+	return score > p.ReWriteScoreThreshold
+}
+
+func (p *Predictor) Observe(hitRate float64) {
+	select {
+	case p.hitRateCh <- hitRate:
+	default:
+	}
+}
diff --git a/flashring/internal/maths/predictor_test.go b/flashring/internal/maths/predictor_test.go
new file mode 100644
index 00000000..56f6590d
--- /dev/null
+++ b/flashring/internal/maths/predictor_test.go
@@ -0,0 +1,483 @@
+package maths
+
+import (
+	"testing"
+	"time"
+)
+
+func TestNewPredictor(t *testing.T) {
+	config := PredictorConfig{
+		ReWriteScoreThreshold: 0.5,
+		Weights: []WeightTuple{
+			{WFreq: 0.1, WLA: 0.2},
+			{WFreq: 0.2, WLA: 0.3},
+		},
+		SampleDuration:    100 * time.Millisecond,
+		MaxMemTableCount:  10,
+		GridSearchEpsilon: 0.001,
+	}
+
+	predictor := NewPredictor(config)
+
+	// Verify predictor initialization
+	if predictor == nil {
+		t.Fatal("NewPredictor returned nil")
+	}
+	if predictor.ReWriteScoreThreshold != 0.5 {
+		t.Errorf("Expected ReWriteScoreThreshold 0.5, got %f", predictor.ReWriteScoreThreshold)
+	}
+	if predictor.MaxMemTableCount != 10 {
+		t.Errorf("Expected MaxMemTableCount 10, got %d", predictor.MaxMemTableCount)
+	}
+
+	// Verify estimator initialization
+	if predictor.Estimator == nil {
+		t.Fatal("Estimator not initialized")
+	}
+	if predictor.Estimator.WFreq != 0.1 {
+		t.Errorf("Expected WFreq 0.1, got %f", predictor.Estimator.WFreq)
+	}
+	if predictor.Estimator.WLA != 0.2 {
+		t.Errorf("Expected WLA 0.2, got %f", predictor.Estimator.WLA)
+	}
+
+	// Verify grid search estimator initialization
+	if predictor.GridSearchEstimator == nil {
+		t.Fatal("GridSearchEstimator not initialized")
+	}
+
+	// Verify channel initialization
+	if predictor.hitRateCh == nil {
+		t.Fatal("hitRateCh not initialized")
+	}
+}
+
+func TestPredictorPredict(t *testing.T) {
+	config := PredictorConfig{
+		ReWriteScoreThreshold: 0.5,
+		Weights: []WeightTuple{
+			{WFreq: 0.1, WLA: 0.2},
+		},
+		SampleDuration:    100 * time.Millisecond,
+		MaxMemTableCount:  10,
+		GridSearchEpsilon: 0.001,
+	}
+
+	predictor := NewPredictor(config)
+
+	tests := []struct {
+		name          string
+		freq          uint64
+		lastAccess    uint64
+		keyMemId      uint32
+		activeMemId   uint32
+		expectRewrite bool
+	}{
+		{
+			name:          "high frequency, recent access, high overwrite risk",
+			freq:          100,
+			lastAccess:    1,
+			keyMemId:      0,
+			activeMemId:   8,
+			expectRewrite: true,
+		},
+		{
+			name:          "low frequency, old access, low overwrite risk",
+			freq:          1,
+			lastAccess:    1000,
+			keyMemId:      5,
+			activeMemId:   6,
+			expectRewrite: false,
+		},
+		{
+			name:          "medium frequency, medium access, medium overwrite risk",
+			freq:          10,
+			lastAccess:    50,
+			keyMemId:      3,
+			activeMemId:   7,
+			expectRewrite: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := predictor.Predict(tt.freq, tt.lastAccess, tt.keyMemId, tt.activeMemId)
+			if result != tt.expectRewrite {
+				score := predictor.Estimator.CalculateRewriteScore(
+					tt.freq, tt.lastAccess, tt.keyMemId, tt.activeMemId, predictor.MaxMemTableCount)
+				t.Errorf("Expected %v, got %v (score: %f, threshold: %f)",
+					tt.expectRewrite, result, score, predictor.ReWriteScoreThreshold)
+			}
+		})
+	}
+}
+
+func TestPredictorObserve(t *testing.T) {
+	config := PredictorConfig{
+		ReWriteScoreThreshold: 0.5,
+		Weights: []WeightTuple{
+			{WFreq: 0.1, WLA: 0.2},
+		},
+		SampleDuration:    10 * time.Millisecond,
+		MaxMemTableCount:  10,
+		GridSearchEpsilon: 0.001,
+	}
+
+	predictor := NewPredictor(config)
+
+	// Test observing hit rates
+	hitRates := []float64{0.8, 0.7, 0.9, 0.6}
+
+	for _, hitRate := range hitRates {
+		predictor.Observe(hitRate)
+	}
+
+	// Give some time for the goroutine to process
+	time.Sleep(50 * time.Millisecond)
+
+	// Channel should not block on additional observations
+	for i := 0; i < 10; i++ {
+		predictor.Observe(0.5)
+	}
+}
+
+func TestEstimatorCalculateRewriteScore(t *testing.T) {
+	estimator := &Estimator{
+		WFreq: 0.1,
+		WLA:   0.2,
+	}
+
+	tests := []struct {
+		name             string
+		freq             uint64
+		lastAccess       uint64
+		keyMemId         uint32
+		activeMemId      uint32
+		maxMemTableCount uint32
+		expectHighScore  bool
+	}{
+		{
+			name:             "high frequency, recent access, high overwrite risk",
+			freq:             100,
+			lastAccess:       1,
+			keyMemId:         0,
+			activeMemId:      9,
+			maxMemTableCount: 10,
+			expectHighScore:  true,
+		},
+		{
+			name:             "low frequency, old access, low overwrite risk",
+			freq:             1,
+			lastAccess:       1000,
+			keyMemId:         5,
+			activeMemId:      6,
+			maxMemTableCount: 10,
+			expectHighScore:  false,
+		},
+		{
+			name:             "zero frequency should give low score",
+			freq:             0,
+			lastAccess:       0,
+			keyMemId:         0,
+			activeMemId:      0,
+			maxMemTableCount: 10,
+			expectHighScore:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			score := estimator.CalculateRewriteScore(
+				tt.freq, tt.lastAccess, tt.keyMemId, tt.activeMemId, tt.maxMemTableCount)
+
+			if tt.expectHighScore && score < 0.1 {
+				t.Errorf("Expected high score, got %f", score)
+			}
+			if !tt.expectHighScore && score > 0.5 {
+				t.Errorf("Expected low score, got %f", score)
+			}
+
+			// Score should always be non-negative
+			if score < 0 {
+				t.Errorf("Score should be non-negative, got %f", score)
+			}
+		})
+	}
+}
+
+func TestEstimatorScoreComponents(t *testing.T) {
+	estimator := &Estimator{
+		WFreq: 0.1,
+		WLA:   0.2,
+	}
+
+	// Test that frequency score increases with frequency
+	score1 := estimator.CalculateRewriteScore(1, 100, 0, 5, 10)
+	score2 := estimator.CalculateRewriteScore(10, 100, 0, 5, 10)
+	score3 := estimator.CalculateRewriteScore(100, 100, 0, 5, 10)
+
+	if !(score1 < score2 && score2 < score3) {
+		t.Errorf("Score should increase with frequency: %f, %f, %f", score1, score2, score3)
+	}
+
+	// Test that last access score decreases with time
+	score1 = estimator.CalculateRewriteScore(10, 1, 0, 5, 10)
+	score2 = estimator.CalculateRewriteScore(10, 10, 0, 5, 10)
+	score3 = estimator.CalculateRewriteScore(10, 100, 0, 5, 10)
+
+	if !(score1 > score2 && score2 > score3) {
+		t.Errorf("Score should decrease with last access time: %f, %f, %f", score1, score2, score3)
+	}
+
+	// Test overwrite risk calculation
+	score1 = estimator.CalculateRewriteScore(10, 10, 0, 1, 10) // low risk
+	score2 = estimator.CalculateRewriteScore(10, 10, 0, 5, 10) // medium risk
+	score3 = estimator.CalculateRewriteScore(10, 10, 0, 9, 10) // high risk
+
+	if !(score1 < score2 && score2 < score3) {
+		t.Errorf("Score should increase with overwrite risk: %f, %f, %f", score1, score2, score3)
+	}
+}
+
+func TestGridSearchEstimator(t *testing.T) {
+	initialTuples := []WeightTuple{
+		{WFreq: 0.1, WLA: 0.1},
+		{WFreq: 0.2, WLA: 0.2},
+		{WFreq: 0.3, WLA: 0.3},
+	}
+
+	estimator := &Estimator{WFreq: 0.1, WLA: 0.1}
+	gridSearch := NewGridSearchEstimator(
+		50*time.Millisecond,
+		initialTuples,
+		estimator,
+		0.001,
+	)
+
+	// Test initialization
+	if len(gridSearch.Tuples) != 3 {
+		t.Errorf("Expected 3 tuples, got %d", len(gridSearch.Tuples))
+	}
+	if gridSearch.CurrIndex != 0 {
+		t.Errorf("Expected CurrIndex 0, got %d", gridSearch.CurrIndex)
+	}
+
+	// Test recording hit rates
+	hitRates := []float64{0.8, 0.7, 0.9}
+	for i, hitRate := range hitRates {
+		gridSearch.RecordHitRate(hitRate)
+		if i < len(hitRates)-1 {
+			time.Sleep(60 * time.Millisecond) // Wait for duration to pass
+		}
+	}
+
+	// Verify stats are recorded
+	for _, tuple := range initialTuples {
+		if stat, ok := gridSearch.TupleStats[tuple]; ok && stat.Trials > 0 {
+			if stat.HitRate < 0 || stat.HitRate > 1 {
+				t.Errorf("Invalid hit rate %f for tuple %+v", stat.HitRate, tuple)
+			}
+		}
+	}
+}
+
+func TestGridSearchBestTuple(t *testing.T) {
+	initialTuples := []WeightTuple{
+		{WFreq: 0.1, WLA: 0.1},
+		{WFreq: 0.2, WLA: 0.2},
+		{WFreq: 0.3, WLA: 0.3},
+	}
+
+	estimator := &Estimator{WFreq: 0.1, WLA: 0.1}
+	gridSearch := NewGridSearchEstimator(
+		10*time.Millisecond,
+		initialTuples,
+		estimator,
+		0.001,
+	)
+
+	// Manually add stats
+	gridSearch.TupleStats[initialTuples[0]] = &Stats{HitRate: 0.7, Trials: 5}
+	gridSearch.TupleStats[initialTuples[1]] = &Stats{HitRate: 0.9, Trials: 5}
+	gridSearch.TupleStats[initialTuples[2]] = &Stats{HitRate: 0.6, Trials: 5}
+
+	best := gridSearch.BestTuple()
+	expected := initialTuples[1] // Should be the one with 0.9 hit rate
+
+	if best.WFreq != expected.WFreq || best.WLA != expected.WLA {
+		t.Errorf("Expected best tuple %+v, got %+v", expected, best)
+	}
+}
+
+func TestGridSearchRefinement(t *testing.T) {
+	initialTuples := []WeightTuple{
+		{WFreq: 0.2, WLA: 0.2},
+	}
+
+	estimator := &Estimator{WFreq: 0.2, WLA: 0.2}
+	gridSearch := NewGridSearchEstimator(
+		10*time.Millisecond,
+		initialTuples,
+		estimator,
+		0.01, // Larger epsilon
+	)
+
+	// Test grid refinement with delta larger than epsilon
+	base := WeightTuple{WFreq: 0.2, WLA: 0.2}
+	_, ok := gridSearch.GenerateRefinedGrid(base, 1, 0.1)
+
+	// The function returns false when it encounters the center point (i=0, j=0)
+	// where both differences are 0 (which is < epsilon), so it will return false
+	// This is actually the expected behavior - it means the grid is too fine
+	if ok {
+		t.Error("Grid refinement should return false due to center point having zero difference")
+	}
+
+	// Test with a different approach - use larger delta relative to epsilon
+	gridSearch2 := NewGridSearchEstimator(
+		10*time.Millisecond,
+		initialTuples,
+		estimator,
+		0.001, // Smaller epsilon
+	)
+
+	// Test with delta much larger than epsilon and non-zero base that avoids zero differences
+	base2 := WeightTuple{WFreq: 0.5, WLA: 0.5}
+	_, ok2 := gridSearch2.GenerateRefinedGrid(base2, 2, 0.1)
+
+	// This should also return false due to the center point issue
+	if ok2 {
+		t.Error("Grid refinement should return false due to center point check")
+	}
+
+	// The function logic checks if differences are small at any point during iteration
+	// and returns false when it finds the center point where difference is 0
+	// This seems to be the intended behavior to detect when refinement should stop
+}
+
+func TestGridSearchConvergence(t *testing.T) {
+	initialTuples := []WeightTuple{
+		{WFreq: 0.1, WLA: 0.1},
+	}
+
+	estimator := &Estimator{WFreq: 0.1, WLA: 0.1}
+	gridSearch := NewGridSearchEstimator(
+		1*time.Millisecond,
+		initialTuples,
+		estimator,
+		0.1, // Large epsilon for quick convergence
+	)
+
+	// Test convergence with very small delta
+	base := WeightTuple{WFreq: 0.1, WLA: 0.1}
+	_, ok := gridSearch.GenerateRefinedGrid(base, 1, 0.01) // Small delta
+
+	if ok {
+		t.Error("Grid refinement should fail when delta is smaller than epsilon")
+	}
+}
+
+func BenchmarkEstimatorCalculateRewriteScore(b *testing.B) {
+	estimator := &Estimator{
+		WFreq: 0.1,
+		WLA:   0.2,
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		estimator.CalculateRewriteScore(
+			uint64(i%100+1),  // freq
+			uint64(i%1000+1), // lastAccess
+			uint32(i%10),     // keyMemId
+			uint32((i+5)%10), // activeMemId
+			10,               // maxMemTableCount
+		)
+	}
+}
+
+func BenchmarkPredictorPredict(b *testing.B) {
+	config := PredictorConfig{
+		ReWriteScoreThreshold: 0.5,
+		Weights: []WeightTuple{
+			{WFreq: 0.1, WLA: 0.2},
+		},
+		SampleDuration:    100 * time.Millisecond,
+		MaxMemTableCount:  10,
+		GridSearchEpsilon: 0.001,
+	}
+
+	predictor := NewPredictor(config)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		predictor.Predict(
+			uint64(i%100+1),  // freq
+			uint64(i%1000+1), // lastAccess
+			uint32(i%10),     // keyMemId
+			uint32((i+5)%10), // activeMemId
+		)
+	}
+}
+
+// Integration test that simulates a realistic cache scenario
+func TestPredictorIntegration(t *testing.T) {
+	config := PredictorConfig{
+		ReWriteScoreThreshold: 0.3,
+		Weights: []WeightTuple{
+			{WFreq: 0.1, WLA: 0.1},
+			{WFreq: 0.2, WLA: 0.2},
+			{WFreq: 0.3, WLA: 0.3},
+		},
+		SampleDuration:    20 * time.Millisecond,
+		MaxMemTableCount:  8,
+		GridSearchEpsilon: 0.01,
+	}
+
+	predictor := NewPredictor(config)
+
+	// Simulate cache operations
+	type cacheOp struct {
+		freq        uint64
+		lastAccess  uint64
+		keyMemId    uint32
+		activeMemId uint32
+	}
+
+	operations := []cacheOp{
+		{freq: 100, lastAccess: 1, keyMemId: 0, activeMemId: 7},  // Should rewrite
+		{freq: 1, lastAccess: 1000, keyMemId: 6, activeMemId: 7}, // Should not rewrite
+		{freq: 50, lastAccess: 10, keyMemId: 2, activeMemId: 6},  // Maybe rewrite
+		{freq: 200, lastAccess: 5, keyMemId: 1, activeMemId: 7},  // Should rewrite
+	}
+
+	rewriteCount := 0
+	for i, op := range operations {
+		shouldRewrite := predictor.Predict(op.freq, op.lastAccess, op.keyMemId, op.activeMemId)
+		if shouldRewrite {
+			rewriteCount++
+		}
+
+		// Simulate hit rate feedback
+		var hitRate float64
+		if shouldRewrite {
+			hitRate = 0.8 + 0.1*float64(i%3) // Simulated good hit rate for rewrites
+		} else {
+			hitRate = 0.6 + 0.1*float64(i%2) // Simulated moderate hit rate for no rewrites
+		}
+
+		predictor.Observe(hitRate)
+
+		// Small delay to allow processing
+		time.Sleep(5 * time.Millisecond)
+	}
+
+	// Should have made some rewrite decisions
+	if rewriteCount == 0 {
+		t.Error("Expected at least some rewrite decisions")
+	}
+	if rewriteCount == len(operations) {
+		t.Error("Should not rewrite everything")
+	}
+
+	t.Logf("Made %d rewrites out of %d operations", rewriteCount, len(operations))
+}
diff --git a/flashring/internal/memtables/manager.go b/flashring/internal/memtables/manager.go
new file mode 100644
index 00000000..a86fb108
--- /dev/null
+++ b/flashring/internal/memtables/manager.go
@@ -0,0 +1,119 @@
+package memtables
+
+import (
+	"github.com/Meesho/BharatMLStack/flashring/internal/allocators"
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+	"github.com/rs/zerolog/log"
+)
+
+type MemtableManager struct {
+	file     *fs.WrapAppendFile
+	Capacity int32
+
+	memtable1      *Memtable
+	memtable2      *Memtable
+	activeMemtable *Memtable
+	nextFileOffset int64
+	nextId         uint32
+	semaphore      chan int
+	stats          Stats
+}
+
+type Stats struct {
+	Flushes int64
+}
+
+func NewMemtableManager(file *fs.WrapAppendFile, capacity int32) (*MemtableManager, error) {
+	allocatorConfig := allocators.SlabAlignedPageAllocatorConfig{
+		SizeClasses: []allocators.SizeClass{
+			{Size: int(capacity), MinCount: 2},
+		},
+	}
+	allocator, err := allocators.NewSlabAlignedPageAllocator(allocatorConfig)
+	if err != nil {
+		return nil, err
+	}
+	page1 := allocator.Get(int(capacity))
+	page2 := allocator.Get(int(capacity))
+	memtable1, err := NewMemtable(MemtableConfig{
+		capacity: int(capacity),
+		id:       0,
+		page:     page1,
+		file:     file,
+	})
+	if err != nil {
+		return nil, err
+	}
+	memtable2, err := NewMemtable(MemtableConfig{
+		capacity: int(capacity),
+		id:       1,
+		page:     page2,
+		file:     file,
+	})
+	if err != nil {
+		return nil, err
+	}
+	memtableManager := &MemtableManager{
+		file:           file,
+		Capacity:       capacity,
+		memtable1:      memtable1,
+		memtable2:      memtable2,
+		activeMemtable: memtable1,
+		nextFileOffset: 2 * int64(capacity),
+		nextId:         2,
+		semaphore:      make(chan int, 1),
+		stats:          Stats{},
+	}
+	return memtableManager, nil
+}
+
+func (mm *MemtableManager) GetMemtable() (*Memtable, uint32, uint64) {
+	return mm.activeMemtable, mm.activeMemtable.Id, uint64(mm.activeMemtable.Id) * uint64(mm.Capacity)
+}
+
+func (mm *MemtableManager) GetMemtableById(id uint32) *Memtable {
+	if mm.memtable1.Id == id {
+		return mm.memtable1
+	}
+	if mm.memtable2.Id == id {
+		return mm.memtable2
+	}
+	return nil
+}
+
+func (mm *MemtableManager) flushConsumer(memtable *Memtable) {
+	n, fileOffset, err := memtable.Flush()
+	if n != int(mm.Capacity) {
+		log.Error().Msgf("Flush size mismatch: memId:%d fileOffset:%d nextFileOffset:%d n:%d err:%v", memtable.Id, fileOffset, mm.nextFileOffset, n, err)
+	}
+	if err != nil {
+		log.Error().Msgf("Failed to flush memtable: memId:%d fileOffset:%d nextFileOffset:%d n:%d err:%v", memtable.Id, fileOffset, mm.nextFileOffset, n, err)
+	}
+	memtable.Id = mm.nextId
+	mm.nextId++
+	mm.nextFileOffset += int64(n)
+	mm.stats.Flushes++
+}
+func (mm *MemtableManager) Flush() error {
+
+	memtableToFlush := mm.activeMemtable
+	mm.semaphore <- 1
+
+	// Swap to the other memtable
+	if mm.activeMemtable == mm.memtable1 {
+		mm.activeMemtable = mm.memtable2
+	} else {
+		mm.activeMemtable = mm.memtable1
+	}
+	go func() {
+		defer func() {
+			<-mm.semaphore
+			if r := recover(); r != nil {
+				log.Error().Msgf("Recovered from panic in goroutine: %v", r)
+			}
+		}()
+		mm.flushConsumer(memtableToFlush)
+	}()
+
+	return nil
+}
diff --git a/flashring/internal/memtables/manager_bench_test.go b/flashring/internal/memtables/manager_bench_test.go
new file mode 100644
index 00000000..28738185
--- /dev/null
+++ b/flashring/internal/memtables/manager_bench_test.go
@@ -0,0 +1,55 @@
+package memtables
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+)
+
+// Helper function to create a test file for benchmarks
+func createManagerBenchmarkFile(b *testing.B) *fs.WrapAppendFile {
+	filename := fmt.Sprintf("/media/a0d00kc/freedom/tmp/bench_memtable_%d.dat", time.Now().UnixNano())
+
+	config := fs.FileConfig{
+		Filename:          filename,
+		MaxFileSize:       20 * 1024 * 1024 * 1024, // 20GB for benchmarks
+		FilePunchHoleSize: 1024 * 1024 * 1024,      // 1GB
+		BlockSize:         fs.BLOCK_SIZE,
+	}
+
+	file, err := fs.NewWrapAppendFile(config)
+	if err != nil {
+		b.Fatalf("Failed to create benchmark file: %v", err)
+	}
+	return file
+}
+
+func Benchmark_Puts(b *testing.B) {
+	file := createManagerBenchmarkFile(b)
+
+	manager, err := NewMemtableManager(file, 1024*1024*1024)
+	if err != nil {
+		b.Fatalf("Failed to create memtable manager: %v", err)
+	}
+
+	buf16k := make([]byte, 16*1024)
+	for j := range buf16k {
+		buf16k[j] = byte(j % 256)
+	}
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		memtable, _, _ := manager.GetMemtable()
+		_, _, readyForFlush := memtable.Put(buf16k)
+		if readyForFlush {
+			manager.Flush()
+		}
+	}
+
+	b.ReportMetric(float64(manager.stats.Flushes), "flushes")
+	b.ReportMetric(float64(b.N*16*1024)/1024/1024, "MB/s")
+	b.ReportAllocs()
+
+}
diff --git a/flashring/internal/memtables/manager_test.go b/flashring/internal/memtables/manager_test.go
new file mode 100644
index 00000000..3772f0c5
--- /dev/null
+++ b/flashring/internal/memtables/manager_test.go
@@ -0,0 +1,375 @@
+package memtables
+
+import (
+	"path/filepath"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+)
+
+// Helper function to create a mock file for testing
+func createTestFileForManager(t *testing.T) *fs.WrapAppendFile {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_memtable_manager.dat")
+
+	config := fs.FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024, // 1MB
+		FilePunchHoleSize: 64 * 1024,   // 64KB
+		BlockSize:         fs.BLOCK_SIZE,
+	}
+
+	file, err := fs.NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create test file: %v", err)
+	}
+	return file
+}
+
+func TestNewMemtableManager_Success(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2) // 8192 bytes
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	// Verify initial state
+	if manager.file != file {
+		t.Errorf("Expected file to be set correctly")
+	}
+	if manager.Capacity != capacity {
+		t.Errorf("Expected capacity %d, got %d", capacity, manager.Capacity)
+	}
+	if manager.memtable1 == nil {
+		t.Errorf("Expected memtable1 to be initialized")
+	}
+	if manager.memtable2 == nil {
+		t.Errorf("Expected memtable2 to be initialized")
+	}
+	if manager.activeMemtable != manager.memtable1 {
+		t.Errorf("Expected activeMemtable to be memtable1 initially")
+	}
+	if manager.nextFileOffset != 2*int64(capacity) {
+		t.Errorf("Expected nextFileOffset to be %d, got %d", 2*int64(capacity), manager.nextFileOffset)
+	}
+	if manager.nextId != 2 {
+		t.Errorf("Expected nextId to be 2, got %d", manager.nextId)
+	}
+	if cap(manager.semaphore) != 1 {
+		t.Errorf("Expected semaphore capacity to be 1, got %d", cap(manager.semaphore))
+	}
+
+	// Verify memtable initial IDs
+	if manager.memtable1.Id != 0 {
+		t.Errorf("Expected memtable1 ID to be 0, got %d", manager.memtable1.Id)
+	}
+	if manager.memtable2.Id != 1 {
+		t.Errorf("Expected memtable2 ID to be 1, got %d", manager.memtable2.Id)
+	}
+}
+
+func TestNewMemtableManager_InvalidCapacity(t *testing.T) {
+	// Test with capacity not aligned to block size
+	capacity := int32(fs.BLOCK_SIZE + 1) // Should fail alignment check
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	_, err := NewMemtableManager(file, capacity)
+	if err == nil {
+		t.Errorf("Expected NewMemtableManager to fail with invalid capacity")
+	}
+}
+
+func TestNewMemtableManager_NilFile(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+
+	_, err := NewMemtableManager(nil, capacity)
+	if err == nil {
+		t.Errorf("Expected NewMemtableManager to fail with nil file")
+	}
+}
+
+func TestMemtableManager_GetMemtable(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	memtable, id, offset := manager.GetMemtable()
+
+	// Initially should return memtable1
+	if memtable != manager.memtable1 {
+		t.Errorf("Expected to get memtable1")
+	}
+	if id != 0 {
+		t.Errorf("Expected ID 0, got %d", id)
+	}
+	expectedOffset := uint64(0) * uint64(capacity)
+	if offset != expectedOffset {
+		t.Errorf("Expected offset %d, got %d", expectedOffset, offset)
+	}
+}
+
+func TestMemtableManager_GetMemtableById(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	// Test getting memtable1 by ID
+	memtable := manager.GetMemtableById(0)
+	if memtable != manager.memtable1 {
+		t.Errorf("Expected to get memtable1 for ID 0")
+	}
+
+	// Test getting memtable2 by ID
+	memtable = manager.GetMemtableById(1)
+	if memtable != manager.memtable2 {
+		t.Errorf("Expected to get memtable2 for ID 1")
+	}
+
+	// Test getting non-existent memtable
+	memtable = manager.GetMemtableById(999)
+	if memtable != nil {
+		t.Errorf("Expected nil for non-existent ID, got %v", memtable)
+	}
+}
+
+func TestMemtableManager_Flush(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	// Verify initial state
+	originalActive := manager.activeMemtable
+	originalNextId := manager.nextId
+
+	// Perform flush
+	err = manager.Flush()
+	if err != nil {
+		t.Fatalf("Flush failed: %v", err)
+	}
+
+	// Verify active memtable swapped
+	if manager.activeMemtable == originalActive {
+		t.Errorf("Expected active memtable to be swapped")
+	}
+
+	// Active should now be the other memtable
+	if originalActive == manager.memtable1 {
+		if manager.activeMemtable != manager.memtable2 {
+			t.Errorf("Expected active memtable to be memtable2")
+		}
+	} else {
+		if manager.activeMemtable != manager.memtable1 {
+			t.Errorf("Expected active memtable to be memtable1")
+		}
+	}
+
+	// Give time for background goroutine to complete
+	time.Sleep(100 * time.Millisecond)
+
+	// Verify nextId was incremented (this happens in background)
+	if manager.nextId <= originalNextId {
+		t.Errorf("Expected nextId to be incremented, got %d, expected > %d", manager.nextId, originalNextId)
+	}
+}
+
+func TestMemtableManager_FlushSwapsBetweenMemtables(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	// Initially active is memtable1
+	if manager.activeMemtable != manager.memtable1 {
+		t.Fatalf("Expected initial active to be memtable1")
+	}
+
+	// First flush - should swap to memtable2
+	err = manager.Flush()
+	if err != nil {
+		t.Fatalf("First flush failed: %v", err)
+	}
+	if manager.activeMemtable != manager.memtable2 {
+		t.Errorf("Expected active to be memtable2 after first flush")
+	}
+
+	// Second flush - should swap back to memtable1
+	err = manager.Flush()
+	if err != nil {
+		t.Fatalf("Second flush failed: %v", err)
+	}
+	if manager.activeMemtable != manager.memtable1 {
+		t.Errorf("Expected active to be memtable1 after second flush")
+	}
+}
+
+func TestMemtableManager_FlushConcurrency(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	const numConcurrentFlushes = 10
+	var wg sync.WaitGroup
+	errors := make(chan error, numConcurrentFlushes)
+
+	// Launch multiple concurrent flushes
+	for i := 0; i < numConcurrentFlushes; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			if err := manager.Flush(); err != nil {
+				errors <- err
+			}
+		}()
+	}
+
+	wg.Wait()
+	close(errors)
+
+	// Check for errors
+	for err := range errors {
+		t.Errorf("Concurrent flush failed: %v", err)
+	}
+
+	// Give time for all background operations to complete
+	time.Sleep(200 * time.Millisecond)
+
+	// Verify manager is still in a valid state
+	memtable, id, offset := manager.GetMemtable()
+	if memtable == nil {
+		t.Errorf("Active memtable should not be nil")
+	}
+	if id != memtable.Id {
+		t.Errorf("Returned ID %d should match memtable ID %d", id, memtable.Id)
+	}
+	expectedOffset := uint64(memtable.Id) * uint64(capacity)
+	if offset != expectedOffset {
+		t.Errorf("Expected offset %d, got %d", expectedOffset, offset)
+	}
+}
+
+func TestMemtableManager_GetMemtableAfterFlush(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	// Get initial memtable
+	initialMemtable, initialId, _ := manager.GetMemtable()
+
+	// Perform flush
+	err = manager.Flush()
+	if err != nil {
+		t.Fatalf("Flush failed: %v", err)
+	}
+
+	// Get memtable after flush
+	newMemtable, newId, newOffset := manager.GetMemtable()
+
+	// Should be different memtable
+	if newMemtable == initialMemtable {
+		t.Errorf("Expected different memtable after flush")
+	}
+	if newId == initialId {
+		t.Errorf("Expected different ID after flush")
+	}
+
+	// Offset calculation should be correct
+	expectedOffset := uint64(newId) * uint64(capacity)
+	if newOffset != expectedOffset {
+		t.Errorf("Expected offset %d, got %d", expectedOffset, newOffset)
+	}
+}
+
+func TestMemtableManager_Integration(t *testing.T) {
+	capacity := int32(fs.BLOCK_SIZE * 2)
+	file := createTestFileForManager(t)
+	defer file.Close()
+
+	manager, err := NewMemtableManager(file, capacity)
+	if err != nil {
+		t.Fatalf("NewMemtableManager failed: %v", err)
+	}
+
+	// Test complete workflow: get memtable, put data, flush, repeat
+	testData := []byte("Hello, MemtableManager!")
+
+	// Get initial memtable and put some data
+	memtable, id, _ := manager.GetMemtable()
+	offset, length, readyForFlush := memtable.Put(testData)
+	if readyForFlush {
+		t.Errorf("Memtable should not be ready for flush after small put")
+	}
+
+	// Verify data can be retrieved
+	data, err := memtable.Get(offset, length)
+	if err != nil {
+		t.Fatalf("Failed to get data: %v", err)
+	}
+	if string(data) != string(testData) {
+		t.Errorf("Expected %s, got %s", testData, data)
+	}
+
+	// Verify GetMemtableById works
+	retrievedMemtable := manager.GetMemtableById(id)
+	if retrievedMemtable != memtable {
+		t.Errorf("GetMemtableById should return the same memtable")
+	}
+
+	// Perform flush and verify state changes
+	err = manager.Flush()
+	if err != nil {
+		t.Fatalf("Flush failed: %v", err)
+	}
+
+	// Get new active memtable
+	newMemtable, newId, _ := manager.GetMemtable()
+	if newMemtable == memtable {
+		t.Errorf("Active memtable should have changed after flush")
+	}
+	if newId == id {
+		t.Errorf("Active memtable ID should have changed after flush")
+	}
+
+	// Old memtable should still be retrievable by its original ID
+	oldMemtable := manager.GetMemtableById(id)
+	if oldMemtable != memtable {
+		t.Errorf("Should still be able to retrieve old memtable by ID")
+	}
+
+	// Give background flush time to complete
+	time.Sleep(100 * time.Millisecond)
+}
diff --git a/flashring/internal/memtables/memtable.go b/flashring/internal/memtables/memtable.go
new file mode 100644
index 00000000..bc92f0ff
--- /dev/null
+++ b/flashring/internal/memtables/memtable.go
@@ -0,0 +1,115 @@
+package memtables
+
+import (
+	"errors"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+	"github.com/rs/zerolog/log"
+)
+
+var (
+	ErrCapacityNotAligned         = errors.New("capacity must be aligned to block size")
+	ErrPageNotProvided            = errors.New("page must be provided")
+	ErrFileNotProvided            = errors.New("file must be provided")
+	ErrPageBufferCapacityMismatch = errors.New("page buffer must be provided and must be of size capacity")
+	ErrOffsetOutOfBounds          = errors.New("offset out of bounds")
+	ErrMemtableNotReadyForFlush   = errors.New("memtable not ready for flush")
+)
+
+type Memtable struct {
+	Id            uint32
+	capacity      int
+	currentOffset int
+	file          *fs.WrapAppendFile
+	page          *fs.AlignedPage
+	readyForFlush bool
+	next          *Memtable
+	prev          *Memtable
+}
+
+type MemtableConfig struct {
+	capacity int
+	id       uint32
+	page     *fs.AlignedPage
+	file     *fs.WrapAppendFile
+}
+
+func NewMemtable(config MemtableConfig) (*Memtable, error) {
+	if config.capacity%fs.BLOCK_SIZE != 0 {
+		return nil, ErrCapacityNotAligned
+	}
+	if config.page == nil {
+		return nil, ErrPageNotProvided
+	}
+	if config.file == nil {
+		return nil, ErrFileNotProvided
+	}
+	if config.page.Buf == nil || len(config.page.Buf) != config.capacity {
+		return nil, ErrPageBufferCapacityMismatch
+	}
+	return &Memtable{
+		Id:            config.id,
+		capacity:      config.capacity,
+		currentOffset: 0,
+		file:          config.file,
+		page:          config.page,
+		readyForFlush: false,
+	}, nil
+}
+
+func (m *Memtable) Get(offset int, length uint16) ([]byte, error) {
+	if offset+int(length) > m.capacity {
+		return nil, ErrOffsetOutOfBounds
+	}
+	return m.page.Buf[offset : offset+int(length)], nil
+}
+
+func (m *Memtable) Put(buf []byte) (offset int, length uint16, readyForFlush bool) {
+	offset = m.currentOffset
+	if offset+len(buf) > m.capacity {
+		m.readyForFlush = true
+		return -1, 0, true
+	}
+	copy(m.page.Buf[offset:], buf)
+	m.currentOffset += len(buf)
+	return offset, uint16(len(buf)), false
+}
+
+// Efforts to make zero copy
+func (m *Memtable) GetBufForAppend(size uint16) (bbuf []byte, offset int, length uint16, readyForFlush bool) {
+	offset = m.currentOffset
+	if offset+int(size) > m.capacity {
+		m.readyForFlush = true
+		return nil, -1, 0, true
+	}
+	bbuf = m.page.Buf[offset : offset+int(size)]
+	m.currentOffset += int(size)
+	return bbuf, offset, size, false
+}
+
+func (m *Memtable) GetBufForRead(offset int, length uint16) (bbuf []byte, exists bool) {
+	if offset+int(length) > m.capacity {
+		return nil, false
+	}
+	return m.page.Buf[offset : offset+int(length)], true
+}
+
+func (m *Memtable) Flush() (n int, fileOffset int64, err error) {
+	if !m.readyForFlush {
+		return 0, 0, ErrMemtableNotReadyForFlush
+	}
+	fileOffset, err = m.file.Pwrite(m.page.Buf)
+	if err != nil {
+		return 0, 0, err
+	} else {
+		log.Debug().Msgf("Flushed memtable %d to file %d", m.Id, fileOffset)
+	}
+	m.currentOffset = 0
+	m.readyForFlush = false
+	return len(m.page.Buf), fileOffset, nil
+}
+
+func (m *Memtable) Discard() {
+	m.file = nil
+	m.page = nil
+}
diff --git a/flashring/internal/memtables/memtable_bench_test.go b/flashring/internal/memtables/memtable_bench_test.go
new file mode 100644
index 00000000..40175e62
--- /dev/null
+++ b/flashring/internal/memtables/memtable_bench_test.go
@@ -0,0 +1,580 @@
+// Benchmark tests for Memtable operations optimized for single-threaded performance
+// Uses 50GB max file size and 1GB memtable page size as specified
+package memtables
+
+import (
+	"crypto/rand"
+	"fmt"
+	"path/filepath"
+	"testing"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+)
+
+const (
+	// Configuration for single-threaded benchmarks
+	BENCH_MAX_FILE_SIZE   = 50 * 1024 * 1024 * 1024 // 50GB max file size
+	BENCH_PAGE_SIZE       = 1024 * 1024 * 1024      // 1GB memtable page size
+	BENCH_PUNCH_HOLE_SIZE = 64 * 1024 * 1024        // 64MB punch hole size
+
+	// Data sizes for single-threaded performance testing
+	SMALL_DATA_SIZE      = 256         // 256 bytes - typical small record
+	MEDIUM_DATA_SIZE     = 4096        // 4KB - typical medium record
+	LARGE_DATA_SIZE      = 64 * 1024   // 64KB - large record
+	VERY_LARGE_DATA_SIZE = 1024 * 1024 // 1MB - very large record
+)
+
+// Helper function to create benchmark file
+func createBenchmarkFile(b *testing.B) *fs.WrapAppendFile {
+	filename := filepath.Join("/media/a0d00kc/freedom/tmp/bench_memtable.dat")
+
+	config := fs.FileConfig{
+		Filename:          filename,
+		MaxFileSize:       BENCH_MAX_FILE_SIZE,
+		FilePunchHoleSize: BENCH_PUNCH_HOLE_SIZE,
+		BlockSize:         fs.BLOCK_SIZE,
+	}
+
+	file, err := fs.NewWrapAppendFile(config)
+	if err != nil {
+		b.Fatalf("Failed to create benchmark file: %v", err)
+	}
+	return file
+}
+
+// Helper function to create benchmark page
+func createBenchmarkPage() *fs.AlignedPage {
+	return fs.NewAlignedPage(BENCH_PAGE_SIZE)
+}
+
+// Helper function to create benchmark memtable
+func createBenchmarkMemtable(b *testing.B) (*Memtable, *fs.WrapAppendFile, *fs.AlignedPage) {
+	file := createBenchmarkFile(b)
+	page := createBenchmarkPage()
+
+	config := MemtableConfig{
+		capacity: BENCH_PAGE_SIZE,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		cleanup(file, page)
+		b.Fatalf("Failed to create benchmark memtable: %v", err)
+	}
+
+	return memtable, file, page
+}
+
+// Helper function to generate random data
+func generateRandomData(size int) []byte {
+	data := make([]byte, size)
+	rand.Read(data)
+	return data
+}
+
+// Benchmark Put operations with different data sizes
+func BenchmarkMemtable_Put_Small(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	data := generateRandomData(SMALL_DATA_SIZE)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(SMALL_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		if memtable.readyForFlush {
+			// Reset memtable for continued benchmarking
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+		}
+
+		_, _, readyForFlush := memtable.Put(data)
+		if readyForFlush {
+			// Don't count flush operations in this benchmark
+			b.StopTimer()
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+			b.StartTimer()
+		}
+	}
+}
+
+func BenchmarkMemtable_Put_Medium(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	data := generateRandomData(MEDIUM_DATA_SIZE)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(MEDIUM_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		if memtable.readyForFlush {
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+		}
+
+		_, _, readyForFlush := memtable.Put(data)
+		if readyForFlush {
+			b.StopTimer()
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+			b.StartTimer()
+		}
+	}
+}
+
+func BenchmarkMemtable_Put_Large(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	data := generateRandomData(LARGE_DATA_SIZE)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(LARGE_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		if memtable.readyForFlush {
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+		}
+
+		_, _, readyForFlush := memtable.Put(data)
+		if readyForFlush {
+			b.StopTimer()
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+			b.StartTimer()
+		}
+	}
+}
+
+func BenchmarkMemtable_Put_VeryLarge(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	data := generateRandomData(VERY_LARGE_DATA_SIZE)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(VERY_LARGE_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		if memtable.readyForFlush {
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+		}
+
+		_, _, readyForFlush := memtable.Put(data)
+		if readyForFlush {
+			b.StopTimer()
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+			b.StartTimer()
+		}
+	}
+}
+
+// Benchmark Get operations
+func BenchmarkMemtable_Get_Small(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	// Pre-populate memtable with data
+	data := generateRandomData(SMALL_DATA_SIZE)
+	numEntries := BENCH_PAGE_SIZE / SMALL_DATA_SIZE / 2 // Fill half the memtable
+
+	offsets := make([]int, numEntries)
+	lengths := make([]uint16, numEntries)
+
+	for i := 0; i < numEntries; i++ {
+		offset, length, _ := memtable.Put(data)
+		offsets[i] = offset
+		lengths[i] = length
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(SMALL_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		idx := i % numEntries
+		_, err := memtable.Get(offsets[idx], lengths[idx])
+		if err != nil {
+			b.Fatalf("Get failed: %v", err)
+		}
+	}
+}
+
+func BenchmarkMemtable_Get_Medium(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	data := generateRandomData(MEDIUM_DATA_SIZE)
+	numEntries := BENCH_PAGE_SIZE / MEDIUM_DATA_SIZE / 2
+
+	offsets := make([]int, numEntries)
+	lengths := make([]uint16, numEntries)
+
+	for i := 0; i < numEntries; i++ {
+		offset, length, _ := memtable.Put(data)
+		offsets[i] = offset
+		lengths[i] = length
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(MEDIUM_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		idx := i % numEntries
+		_, err := memtable.Get(offsets[idx], lengths[idx])
+		if err != nil {
+			b.Fatalf("Get failed: %v", err)
+		}
+	}
+}
+
+func BenchmarkMemtable_Get_Large(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	data := generateRandomData(LARGE_DATA_SIZE)
+	numEntries := BENCH_PAGE_SIZE / LARGE_DATA_SIZE / 2
+
+	offsets := make([]int, numEntries)
+	lengths := make([]uint16, numEntries)
+
+	for i := 0; i < numEntries; i++ {
+		offset, length, _ := memtable.Put(data)
+		offsets[i] = offset
+		lengths[i] = length
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(LARGE_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		idx := i % numEntries
+		_, err := memtable.Get(offsets[idx], lengths[idx])
+		if err != nil {
+			b.Fatalf("Get failed: %v", err)
+		}
+	}
+}
+
+// Benchmark Flush operations
+func BenchmarkMemtable_Flush(b *testing.B) {
+	file := createBenchmarkFile(b)
+	defer cleanup(file, nil)
+
+	// Create fresh memtable for each iteration
+	page := createBenchmarkPage()
+	config := MemtableConfig{
+		capacity: BENCH_PAGE_SIZE,
+		id:       uint32(0),
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		b.Fatalf("Failed to create memtable: %v", err)
+	}
+
+	// Fill memtable to near capacity then trigger flush with overflow
+	fillData := generateRandomData(BENCH_PAGE_SIZE - 1000)
+	memtable.Put(fillData)
+
+	// Now add data that will exceed capacity to trigger flush
+	overflowData := generateRandomData(2000) // This will exceed capacity
+	_, _, readyForFlush := memtable.Put(overflowData)
+	if !readyForFlush {
+		b.Fatalf("Failed to trigger flush - memtable should be ready for flush")
+	}
+	b.ReportAllocs()
+	b.SetBytes(BENCH_PAGE_SIZE)
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+
+		_, _, err = memtable.Flush()
+		if err != nil {
+			b.Fatalf("Flush failed: %v", err)
+		}
+		// Force re-flush same data in each iteration
+		memtable.readyForFlush = true
+	}
+	fs.Unmap(page)
+}
+
+// Benchmark mixed operations (realistic usage pattern)
+func BenchmarkMemtable_MixedOperations(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	// Pre-populate with some data
+	initialData := generateRandomData(MEDIUM_DATA_SIZE)
+	numInitial := 1000
+	offsets := make([]int, numInitial)
+	lengths := make([]uint16, numInitial)
+
+	for i := 0; i < numInitial; i++ {
+		offset, length, readyForFlush := memtable.Put(initialData)
+		if readyForFlush {
+			break
+		}
+		offsets[i] = offset
+		lengths[i] = length
+	}
+
+	putData := generateRandomData(SMALL_DATA_SIZE)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		// Mix of operations: 70% gets, 30% puts
+		if i%10 < 7 {
+			// Get operation
+			idx := i % len(offsets)
+			if idx < len(offsets) && lengths[idx] > 0 {
+				_, err := memtable.Get(offsets[idx], lengths[idx])
+				if err != nil && err != ErrOffsetOutOfBounds {
+					b.Fatalf("Get failed: %v", err)
+				}
+			}
+		} else {
+			// Put operation
+			if memtable.readyForFlush {
+				// Reset for continued benchmarking
+				memtable.currentOffset = 0
+				memtable.readyForFlush = false
+			}
+			memtable.Put(putData)
+		}
+	}
+}
+
+// Benchmark sequential writes to measure throughput
+func BenchmarkMemtable_SequentialWrites(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	data := generateRandomData(MEDIUM_DATA_SIZE)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(MEDIUM_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		if memtable.readyForFlush {
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+		}
+
+		_, _, readyForFlush := memtable.Put(data)
+		if readyForFlush {
+			b.StopTimer()
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+			b.StartTimer()
+		}
+	}
+}
+
+// Benchmark random access patterns
+func BenchmarkMemtable_RandomAccess(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	// Pre-populate memtable
+	data := generateRandomData(SMALL_DATA_SIZE)
+	numEntries := BENCH_PAGE_SIZE / SMALL_DATA_SIZE / 4 // Fill quarter of memtable
+
+	offsets := make([]int, numEntries)
+	lengths := make([]uint16, numEntries)
+
+	for i := 0; i < numEntries; i++ {
+		offset, length, _ := memtable.Put(data)
+		offsets[i] = offset
+		lengths[i] = length
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		// Random access pattern
+		idx := (i * 7919) % numEntries // Use prime number for better distribution
+		_, err := memtable.Get(offsets[idx], lengths[idx])
+		if err != nil {
+			b.Fatalf("Get failed: %v", err)
+		}
+	}
+}
+
+// Benchmark memory copying efficiency
+func BenchmarkMemtable_MemoryCopy(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	// Test different copy sizes
+	sizes := []int{64, 256, 1024, 4096, 16384, 65536}
+
+	for _, size := range sizes {
+		b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) {
+			data := generateRandomData(size)
+
+			b.ResetTimer()
+			b.ReportAllocs()
+			b.SetBytes(int64(size))
+
+			for i := 0; i < b.N; i++ {
+				if memtable.readyForFlush {
+					memtable.currentOffset = 0
+					memtable.readyForFlush = false
+				}
+
+				_, _, readyForFlush := memtable.Put(data)
+				if readyForFlush {
+					b.StopTimer()
+					memtable.currentOffset = 0
+					memtable.readyForFlush = false
+					b.StartTimer()
+				}
+			}
+		})
+	}
+}
+
+// Benchmark full memtable lifecycle
+func BenchmarkMemtable_FullLifecycle(b *testing.B) {
+	file := createBenchmarkFile(b)
+	defer cleanup(file, nil)
+
+	entrySize := MEDIUM_DATA_SIZE
+	entriesPerMemtable := BENCH_PAGE_SIZE / entrySize
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(int64(entriesPerMemtable * entrySize))
+
+	for i := 0; i < b.N; i++ {
+		// Create memtable
+		page := createBenchmarkPage()
+		config := MemtableConfig{
+			capacity: BENCH_PAGE_SIZE,
+			id:       uint32(i),
+			page:     page,
+			file:     file,
+		}
+
+		memtable, err := NewMemtable(config)
+		if err != nil {
+			b.Fatalf("Failed to create memtable: %v", err)
+		}
+
+		// Fill memtable to near capacity then trigger flush with overflow
+		fillData := generateRandomData(BENCH_PAGE_SIZE - 1000)
+		memtable.Put(fillData)
+
+		// Add data that will exceed capacity to trigger flush
+		overflowData := generateRandomData(2000)
+		_, _, readyForFlush := memtable.Put(overflowData)
+		if !readyForFlush {
+			b.Fatalf("Failed to trigger flush in lifecycle test")
+		}
+
+		// Flush
+		_, _, err = memtable.Flush()
+		if err != nil {
+			b.Fatalf("Flush failed: %v", err)
+		}
+
+		// Cleanup
+		memtable.Discard()
+		fs.Unmap(page)
+	}
+}
+
+// Benchmark single-threaded workload patterns (read-heavy, write-heavy, mixed)
+func BenchmarkMemtable_SingleThreadedWorkload(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	// Pre-populate with test data
+	data := generateRandomData(SMALL_DATA_SIZE)
+	numEntries := 10000
+	offsets := make([]int, numEntries)
+	lengths := make([]uint16, numEntries)
+	validEntries := 0
+
+	for i := 0; i < numEntries; i++ {
+		offset, length, readyForFlush := memtable.Put(data)
+		if readyForFlush {
+			break
+		}
+		offsets[validEntries] = offset
+		lengths[validEntries] = length
+		validEntries++
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		// Single-threaded workload pattern: 80% reads, 20% writes
+		if i%5 < 4 {
+			// Read operation (80%)
+			if validEntries > 0 {
+				idx := i % validEntries
+				memtable.Get(offsets[idx], lengths[idx])
+			}
+		} else {
+			// Write operation (20%) - only if space available
+			if !memtable.readyForFlush {
+				memtable.Put(data)
+			}
+		}
+	}
+}
+
+// Benchmark CPU-intensive single-threaded operations
+func BenchmarkMemtable_CPUIntensive(b *testing.B) {
+	memtable, file, page := createBenchmarkMemtable(b)
+	defer cleanup(file, page)
+
+	// Use medium-sized data for CPU-intensive operations
+	data := generateRandomData(MEDIUM_DATA_SIZE)
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	b.SetBytes(MEDIUM_DATA_SIZE)
+
+	for i := 0; i < b.N; i++ {
+		if memtable.readyForFlush {
+			// Reset for continued benchmarking
+			memtable.currentOffset = 0
+			memtable.readyForFlush = false
+		}
+
+		// Perform put operation
+		offset, length, readyForFlush := memtable.Put(data)
+		if !readyForFlush {
+			// Immediately read back the data to stress CPU
+			_, err := memtable.Get(offset, length)
+			if err != nil {
+				b.Fatalf("Get failed: %v", err)
+			}
+		}
+	}
+}
diff --git a/flashring/internal/memtables/memtable_test.go b/flashring/internal/memtables/memtable_test.go
new file mode 100644
index 00000000..2d694218
--- /dev/null
+++ b/flashring/internal/memtables/memtable_test.go
@@ -0,0 +1,594 @@
+package memtables
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+)
+
+// Helper function to create a mock file for testing
+func createTestFile(t *testing.T) *fs.WrapAppendFile {
+	tmpDir := t.TempDir()
+	filename := filepath.Join(tmpDir, "test_memtable.dat")
+
+	config := fs.FileConfig{
+		Filename:          filename,
+		MaxFileSize:       1024 * 1024, // 1MB
+		FilePunchHoleSize: 64 * 1024,   // 64KB
+		BlockSize:         fs.BLOCK_SIZE,
+	}
+
+	file, err := fs.NewWrapAppendFile(config)
+	if err != nil {
+		t.Fatalf("Failed to create test file: %v", err)
+	}
+	return file
+}
+
+// Helper function to create a test page
+func createTestPage(size int) *fs.AlignedPage {
+	return fs.NewAlignedPage(size)
+}
+
+// Helper function to cleanup resources
+func cleanup(file *fs.WrapAppendFile, page *fs.AlignedPage) {
+	if file != nil {
+		file.Close()
+	}
+	if page != nil {
+		fs.Unmap(page)
+	}
+}
+
+func TestNewMemtable_Success(t *testing.T) {
+	capacity := fs.BLOCK_SIZE * 2 // 8192 bytes
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	if memtable.Id != 1 {
+		t.Errorf("Expected Id 1, got %d", memtable.Id)
+	}
+	if memtable.capacity != capacity {
+		t.Errorf("Expected capacity %d, got %d", capacity, memtable.capacity)
+	}
+	if memtable.currentOffset != 0 {
+		t.Errorf("Expected currentOffset 0, got %d", memtable.currentOffset)
+	}
+	if memtable.readyForFlush != false {
+		t.Errorf("Expected readyForFlush false, got %v", memtable.readyForFlush)
+	}
+}
+
+func TestNewMemtable_CapacityNotAligned(t *testing.T) {
+	capacity := fs.BLOCK_SIZE + 100 // Not aligned to block size
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	_, err := NewMemtable(config)
+	if err != ErrCapacityNotAligned {
+		t.Errorf("Expected ErrCapacityNotAligned, got %v", err)
+	}
+}
+
+func TestNewMemtable_PageNotProvided(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	defer cleanup(file, nil)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     nil,
+		file:     file,
+	}
+
+	_, err := NewMemtable(config)
+	if err != ErrPageNotProvided {
+		t.Errorf("Expected ErrPageNotProvided, got %v", err)
+	}
+}
+
+func TestNewMemtable_FileNotProvided(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	page := createTestPage(capacity)
+	defer cleanup(nil, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     nil,
+	}
+
+	_, err := NewMemtable(config)
+	if err != ErrFileNotProvided {
+		t.Errorf("Expected ErrFileNotProvided, got %v", err)
+	}
+}
+
+func TestNewMemtable_PageBufferCapacityMismatch(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity * 2) // Wrong size
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	_, err := NewMemtable(config)
+	if err != ErrPageBufferCapacityMismatch {
+		t.Errorf("Expected ErrPageBufferCapacityMismatch, got %v", err)
+	}
+}
+
+func TestNewMemtable_PageBufferNil(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	defer cleanup(file, nil)
+
+	// Create page with nil buffer
+	page := &fs.AlignedPage{Buf: nil}
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	_, err := NewMemtable(config)
+	if err != ErrPageBufferCapacityMismatch {
+		t.Errorf("Expected ErrPageBufferCapacityMismatch, got %v", err)
+	}
+}
+
+func TestMemtable_Get_Success(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Write some test data to the page buffer
+	testData := []byte("Hello, World!")
+	copy(page.Buf[:len(testData)], testData)
+
+	// Get the data
+	result, err := memtable.Get(0, uint16(len(testData)))
+	if err != nil {
+		t.Fatalf("Get failed: %v", err)
+	}
+
+	if string(result) != string(testData) {
+		t.Errorf("Expected %s, got %s", testData, result)
+	}
+}
+
+func TestMemtable_Get_OffsetOutOfBounds(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Try to get data beyond capacity
+	_, err = memtable.Get(capacity-10, 20)
+	if err != ErrOffsetOutOfBounds {
+		t.Errorf("Expected ErrOffsetOutOfBounds, got %v", err)
+	}
+}
+
+func TestMemtable_Put_Success(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	testData := []byte("Hello, World!")
+	offset, length, readyForFlush := memtable.Put(testData)
+
+	if offset != 0 {
+		t.Errorf("Expected offset 0, got %d", offset)
+	}
+	if length != uint16(len(testData)) {
+		t.Errorf("Expected length %d, got %d", len(testData), length)
+	}
+	if readyForFlush {
+		t.Errorf("Expected readyForFlush false, got %v", readyForFlush)
+	}
+	if memtable.currentOffset != len(testData) {
+		t.Errorf("Expected currentOffset %d, got %d", len(testData), memtable.currentOffset)
+	}
+
+	// Verify data was written to buffer
+	result, err := memtable.Get(0, uint16(len(testData)))
+	if err != nil {
+		t.Fatalf("Get failed: %v", err)
+	}
+	if string(result) != string(testData) {
+		t.Errorf("Expected %s, got %s", testData, result)
+	}
+}
+
+func TestMemtable_Put_ExceedsCapacity(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Fill the memtable to near capacity
+	testData := make([]byte, capacity-100)
+	_, _, _ = memtable.Put(testData)
+
+	// Try to put data that exceeds capacity
+	largeData := make([]byte, 200)
+	offset, length, readyForFlush := memtable.Put(largeData)
+
+	if offset != -1 {
+		t.Errorf("Expected offset -1, got %d", offset)
+	}
+	if length != 0 {
+		t.Errorf("Expected length 0, got %d", length)
+	}
+	if !readyForFlush {
+		t.Errorf("Expected readyForFlush true, got %v", readyForFlush)
+	}
+	if !memtable.readyForFlush {
+		t.Errorf("Expected memtable.readyForFlush true, got %v", memtable.readyForFlush)
+	}
+}
+
+func TestMemtable_Put_MultiplePuts(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Put multiple pieces of data
+	data1 := []byte("First")
+	data2 := []byte("Second")
+	data3 := []byte("Third")
+
+	offset1, length1, _ := memtable.Put(data1)
+	offset2, length2, _ := memtable.Put(data2)
+	offset3, length3, _ := memtable.Put(data3)
+
+	if offset1 != 0 {
+		t.Errorf("Expected offset1 0, got %d", offset1)
+	}
+	if offset2 != len(data1) {
+		t.Errorf("Expected offset2 %d, got %d", len(data1), offset2)
+	}
+	if offset3 != len(data1)+len(data2) {
+		t.Errorf("Expected offset3 %d, got %d", len(data1)+len(data2), offset3)
+	}
+
+	// Verify all data can be retrieved
+	result1, err := memtable.Get(offset1, length1)
+	if err != nil || string(result1) != string(data1) {
+		t.Errorf("Failed to retrieve data1: %v", err)
+	}
+
+	result2, err := memtable.Get(offset2, length2)
+	if err != nil || string(result2) != string(data2) {
+		t.Errorf("Failed to retrieve data2: %v", err)
+	}
+
+	result3, err := memtable.Get(offset3, length3)
+	if err != nil || string(result3) != string(data3) {
+		t.Errorf("Failed to retrieve data3: %v", err)
+	}
+}
+
+func TestMemtable_Flush_Success(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Fill the memtable to trigger ready for flush
+	testData := make([]byte, capacity-100)
+	memtable.Put(testData)
+
+	// Put data that exceeds capacity to trigger ready for flush
+	memtable.Put(make([]byte, 200))
+
+	if !memtable.readyForFlush {
+		t.Fatalf("Expected memtable to be ready for flush")
+	}
+
+	n, fileOffset, err := memtable.Flush()
+	if err != nil {
+		t.Fatalf("Flush failed: %v", err)
+	}
+
+	if n != len(page.Buf) {
+		t.Errorf("Expected n %d, got %d", len(page.Buf), n)
+	}
+	if fileOffset < 0 {
+		t.Errorf("Expected positive fileOffset, got %d", fileOffset)
+	}
+	if memtable.readyForFlush {
+		t.Errorf("Expected readyForFlush to be false after flush, got %v", memtable.readyForFlush)
+	}
+}
+
+func TestMemtable_Flush_NotReadyForFlush(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Try to flush without being ready
+	_, _, err = memtable.Flush()
+	if err != ErrMemtableNotReadyForFlush {
+		t.Errorf("Expected ErrMemtableNotReadyForFlush, got %v", err)
+	}
+}
+
+func TestMemtable_Discard(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	memtable.Discard()
+
+	if memtable.file != nil {
+		t.Errorf("Expected file to be nil after discard")
+	}
+	if memtable.page != nil {
+		t.Errorf("Expected page to be nil after discard")
+	}
+}
+
+func TestMemtable_Integration(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       42,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Test complete workflow: multiple puts, get, trigger flush, and flush
+	testCases := [][]byte{
+		[]byte("First entry"),
+		[]byte("Second entry with more data"),
+		[]byte("Third entry"),
+	}
+
+	var offsets []int
+	var lengths []uint16
+
+	// Put multiple entries
+	for i, data := range testCases {
+		offset, length, readyForFlush := memtable.Put(data)
+		if readyForFlush {
+			t.Logf("Memtable ready for flush after entry %d", i)
+			break
+		}
+		offsets = append(offsets, offset)
+		lengths = append(lengths, length)
+	}
+
+	// Verify all entries can be retrieved
+	for i := range offsets {
+		result, err := memtable.Get(offsets[i], lengths[i])
+		if err != nil {
+			t.Fatalf("Get failed for entry %d: %v", i, err)
+		}
+		if string(result) != string(testCases[i]) {
+			t.Errorf("Entry %d mismatch: expected %s, got %s", i, testCases[i], result)
+		}
+	}
+
+	// Fill up the memtable to trigger ready for flush
+	for !memtable.readyForFlush {
+		memtable.Put([]byte("filler"))
+	}
+
+	// Test flush
+	n, fileOffset, err := memtable.Flush()
+	if err != nil {
+		t.Fatalf("Flush failed: %v", err)
+	}
+
+	if n != capacity {
+		t.Errorf("Expected flush size %d, got %d", capacity, n)
+	}
+	if fileOffset <= 0 {
+		t.Errorf("Expected positive file offset, got %d", fileOffset)
+	}
+}
+
+func TestMemtable_EdgeCases(t *testing.T) {
+	capacity := fs.BLOCK_SIZE
+	file := createTestFile(t)
+	page := createTestPage(capacity)
+	defer cleanup(file, page)
+
+	config := MemtableConfig{
+		capacity: capacity,
+		id:       1,
+		page:     page,
+		file:     file,
+	}
+
+	memtable, err := NewMemtable(config)
+	if err != nil {
+		t.Fatalf("NewMemtable failed: %v", err)
+	}
+
+	// Test zero-length put
+	offset, length, readyForFlush := memtable.Put([]byte{})
+	if offset != 0 || length != 0 || readyForFlush {
+		t.Errorf("Zero-length put: offset=%d, length=%d, readyForFlush=%v", offset, length, readyForFlush)
+	}
+
+	// Test zero-length get
+	result, err := memtable.Get(0, 0)
+	if err != nil {
+		t.Fatalf("Zero-length get failed: %v", err)
+	}
+	if len(result) != 0 {
+		t.Errorf("Expected zero-length result, got %d", len(result))
+	}
+
+	// Test get at exact capacity boundary with zero length (should succeed)
+	result, err = memtable.Get(capacity, 0)
+	if err != nil {
+		t.Errorf("Expected no error for boundary get with zero length, got %v", err)
+	}
+	if len(result) != 0 {
+		t.Errorf("Expected zero-length result for boundary get, got %d", len(result))
+	}
+
+	// Test get beyond capacity boundary
+	_, err = memtable.Get(capacity, 1)
+	if err != ErrOffsetOutOfBounds {
+		t.Errorf("Expected ErrOffsetOutOfBounds for beyond boundary get, got %v", err)
+	}
+
+	// Test put that exactly fills capacity
+	exactData := make([]byte, capacity)
+	offset, length, readyForFlush = memtable.Put(exactData)
+	if offset != 0 || length != uint16(capacity) || readyForFlush {
+		t.Errorf("Exact capacity put: offset=%d, length=%d, readyForFlush=%v", offset, length, readyForFlush)
+	}
+
+	// Next put should trigger ready for flush
+	offset, length, readyForFlush = memtable.Put([]byte("overflow"))
+	if offset != -1 || length != 0 || !readyForFlush {
+		t.Errorf("Overflow put: offset=%d, length=%d, readyForFlush=%v", offset, length, readyForFlush)
+	}
+}
diff --git a/flashring/internal/pools/leaky_pool.go b/flashring/internal/pools/leaky_pool.go
new file mode 100644
index 00000000..b2a59487
--- /dev/null
+++ b/flashring/internal/pools/leaky_pool.go
@@ -0,0 +1,72 @@
+package pools
+
+import "sync"
+
+type LeakyPool struct {
+	availabilityList []interface{}
+	Meta             interface{}
+	createFunc       func() interface{}
+	preDrefHook      func(obj interface{})
+	capacity         int
+	usage            int
+	idx              int
+	lock             sync.RWMutex
+	stats            *Stats
+}
+
+type Stats struct {
+	Usage    int
+	Capacity int
+}
+
+type LeakyPoolConfig struct {
+	Capacity   int
+	Meta       interface{}
+	CreateFunc func() interface{}
+}
+
+func NewLeakyPool(config LeakyPoolConfig) *LeakyPool {
+	return &LeakyPool{
+		availabilityList: make([]interface{}, config.Capacity),
+		Meta:             config.Meta,
+		capacity:         config.Capacity,
+		createFunc:       config.CreateFunc,
+		usage:            0,
+		idx:              -1,
+		preDrefHook:      nil,
+		stats:            &Stats{Usage: 0, Capacity: config.Capacity},
+	}
+}
+
+func (p *LeakyPool) RegisterPreDrefHook(hook func(obj interface{})) {
+	p.preDrefHook = hook
+}
+
+func (p *LeakyPool) Get() interface{} {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	p.usage++
+	if p.idx == -1 && p.usage > p.capacity {
+		return p.createFunc()
+	} else if p.idx == -1 {
+		return p.createFunc()
+	}
+	o := p.availabilityList[p.idx]
+	p.idx--
+	return o
+}
+
+func (p *LeakyPool) Put(obj interface{}) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	p.usage--
+	p.idx++
+	if p.idx == p.capacity {
+		if p.preDrefHook != nil {
+			p.preDrefHook(obj)
+		}
+		p.idx--
+		return
+	}
+	p.availabilityList[p.idx] = obj
+}
diff --git a/flashring/internal/pools/pool.go b/flashring/internal/pools/pool.go
new file mode 100644
index 00000000..86dfa5b7
--- /dev/null
+++ b/flashring/internal/pools/pool.go
@@ -0,0 +1,7 @@
+package pools
+
+type Pool interface {
+	Get() interface{}
+	Put(obj interface{})
+	RegisterPreDrefHook(hook func(obj interface{}))
+}
diff --git a/flashring/internal/server/resp.go b/flashring/internal/server/resp.go
new file mode 100644
index 00000000..dc202b6d
--- /dev/null
+++ b/flashring/internal/server/resp.go
@@ -0,0 +1,277 @@
+package server
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"io"
+	"net"
+	"strconv"
+	"time"
+)
+
+// KV is the minimal cache interface required by the RESP server.
+// Implementations should be safe for concurrent use.
+type KV interface {
+	// Put stores the value with optional expire time in unix seconds (0 for no expiry).
+	Put(key string, value []byte, exptime uint64) error
+	// Get returns value, keyFound, expired
+	Get(key string) ([]byte, bool, bool)
+}
+
+// ServeRESP starts a minimal RESP (Redis) protocol server over TCP supporting
+// GET and SET only. It is optimized for low overhead and pipelined requests.
+//
+// Supported commands (case-insensitive):
+//   - *2\r\n$3\r\nGET\r\n$<klen>\r\n<key>\r\n
+//   - *3\r\n$3\r\nSET\r\n$<klen>\r\n<key>\r\n$<vlen>\r\n<val>\r\n
+//   - SET with EX seconds (optional):
+//     *5 ... SET key val EX seconds
+//
+// Inline protocol is not supported to keep parsing fast and simple.
+func ServeRESP(addr string, cache KV) error {
+	ln, err := net.Listen("tcp", addr)
+	if err != nil {
+		return err
+	}
+	// Accept loop
+	for {
+		conn, err := ln.Accept()
+		if err != nil {
+			if ne, ok := err.(net.Error); ok && ne.Temporary() {
+				time.Sleep(50 * time.Millisecond)
+				continue
+			}
+			return err
+		}
+		// Configure TCP for low latency
+		if tc, ok := conn.(*net.TCPConn); ok {
+			_ = tc.SetNoDelay(true)
+			_ = tc.SetKeepAlive(true)
+			_ = tc.SetKeepAlivePeriod(3 * time.Minute)
+		}
+		go handleConn(conn, cache)
+	}
+}
+
+func handleConn(conn net.Conn, cache KV) {
+	defer conn.Close()
+	// Generous buffers for pipelining
+	r := bufio.NewReaderSize(conn, 64*1024)
+	w := bufio.NewWriterSize(conn, 64*1024)
+	for {
+		cmd, args, perr := readRESPArray(r)
+		if perr != nil {
+			if perr == io.EOF || errors.Is(perr, net.ErrClosed) {
+				return
+			}
+			// Protocol error: close connection per Redis behavior
+			return
+		}
+		if len(cmd) == 0 {
+			// Ignore empty
+			continue
+		}
+		// Fast upper-case compare for GET/SET without heap allocs
+		if len(cmd) == 3 && (cmd[0]|0x20) == 'g' && (cmd[1]|0x20) == 'e' && (cmd[2]|0x20) == 't' {
+			// GET key
+			if len(args) != 1 {
+				writeError(w, "wrong number of arguments for 'get'")
+				if w.Flush() != nil {
+					return
+				}
+				continue
+			}
+			key := b2s(args[0])
+			val, found, expired := cache.Get(key)
+			if !found || expired {
+				writeBulkNil(w)
+			} else {
+				writeBulk(w, val)
+			}
+			if w.Flush() != nil {
+				return
+			}
+			continue
+		}
+		if len(cmd) >= 3 && (cmd[0]|0x20) == 's' && (cmd[1]|0x20) == 'e' && (cmd[2]|0x20) == 't' {
+			// SET key value [EX seconds]
+			if len(args) != 2 && len(args) != 4 {
+				writeError(w, "wrong number of arguments for 'set'")
+				if w.Flush() != nil {
+					return
+				}
+				continue
+			}
+			key := b2s(args[0])
+			value := args[1]
+			var ex uint64
+			if len(args) == 4 {
+				// Expect EX seconds
+				if !bytes.EqualFold(args[2], []byte("EX")) {
+					writeError(w, "only EX option is supported")
+					if w.Flush() != nil {
+						return
+					}
+					continue
+				}
+				secs, err := parseUint(args[3])
+				if err != nil {
+					writeError(w, "invalid expire seconds")
+					if w.Flush() != nil {
+						return
+					}
+					continue
+				}
+				ex = secs
+			}
+			_ = cache.Put(key, value, ex)
+			writeSimpleString(w, "OK")
+			if w.Flush() != nil {
+				return
+			}
+			continue
+		}
+		// Unknown command
+		writeError(w, "unknown command")
+		if w.Flush() != nil {
+			return
+		}
+	}
+}
+
+// RESP helpers
+
+// readRESPArray parses a RESP Array of Bulk Strings and returns command and args.
+// It assumes arrays consisting only of bulk strings; inline protocol is not supported.
+func readRESPArray(r *bufio.Reader) (cmd []byte, args [][]byte, err error) {
+	// Expect '*'
+	b, err := r.ReadByte()
+	if err != nil {
+		return nil, nil, err
+	}
+	if b != '*' {
+		return nil, nil, io.ErrUnexpectedEOF
+	}
+	n, err := readIntCRLF(r)
+	if err != nil {
+		return nil, nil, err
+	}
+	if n <= 0 {
+		return nil, nil, nil
+	}
+	// First element is command
+	bs, err := readBulkString(r)
+	if err != nil {
+		return nil, nil, err
+	}
+	cmd = bs
+	// Remaining are args
+	if n > 1 {
+		args = make([][]byte, 0, n-1)
+		for i := 1; i < n; i++ {
+			bsi, err := readBulkString(r)
+			if err != nil {
+				return nil, nil, err
+			}
+			args = append(args, bsi)
+		}
+	}
+	return
+}
+
+func readBulkString(r *bufio.Reader) ([]byte, error) {
+	b, err := r.ReadByte()
+	if err != nil {
+		return nil, err
+	}
+	if b != '$' {
+		return nil, io.ErrUnexpectedEOF
+	}
+	n, err := readIntCRLF(r)
+	if err != nil {
+		return nil, err
+	}
+	if n < 0 {
+		// Null bulk string
+		return nil, nil
+	}
+	buf := make([]byte, n)
+	if _, err := io.ReadFull(r, buf); err != nil {
+		return nil, err
+	}
+	// Read trailing CRLF
+	if err := expectCRLF(r); err != nil {
+		return nil, err
+	}
+	return buf, nil
+}
+
+func readIntCRLF(r *bufio.Reader) (int, error) {
+	// Read until CR
+	line, err := r.ReadSlice('\r')
+	if err != nil {
+		return 0, err
+	}
+	// Next must be '\n'
+	if b, err := r.ReadByte(); err != nil || b != '\n' {
+		if err == nil {
+			err = io.ErrUnexpectedEOF
+		}
+		return 0, err
+	}
+	// Trim trailing CR
+	line = line[:len(line)-1]
+	// Parse signed/unsigned int
+	// Use strconv for correctness; line is small
+	i, err := strconv.Atoi(b2s(line))
+	if err != nil {
+		return 0, err
+	}
+	return i, nil
+}
+
+func expectCRLF(r *bufio.Reader) error {
+	c1, err := r.ReadByte()
+	if err != nil {
+		return err
+	}
+	c2, err := r.ReadByte()
+	if err != nil {
+		return err
+	}
+	if c1 != '\r' || c2 != '\n' {
+		return io.ErrUnexpectedEOF
+	}
+	return nil
+}
+
+func writeSimpleString(w *bufio.Writer, s string) {
+	w.WriteByte('+')
+	w.WriteString(s)
+	w.WriteString("\r\n")
+}
+
+func writeError(w *bufio.Writer, s string) {
+	w.WriteByte('-')
+	w.WriteString("ERR ")
+	w.WriteString(s)
+	w.WriteString("\r\n")
+}
+
+func writeBulk(w *bufio.Writer, p []byte) {
+	w.WriteByte('$')
+	w.WriteString(strconv.Itoa(len(p)))
+	w.WriteString("\r\n")
+	w.Write(p)
+	w.WriteString("\r\n")
+}
+
+func writeBulkNil(w *bufio.Writer) {
+	w.WriteString("$-1\r\n")
+}
+
+// b2s converts []byte to string with allocation.
+// We intentionally avoid unsafe tricks for portability.
+func b2s(b []byte) string                { return string(b) }
+func parseUint(b []byte) (uint64, error) { return strconv.ParseUint(string(b), 10, 64) }
diff --git a/flashring/internal/shard/batch_reader.go b/flashring/internal/shard/batch_reader.go
new file mode 100644
index 00000000..3896834b
--- /dev/null
+++ b/flashring/internal/shard/batch_reader.go
@@ -0,0 +1,156 @@
+package filecache
+
+import (
+	"fmt"
+	"sort"
+	"sync"
+	"time"
+)
+
+// ===========batching reads ==========
+// ReadRequest represents a single read request
+type ReadRequest struct {
+	Key    string
+	Length uint16
+	MemId  uint32
+	Offset uint32
+	Result chan ReadResult
+}
+
+// ReadResult contains the response for a read request
+type ReadResult struct {
+	Found         bool
+	Data          []byte
+	TTL           uint16
+	Expired       bool
+	ShouldRewrite bool
+	Error         error
+}
+
+// BatchReader handles batching of disk reads
+type BatchReader struct {
+	requests     chan *ReadRequest
+	batchWindow  time.Duration
+	maxBatchSize int
+	shardCache   *ShardCache
+	stopCh       chan struct{}
+	wg           sync.WaitGroup
+}
+
+// Config for BatchReader
+type BatchReaderConfig struct {
+	BatchWindow  time.Duration // e.g., 5-10μs
+	MaxBatchSize int           // e.g., 32-64 requests
+}
+
+func NewBatchReader(config BatchReaderConfig, sc *ShardCache) *BatchReader {
+	br := &BatchReader{
+		requests:     make(chan *ReadRequest, config.MaxBatchSize*2),
+		batchWindow:  config.BatchWindow,
+		maxBatchSize: config.MaxBatchSize,
+		shardCache:   sc,
+		stopCh:       make(chan struct{}),
+	}
+
+	// Start batch processor goroutine
+	br.wg.Add(1)
+	go br.processBatches()
+
+	return br
+}
+
+func (br *BatchReader) processBatches() {
+	defer br.wg.Done()
+
+	for {
+		select {
+		case <-br.stopCh:
+			return
+		case firstReq := <-br.requests:
+			batch := br.collectBatch(firstReq)
+			br.shardCache.Stats.BatchTracker.RecordBatchSize(len(batch))
+			br.executeBatch(batch)
+		}
+	}
+}
+
+func (br *BatchReader) collectBatch(firstReq *ReadRequest) []*ReadRequest {
+	batch := make([]*ReadRequest, 0, br.maxBatchSize)
+	batch = append(batch, firstReq)
+
+	timer := time.NewTimer(br.batchWindow)
+
+	for len(batch) < br.maxBatchSize {
+		select {
+		case req := <-br.requests:
+			batch = append(batch, req)
+		case <-timer.C:
+			return batch
+		}
+	}
+
+	return batch
+}
+
+func (br *BatchReader) executeBatch(batch []*ReadRequest) {
+	// Separate memtable hits from disk reads
+	diskReads := make([]*ReadRequest, 0, len(batch))
+
+	for _, req := range batch {
+		mt := br.shardCache.mm.GetMemtableById(req.MemId)
+		if mt != nil {
+			// Fast path: memtable hit
+			buf, exists := mt.GetBufForRead(int(req.Offset), req.Length)
+			if exists {
+				result := br.shardCache.processBuffer(req.Key, buf, req.Length)
+				req.Result <- result
+				continue
+			}
+		}
+		// Needs disk read
+		diskReads = append(diskReads, req)
+	}
+
+	if len(diskReads) == 0 {
+		return
+	}
+
+	// Sort disk reads by file offset
+	sort.Slice(diskReads, func(i, j int) bool {
+		offsetI := uint64(diskReads[i].MemId)*uint64(br.shardCache.mm.Capacity) +
+			uint64(diskReads[i].Offset)
+		offsetJ := uint64(diskReads[j].MemId)*uint64(br.shardCache.mm.Capacity) +
+			uint64(diskReads[j].Offset)
+		return offsetI < offsetJ
+	})
+
+	// Execute disk reads (could be parallelized or merged here)
+	var wg sync.WaitGroup
+	for _, req := range diskReads {
+		wg.Add(1)
+		go func(r *ReadRequest) {
+			defer wg.Done()
+			result := br.executeReadFromDisk(r)
+			r.Result <- result
+		}(req)
+	}
+	wg.Wait()
+}
+
+func (br *BatchReader) executeReadFromDisk(req *ReadRequest) ReadResult {
+	buf := make([]byte, req.Length)
+	fileOffset := uint64(req.MemId)*uint64(br.shardCache.mm.Capacity) +
+		uint64(req.Offset)
+
+	n := br.shardCache.readFromDisk(int64(fileOffset), req.Length, buf)
+	if n != int(req.Length) {
+		return ReadResult{Error: fmt.Errorf("bad read length")}
+	}
+
+	return br.shardCache.processBuffer(req.Key, buf, req.Length)
+}
+
+func (br *BatchReader) Close() {
+	close(br.stopCh)
+	br.wg.Wait()
+}
diff --git a/flashring/internal/shard/batch_reader_v2.go b/flashring/internal/shard/batch_reader_v2.go
new file mode 100644
index 00000000..2aa99b09
--- /dev/null
+++ b/flashring/internal/shard/batch_reader_v2.go
@@ -0,0 +1,132 @@
+package filecache
+
+import (
+	"fmt"
+	"sync"
+	"time"
+)
+
+type ReadRequestV2 struct {
+	Key    string
+	Result chan ReadResultV2
+}
+
+type ReadResultV2 struct {
+	Found         bool
+	Data          []byte
+	TTL           uint16
+	Expired       bool
+	ShouldRewrite bool
+	Error         error
+}
+
+type WriteRequestV2 struct {
+	Key              string
+	Value            []byte
+	ExptimeInMinutes uint16
+	Result           chan error
+}
+
+type BatchReaderV2 struct {
+	Requests     chan *ReadRequestV2
+	batchWindow  time.Duration
+	maxBatchSize int
+	shardCache   *ShardCache
+	stopCh       chan struct{}
+	wg           sync.WaitGroup
+	shardLock    *sync.RWMutex
+}
+
+type BatchReaderV2Config struct {
+	BatchWindow  time.Duration
+	MaxBatchSize int
+}
+
+var ReadRequestPool = sync.Pool{
+	New: func() interface{} {
+		return &ReadRequestV2{}
+	},
+}
+
+var ReadResultPool = sync.Pool{
+	New: func() interface{} {
+		return make(chan ReadResultV2, 1)
+	},
+}
+
+var ErrorPool = sync.Pool{
+	New: func() interface{} {
+		return make(chan error, 1)
+	},
+}
+
+var BufPool = sync.Pool{
+	New: func() interface{} {
+		// Allocate max expected size - use pointer to avoid allocation on Put
+		buf := make([]byte, 4096)
+		return &buf
+	},
+}
+
+func NewBatchReaderV2(config BatchReaderV2Config, sc *ShardCache, sl *sync.RWMutex) *BatchReaderV2 {
+	br := &BatchReaderV2{
+		Requests:     make(chan *ReadRequestV2, config.MaxBatchSize*2),
+		batchWindow:  config.BatchWindow,
+		maxBatchSize: config.MaxBatchSize,
+		shardCache:   sc,
+		stopCh:       make(chan struct{}),
+		shardLock:    sl,
+	}
+
+	// Start batch processor goroutine
+	br.wg.Add(1)
+	go br.processBatchesV2()
+
+	return br
+}
+
+func (br *BatchReaderV2) processBatchesV2() {
+	defer br.wg.Done()
+
+	for {
+		select {
+		case <-br.stopCh:
+			return
+		case firstReq := <-br.Requests:
+			batch := br.collectBatchV2(firstReq)
+			br.shardCache.Stats.BatchTracker.RecordBatchSize(len(batch))
+			br.executeBatchV2(batch)
+		}
+	}
+}
+
+func (br *BatchReaderV2) collectBatchV2(firstReq *ReadRequestV2) []*ReadRequestV2 {
+	batch := make([]*ReadRequestV2, 0, br.maxBatchSize)
+	batch = append(batch, firstReq)
+
+	timer := time.NewTimer(br.batchWindow)
+
+	for len(batch) < br.maxBatchSize {
+		select {
+		case req := <-br.Requests:
+			batch = append(batch, req)
+		case <-timer.C:
+			return batch
+		}
+	}
+
+	return batch
+}
+
+func (br *BatchReaderV2) executeBatchV2(batch []*ReadRequestV2) {
+	br.shardLock.RLock()
+	defer br.shardLock.RUnlock()
+	for _, req := range batch {
+		found, data, ttl, expired, shouldRewrite := br.shardCache.Get(req.Key)
+		if !found {
+			req.Result <- ReadResultV2{Error: fmt.Errorf("key not found")}
+		} else {
+			req.Result <- ReadResultV2{Found: found, Data: data, TTL: ttl, Expired: expired, ShouldRewrite: shouldRewrite}
+		}
+	}
+}
diff --git a/flashring/internal/shard/batch_tracker.go b/flashring/internal/shard/batch_tracker.go
new file mode 100644
index 00000000..5658d0e2
--- /dev/null
+++ b/flashring/internal/shard/batch_tracker.go
@@ -0,0 +1,55 @@
+package filecache
+
+import (
+	"sort"
+	"sync"
+)
+
+type BatchTracker struct {
+	mu         sync.RWMutex
+	getBatch   []int
+	maxSamples int
+	getIndex   int
+}
+
+// const defaultMaxSamples = 100000
+
+func NewBatchTracker() *BatchTracker {
+	return &BatchTracker{
+		getBatch:   make([]int, defaultMaxSamples),
+		maxSamples: defaultMaxSamples,
+	}
+}
+
+func (bt *BatchTracker) RecordBatchSize(batchSize int) {
+	bt.mu.Lock()
+	defer bt.mu.Unlock()
+	bt.getBatch[bt.getIndex] = batchSize
+	bt.getIndex = (bt.getIndex + 1) % bt.maxSamples
+}
+
+func (bt *BatchTracker) GetBatchSizePercentiles() (p25, p50, p99 int) {
+	bt.mu.RLock()
+	defer bt.mu.RUnlock()
+
+	samples := bt.getIndex
+	if samples > int(bt.maxSamples) {
+		samples = int(bt.maxSamples)
+	}
+
+	if samples == 0 {
+		return 0, 0, 0
+	}
+
+	batchSizesCopy := make([]int, samples)
+	copy(batchSizesCopy, bt.getBatch[:samples])
+	sort.Slice(batchSizesCopy, func(i, j int) bool {
+		return batchSizesCopy[i] < batchSizesCopy[j]
+	})
+
+	p25 = batchSizesCopy[int(float64(samples)*0.25)]
+	p50 = batchSizesCopy[int(float64(samples)*0.50)]
+	p99 = batchSizesCopy[int(float64(samples)*0.99)]
+
+	return p25, p50, p99
+}
diff --git a/flashring/internal/shard/latency_tracker.go b/flashring/internal/shard/latency_tracker.go
new file mode 100644
index 00000000..eeb109c8
--- /dev/null
+++ b/flashring/internal/shard/latency_tracker.go
@@ -0,0 +1,96 @@
+package filecache
+
+import (
+	"sort"
+	"sync"
+	"time"
+)
+
+type LatencyTracker struct {
+	mu           sync.RWMutex
+	getLatencies []time.Duration
+	putLatencies []time.Duration
+	maxSamples   int
+	getIndex     int
+	putIndex     int
+	getCount     int64
+	putCount     int64
+}
+
+const defaultMaxSamples = 100000
+
+func NewLatencyTracker() *LatencyTracker {
+	return &LatencyTracker{
+		getLatencies: make([]time.Duration, defaultMaxSamples),
+		putLatencies: make([]time.Duration, defaultMaxSamples),
+		maxSamples:   defaultMaxSamples,
+	}
+}
+
+func (lt *LatencyTracker) RecordGet(duration time.Duration) {
+	lt.mu.Lock()
+	defer lt.mu.Unlock()
+	lt.getLatencies[lt.getIndex] = duration
+	lt.getIndex = (lt.getIndex + 1) % lt.maxSamples
+	lt.getCount++
+}
+
+func (lt *LatencyTracker) RecordPut(duration time.Duration) {
+	lt.mu.Lock()
+	defer lt.mu.Unlock()
+	lt.putLatencies[lt.putIndex] = duration
+	lt.putIndex = (lt.putIndex + 1) % lt.maxSamples
+	lt.putCount++
+}
+
+func (lt *LatencyTracker) GetLatencyPercentiles() (p25, p50, p99 time.Duration) {
+	lt.mu.RLock()
+	defer lt.mu.RUnlock()
+
+	samples := lt.getCount
+	if samples > int64(lt.maxSamples) {
+		samples = int64(lt.maxSamples)
+	}
+
+	if samples == 0 {
+		return 0, 0, 0
+	}
+
+	latenciesCopy := make([]time.Duration, samples)
+	copy(latenciesCopy, lt.getLatencies[:samples])
+	sort.Slice(latenciesCopy, func(i, j int) bool {
+		return latenciesCopy[i] < latenciesCopy[j]
+	})
+
+	p25 = latenciesCopy[int(float64(samples)*0.25)]
+	p50 = latenciesCopy[int(float64(samples)*0.50)]
+	p99 = latenciesCopy[int(float64(samples)*0.99)]
+
+	return p25, p50, p99
+}
+
+func (lt *LatencyTracker) PutLatencyPercentiles() (p25, p50, p99 time.Duration) {
+	lt.mu.RLock()
+	defer lt.mu.RUnlock()
+
+	samples := lt.putCount
+	if samples > int64(lt.maxSamples) {
+		samples = int64(lt.maxSamples)
+	}
+
+	if samples == 0 {
+		return 0, 0, 0
+	}
+
+	latenciesCopy := make([]time.Duration, samples)
+	copy(latenciesCopy, lt.putLatencies[:samples])
+	sort.Slice(latenciesCopy, func(i, j int) bool {
+		return latenciesCopy[i] < latenciesCopy[j]
+	})
+
+	p25 = latenciesCopy[int(float64(samples)*0.25)]
+	p50 = latenciesCopy[int(float64(samples)*0.50)]
+	p99 = latenciesCopy[int(float64(samples)*0.99)]
+
+	return p25, p50, p99
+}
diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go
new file mode 100644
index 00000000..78e19deb
--- /dev/null
+++ b/flashring/internal/shard/shard_cache.go
@@ -0,0 +1,379 @@
+package filecache
+
+import (
+	"fmt"
+	"hash/crc32"
+	"sync"
+	"time"
+
+	"github.com/Meesho/BharatMLStack/flashring/internal/allocators"
+	"github.com/Meesho/BharatMLStack/flashring/internal/fs"
+	indices "github.com/Meesho/BharatMLStack/flashring/internal/indicesV3"
+	"github.com/Meesho/BharatMLStack/flashring/internal/maths"
+	"github.com/Meesho/BharatMLStack/flashring/internal/memtables"
+	"github.com/rs/zerolog/log"
+)
+
+type ShardCache struct {
+	keyIndex          *indices.Index
+	file              *fs.WrapAppendFile
+	mm                *memtables.MemtableManager
+	readPageAllocator *allocators.SlabAlignedPageAllocator
+	dm                *indices.DeleteManager
+	predictor         *maths.Predictor
+	startAt           int64
+	Stats             *Stats
+
+	//batching reads
+	BatchReader *BatchReaderV2
+
+	//Lockless read and write
+	ReadCh  chan *ReadRequestV2
+	WriteCh chan *WriteRequestV2
+}
+
+type Stats struct {
+	KeyNotFoundCount int
+	KeyExpiredCount  int
+	BadDataCount     int
+	BadLengthCount   int
+	BadCR32Count     int
+	BadKeyCount      int
+	MemIdCount       map[uint32]int
+	LastDeletedMemId uint32
+	DeletedKeyCount  int
+	BadCRCMemIds     map[uint32]int
+	BadKeyMemIds     map[uint32]int
+	BatchTracker     *BatchTracker
+}
+
+type ShardCacheConfig struct {
+	Rounds              int
+	RbInitial           int
+	RbMax               int
+	DeleteAmortizedStep int
+	MemtableSize        int32
+	MaxFileSize         int64
+	BlockSize           int
+	Directory           string
+	AsyncReadWorkers    int
+	AsyncQueueDepth     int
+	Predictor           *maths.Predictor
+
+	//batching reads
+	EnableBatching bool
+	BatchWindow    time.Duration
+	MaxBatchSize   int
+}
+
+func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache {
+	filename := fmt.Sprintf("%s/%d.bin", config.Directory, time.Now().UnixNano())
+	punchHoleSize := config.MemtableSize
+	fsConf := fs.FileConfig{
+		Filename:          filename,
+		MaxFileSize:       config.MaxFileSize,
+		FilePunchHoleSize: int64(punchHoleSize),
+		BlockSize:         config.BlockSize,
+	}
+	file, err := fs.NewWrapAppendFile(fsConf)
+	if err != nil {
+		log.Panic().Err(err).Msg("Failed to create file")
+	}
+	memtableManager, err := memtables.NewMemtableManager(file, config.MemtableSize)
+	if err != nil {
+		log.Panic().Err(err).Msg("Failed to create memtable manager")
+	}
+	ki := indices.NewIndex(0, config.RbInitial, config.RbMax, config.DeleteAmortizedStep)
+	sizeClasses := make([]allocators.SizeClass, 0)
+	i := fs.BLOCK_SIZE
+	iMax := (1 << 16)
+	for i < iMax {
+		sizeClasses = append(sizeClasses, allocators.SizeClass{Size: i, MinCount: 1000})
+		i *= 2
+	}
+	readPageAllocator, err := allocators.NewSlabAlignedPageAllocator(allocators.SlabAlignedPageAllocatorConfig{SizeClasses: sizeClasses})
+	if err != nil {
+		log.Panic().Err(err).Msg("Failed to create read page allocator")
+	}
+	dm := indices.NewDeleteManager(ki, file, config.DeleteAmortizedStep)
+	sc := &ShardCache{
+		keyIndex:          ki,
+		mm:                memtableManager,
+		file:              file,
+		readPageAllocator: readPageAllocator,
+		dm:                dm,
+		predictor:         config.Predictor,
+		startAt:           time.Now().Unix(),
+		Stats: &Stats{
+			MemIdCount:   make(map[uint32]int),
+			BadCRCMemIds: make(map[uint32]int),
+			BadKeyMemIds: make(map[uint32]int),
+			BatchTracker: NewBatchTracker(),
+		},
+	}
+
+	// Initialize batch reader if enabled
+	if config.EnableBatching {
+		sc.BatchReader = NewBatchReaderV2(BatchReaderV2Config{
+			BatchWindow:  config.BatchWindow,
+			MaxBatchSize: config.MaxBatchSize,
+		}, sc, sl)
+	}
+
+	sc.ReadCh = make(chan *ReadRequestV2, 500)
+	sc.WriteCh = make(chan *WriteRequestV2, 500)
+
+	go sc.startReadWriteRoutines()
+
+	return sc
+}
+
+// function that starts go routine to process the read and write requests
+func (fc *ShardCache) startReadWriteRoutines() {
+	go func() {
+		for {
+			select {
+			case writeReq := <-fc.WriteCh: // Writes get priority
+				err := fc.Put(writeReq.Key, writeReq.Value, writeReq.ExptimeInMinutes)
+				writeReq.Result <- err
+			case readReq := <-fc.ReadCh:
+				found, data, ttl, expired, shouldRewrite := fc.GetSlowPath(readReq.Key)
+				readReq.Result <- ReadResultV2{Found: found, Data: data, TTL: ttl, Expired: expired, ShouldRewrite: shouldRewrite, Error: nil}
+			}
+		}
+	}()
+}
+
+func (fc *ShardCache) Put(key string, value []byte, ttlMinutes uint16) error {
+	size := 4 + len(key) + len(value)
+	mt, mtId, _ := fc.mm.GetMemtable()
+	err := fc.dm.ExecuteDeleteIfNeeded()
+	if err != nil {
+		return err
+	}
+	buf, offset, length, readyForFlush := mt.GetBufForAppend(uint16(size))
+	if readyForFlush {
+		fc.mm.Flush()
+		mt, mtId, _ = fc.mm.GetMemtable()
+		buf, offset, length, _ = mt.GetBufForAppend(uint16(size))
+	}
+	copy(buf[4:], key)
+	copy(buf[4+len(key):], value)
+	crc := crc32.ChecksumIEEE(buf[4:])
+	indices.ByteOrder.PutUint32(buf[0:4], crc)
+	fc.keyIndex.Put(key, length, ttlMinutes, mtId, uint32(offset))
+	fc.dm.IncMemtableKeyCount(mtId)
+	fc.Stats.MemIdCount[mtId]++
+	return nil
+}
+
+func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) {
+	length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key)
+	if status == indices.StatusNotFound {
+		fc.Stats.KeyNotFoundCount++
+		return false, nil, 0, false, false
+	}
+
+	if status == indices.StatusExpired {
+		fc.Stats.KeyExpiredCount++
+		return false, nil, 0, true, false
+	}
+
+	_, currMemId, _ := fc.mm.GetMemtable()
+	shouldReWrite := fc.predictor.Predict(uint64(freq), uint64(lastAccess), memId, currMemId)
+
+	exists := true
+	var buf []byte
+	memtableExists := true
+	mt := fc.mm.GetMemtableById(memId)
+	if mt == nil {
+		memtableExists = false
+	}
+	if !memtableExists {
+		bufPtr := BufPool.Get().(*[]byte)
+		buf = *bufPtr
+		defer BufPool.Put(bufPtr)
+		fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset)
+		n := fc.readFromDisk(int64(fileOffset), length, buf)
+		if n != int(length) {
+			fc.Stats.BadLengthCount++
+			return false, nil, 0, false, shouldReWrite
+		}
+	} else {
+		buf, exists = mt.GetBufForRead(int(offset), length)
+		if !exists {
+			panic("memtable exists but buf not found")
+		}
+	}
+	gotCR32 := indices.ByteOrder.Uint32(buf[0:4])
+	computedCR32 := crc32.ChecksumIEEE(buf[4:])
+	gotKey := string(buf[4 : 4+len(key)])
+	if gotCR32 != computedCR32 {
+		fc.Stats.BadCR32Count++
+		fc.Stats.BadCRCMemIds[memId]++
+		return false, nil, 0, false, shouldReWrite
+	}
+	if gotKey != key {
+		fc.Stats.BadKeyCount++
+		fc.Stats.BadKeyMemIds[memId]++
+		return false, nil, 0, false, shouldReWrite
+	}
+	valLen := int(length) - 4 - len(key)
+	return true, buf[4+len(key) : 4+len(key)+valLen], remainingTTL, false, shouldReWrite
+}
+
+// GetFastPath attempts to read from memtable only (no disk I/O).
+// Returns: (found, data, ttl, expired, needsSlowPath)
+// If needsSlowPath is true, caller should use GetSlowPath for disk read.
+func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) {
+	length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key)
+	if status == indices.StatusNotFound {
+		fc.Stats.KeyNotFoundCount++
+		return false, nil, 0, false, false // needsSlowPath = false (not found)
+	}
+
+	if status == indices.StatusExpired {
+		fc.Stats.KeyExpiredCount++
+		return false, nil, 0, true, false // needsSlowPath = false (expired)
+	}
+
+	// Check if data is in memtable
+	mt := fc.mm.GetMemtableById(memId)
+	if mt == nil {
+		// Data not in memtable, needs disk read - signal slow path needed
+		return false, nil, remainingTTL, false, true // needsSlowPath = true
+	}
+
+	// Fast path: read from memtable
+	buf, exists := mt.GetBufForRead(int(offset), length)
+	if !exists {
+		panic("memtable exists but buf not found")
+	}
+
+	// Validate CRC and key
+	gotCR32 := indices.ByteOrder.Uint32(buf[0:4])
+	computedCR32 := crc32.ChecksumIEEE(buf[4:])
+	if gotCR32 != computedCR32 {
+		fc.Stats.BadCR32Count++
+		fc.Stats.BadCRCMemIds[memId]++
+		_, currMemId, _ := fc.mm.GetMemtable()
+		shouldReWrite := fc.predictor.Predict(uint64(freq), uint64(lastAccess), memId, currMemId)
+		_ = shouldReWrite // Not returning shouldReWrite in fast path for simplicity
+		return false, nil, 0, false, false
+	}
+
+	gotKey := string(buf[4 : 4+len(key)])
+	if gotKey != key {
+		fc.Stats.BadKeyCount++
+		fc.Stats.BadKeyMemIds[memId]++
+		return false, nil, 0, false, false
+	}
+
+	valLen := int(length) - 4 - len(key)
+	return true, buf[4+len(key) : 4+len(key)+valLen], remainingTTL, false, false // needsSlowPath = false
+}
+
+// GetSlowPath reads data from disk. Used when GetFastPath indicates needsSlowPath.
+// Returns: (found, data, ttl, expired, shouldRewrite)
+func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) {
+	length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key)
+	if status == indices.StatusNotFound {
+		fc.Stats.KeyNotFoundCount++
+		return false, nil, 0, false, false
+	}
+
+	if status == indices.StatusExpired {
+		fc.Stats.KeyExpiredCount++
+		return false, nil, 0, true, false
+	}
+
+	_, currMemId, _ := fc.mm.GetMemtable()
+	shouldReWrite := fc.predictor.Predict(uint64(freq), uint64(lastAccess), memId, currMemId)
+
+	// Check memtable again (might have changed since fast path check)
+	mt := fc.mm.GetMemtableById(memId)
+	if mt != nil {
+		// Data is now in memtable, use fast path logic
+		buf, exists := mt.GetBufForRead(int(offset), length)
+		if !exists {
+			panic("memtable exists but buf not found")
+		}
+		return fc.validateAndReturnBuffer(key, buf, length, memId, remainingTTL, shouldReWrite)
+	}
+
+	// Read from disk
+	bufPtr := BufPool.Get().(*[]byte)
+	buf := *bufPtr
+	defer BufPool.Put(bufPtr)
+	fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset)
+	n := fc.readFromDisk(int64(fileOffset), length, buf)
+	if n != int(length) {
+		fc.Stats.BadLengthCount++
+		return false, nil, 0, false, shouldReWrite
+	}
+
+	return fc.validateAndReturnBuffer(key, buf, length, memId, remainingTTL, shouldReWrite)
+}
+
+// validateAndReturnBuffer validates CRC and key, then returns the value
+func (fc *ShardCache) validateAndReturnBuffer(key string, buf []byte, length uint16, memId uint32, remainingTTL uint16, shouldReWrite bool) (bool, []byte, uint16, bool, bool) {
+	gotCR32 := indices.ByteOrder.Uint32(buf[0:4])
+	computedCR32 := crc32.ChecksumIEEE(buf[4:])
+	if gotCR32 != computedCR32 {
+		fc.Stats.BadCR32Count++
+		fc.Stats.BadCRCMemIds[memId]++
+		return false, nil, 0, false, shouldReWrite
+	}
+
+	gotKey := string(buf[4 : 4+len(key)])
+	if gotKey != key {
+		fc.Stats.BadKeyCount++
+		fc.Stats.BadKeyMemIds[memId]++
+		return false, nil, 0, false, shouldReWrite
+	}
+
+	valLen := int(length) - 4 - len(key)
+	return true, buf[4+len(key) : 4+len(key)+valLen], remainingTTL, false, shouldReWrite
+}
+
+func (fc *ShardCache) readFromDisk(fileOffset int64, length uint16, buf []byte) int {
+	alignedStartOffset := (fileOffset / fs.BLOCK_SIZE) * fs.BLOCK_SIZE
+	endndOffset := fileOffset + int64(length)
+	endAlignedOffset := ((endndOffset + fs.BLOCK_SIZE - 1) / fs.BLOCK_SIZE) * fs.BLOCK_SIZE
+	alignedReadSize := endAlignedOffset - alignedStartOffset
+	page := fc.readPageAllocator.Get(int(alignedReadSize))
+	fc.file.Pread(alignedStartOffset, page.Buf)
+	start := int(fileOffset - alignedStartOffset)
+	n := copy(buf, page.Buf[start:start+int(length)])
+	fc.readPageAllocator.Put(page)
+	return n
+}
+
+func (fc *ShardCache) GetRingBufferActiveEntries() int {
+	return fc.keyIndex.GetRB().ActiveEntries()
+}
+
+// batching reads
+func (fc *ShardCache) processBuffer(key string, buf []byte, length uint16) ReadResult {
+	gotCR32 := indices.ByteOrder.Uint32(buf[0:4])
+	computedCR32 := crc32.ChecksumIEEE(buf[4:])
+	gotKey := string(buf[4 : 4+len(key)])
+
+	if gotCR32 != computedCR32 {
+		fc.Stats.BadCR32Count++
+		return ReadResult{Found: false, Error: fmt.Errorf("crc mismatch")}
+	}
+	if gotKey != key {
+		fc.Stats.BadKeyCount++
+		return ReadResult{Found: false, Error: fmt.Errorf("key mismatch")}
+	}
+
+	valLen := int(length) - 4 - len(key)
+	value := make([]byte, valLen)
+	copy(value, buf[4+len(key):4+len(key)+valLen])
+
+	return ReadResult{
+		Found: true,
+		Data:  value,
+	}
+}
diff --git a/flashring/main.go b/flashring/main.go
new file mode 100644
index 00000000..66f4cfa9
--- /dev/null
+++ b/flashring/main.go
@@ -0,0 +1,412 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"runtime"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+	"unsafe"
+)
+
+const (
+	// Common page sizes (4KB is most common)
+	PageSize4K  = 4 * 1024
+	PageSize8K  = 8 * 1024
+	PageSize16K = 16 * 1024
+	PageSize64K = 64 * 1024
+
+	// Test data sizes
+	SmallRecord  = 128  // 128 bytes
+	MediumRecord = 1024 // 1KB
+	LargeRecord  = 8192 // 8KB
+)
+
+// PageAlignedBuffer provides page-aligned buffered writing
+type PageAlignedBuffer struct {
+	file       *os.File
+	buffer     []byte
+	bufferSize int
+	writePos   int
+	mu         sync.Mutex
+}
+
+// NewPageAlignedBuffer creates a new page-aligned buffer
+func NewPageAlignedBuffer(filename string, bufferSize int) (*PageAlignedBuffer, error) {
+	file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+	if err != nil {
+		return nil, err
+	}
+
+	// Align buffer to page boundary
+	buffer := make([]byte, bufferSize)
+
+	return &PageAlignedBuffer{
+		file:       file,
+		buffer:     buffer,
+		bufferSize: bufferSize,
+		writePos:   0,
+	}, nil
+}
+
+// Write writes data to the buffer, flushing when page size is reached
+func (pab *PageAlignedBuffer) Write(data []byte) error {
+	pab.mu.Lock()
+	defer pab.mu.Unlock()
+
+	dataLen := len(data)
+
+	// If data is larger than buffer, write directly
+	if dataLen > pab.bufferSize {
+		if pab.writePos > 0 {
+			if err := pab.flushUnsafe(); err != nil {
+				return err
+			}
+		}
+		_, err := pab.file.Write(data)
+		return err
+	}
+
+	// If data doesn't fit in current buffer, flush first
+	if pab.writePos+dataLen > pab.bufferSize {
+		if err := pab.flushUnsafe(); err != nil {
+			return err
+		}
+	}
+
+	// Copy data to buffer
+	copy(pab.buffer[pab.writePos:], data)
+	pab.writePos += dataLen
+
+	return nil
+}
+
+// Flush flushes the buffer to disk
+func (pab *PageAlignedBuffer) Flush() error {
+	pab.mu.Lock()
+	defer pab.mu.Unlock()
+	return pab.flushUnsafe()
+}
+
+func (pab *PageAlignedBuffer) flushUnsafe() error {
+	if pab.writePos == 0 {
+		return nil
+	}
+
+	_, err := pab.file.Write(pab.buffer[:pab.writePos])
+	if err != nil {
+		return err
+	}
+
+	pab.writePos = 0
+	return nil
+}
+
+// Sync syncs the file to disk
+func (pab *PageAlignedBuffer) Sync() error {
+	if err := pab.Flush(); err != nil {
+		return err
+	}
+	return pab.file.Sync()
+}
+
+// Close closes the buffer and file
+func (pab *PageAlignedBuffer) Close() error {
+	if err := pab.Flush(); err != nil {
+		return err
+	}
+	return pab.file.Close()
+}
+
+// DirectWriter wraps direct file writing
+type DirectWriter struct {
+	file *os.File
+}
+
+func NewDirectWriter(filename string) (*DirectWriter, error) {
+	file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+	if err != nil {
+		return nil, err
+	}
+	return &DirectWriter{file: file}, nil
+}
+
+func (dw *DirectWriter) Write(data []byte) error {
+	_, err := dw.file.Write(data)
+	return err
+}
+
+func (dw *DirectWriter) Sync() error {
+	return dw.file.Sync()
+}
+
+func (dw *DirectWriter) Close() error {
+	return dw.file.Close()
+}
+
+// MemoryMappedWriter uses memory mapping for writing
+type MemoryMappedWriter struct {
+	file     *os.File
+	data     []byte
+	size     int64
+	writePos int64
+	mu       sync.Mutex
+}
+
+func NewMemoryMappedWriter(filename string, size int64) (*MemoryMappedWriter, error) {
+	file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR, 0644)
+	if err != nil {
+		return nil, err
+	}
+
+	// Truncate file to desired size
+	if err := file.Truncate(size); err != nil {
+		file.Close()
+		return nil, err
+	}
+
+	// Memory map the file
+	data, err := syscall.Mmap(int(file.Fd()), 0, int(size), syscall.PROT_WRITE, syscall.MAP_SHARED)
+	if err != nil {
+		file.Close()
+		return nil, err
+	}
+
+	return &MemoryMappedWriter{
+		file:     file,
+		data:     data,
+		size:     size,
+		writePos: 0,
+	}, nil
+}
+
+func (mmw *MemoryMappedWriter) Write(data []byte) error {
+	mmw.mu.Lock()
+	defer mmw.mu.Unlock()
+
+	dataLen := int64(len(data))
+	if mmw.writePos+dataLen > mmw.size {
+		return fmt.Errorf("write would exceed mapped region")
+	}
+
+	copy(mmw.data[mmw.writePos:], data)
+	mmw.writePos += dataLen
+
+	return nil
+}
+
+func (mmw *MemoryMappedWriter) Sync() error {
+	// Use manual msync syscall since syscall.Msync might not be available on all platforms
+	_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(&mmw.data[0])), uintptr(len(mmw.data)), uintptr(syscall.MS_SYNC))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+func (mmw *MemoryMappedWriter) Close() error {
+	if err := syscall.Munmap(mmw.data); err != nil {
+		return err
+	}
+	return mmw.file.Close()
+}
+
+// Benchmark functions
+func benchmarkPageAlignedBuffer(recordSize, numRecords, bufferSize int) time.Duration {
+	filename := fmt.Sprintf("test_page_aligned_%d_%d_%d.log", recordSize, numRecords, bufferSize)
+	defer os.Remove(filename)
+
+	writer, err := NewPageAlignedBuffer(filename, bufferSize)
+	if err != nil {
+		panic(err)
+	}
+	defer writer.Close()
+
+	data := make([]byte, recordSize)
+	for i := 0; i < recordSize; i++ {
+		data[i] = byte(i % 256)
+	}
+
+	start := time.Now()
+
+	for i := 0; i < numRecords; i++ {
+		if err := writer.Write(data); err != nil {
+			panic(err)
+		}
+	}
+
+	if err := writer.Sync(); err != nil {
+		panic(err)
+	}
+
+	return time.Since(start)
+}
+
+func benchmarkDirectWrite(recordSize, numRecords int) time.Duration {
+	filename := fmt.Sprintf("test_direct_%d_%d.log", recordSize, numRecords)
+	defer os.Remove(filename)
+
+	writer, err := NewDirectWriter(filename)
+	if err != nil {
+		panic(err)
+	}
+	defer writer.Close()
+
+	data := make([]byte, recordSize)
+	for i := 0; i < recordSize; i++ {
+		data[i] = byte(i % 256)
+	}
+
+	start := time.Now()
+
+	for i := 0; i < numRecords; i++ {
+		if err := writer.Write(data); err != nil {
+			panic(err)
+		}
+	}
+
+	if err := writer.Sync(); err != nil {
+		panic(err)
+	}
+
+	return time.Since(start)
+}
+
+func benchmarkBufferedWrite(recordSize, numRecords, bufferSize int) time.Duration {
+	filename := fmt.Sprintf("test_buffered_%d_%d_%d.log", recordSize, numRecords, bufferSize)
+	defer os.Remove(filename)
+
+	file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+
+	writer := bufio.NewWriterSize(file, bufferSize)
+
+	data := make([]byte, recordSize)
+	for i := 0; i < recordSize; i++ {
+		data[i] = byte(i % 256)
+	}
+
+	start := time.Now()
+
+	for i := 0; i < numRecords; i++ {
+		if _, err := writer.Write(data); err != nil {
+			panic(err)
+		}
+	}
+
+	if err := writer.Flush(); err != nil {
+		panic(err)
+	}
+
+	if err := file.Sync(); err != nil {
+		panic(err)
+	}
+
+	return time.Since(start)
+}
+
+func benchmarkMemoryMapped(recordSize, numRecords int) time.Duration {
+	filename := fmt.Sprintf("test_mmap_%d_%d.log", recordSize, numRecords)
+	defer os.Remove(filename)
+
+	totalSize := int64(recordSize * numRecords)
+	writer, err := NewMemoryMappedWriter(filename, totalSize)
+	if err != nil {
+		panic(err)
+	}
+	defer writer.Close()
+
+	data := make([]byte, recordSize)
+	for i := 0; i < recordSize; i++ {
+		data[i] = byte(i % 256)
+	}
+
+	start := time.Now()
+
+	for i := 0; i < numRecords; i++ {
+		if err := writer.Write(data); err != nil {
+			panic(err)
+		}
+	}
+
+	if err := writer.Sync(); err != nil {
+		panic(err)
+	}
+
+	return time.Since(start)
+}
+
+func printResults(name string, duration time.Duration, recordSize, numRecords int) {
+	totalBytes := int64(recordSize * numRecords)
+	throughputMBps := float64(totalBytes) / duration.Seconds() / (1024 * 1024)
+	recordsPerSec := float64(numRecords) / duration.Seconds()
+
+	fmt.Printf("%-30s: %10s | %8.2f MB/s | %10.0f records/s | %8.2f MB total\n",
+		name, duration.Round(time.Microsecond), throughputMBps, recordsPerSec, float64(totalBytes)/(1024*1024))
+}
+
+func runBenchmarks() {
+	fmt.Println("=== Append-Only File Writing Benchmarks ===")
+	fmt.Printf("Go Version: %s, OS: %s, Arch: %s\n", runtime.Version(), runtime.GOOS, runtime.GOARCH)
+	fmt.Printf("CPUs: %d\n\n", runtime.NumCPU())
+
+	testCases := []struct {
+		recordSize int
+		numRecords int
+		name       string
+	}{
+		{SmallRecord, 100000, "Small Records (128B x 100K)"},
+		{MediumRecord, 50000, "Medium Records (1KB x 50K)"},
+		{LargeRecord, 10000, "Large Records (8KB x 10K)"},
+	}
+
+	bufferSizes := []int{PageSize4K, PageSize8K, PageSize16K, PageSize64K}
+
+	for _, tc := range testCases {
+		fmt.Printf("\n=== %s ===\n", tc.name)
+		fmt.Printf("%-30s: %10s | %8s | %10s | %8s\n", "Method", "Duration", "MB/s", "Records/s", "Total MB")
+		fmt.Println(strings.Repeat("-", 80))
+
+		// Direct write benchmark
+		duration := benchmarkDirectWrite(tc.recordSize, tc.numRecords)
+		printResults("Direct Write", duration, tc.recordSize, tc.numRecords)
+
+		// Buffered write benchmarks with different buffer sizes
+		for _, bufSize := range bufferSizes {
+			duration := benchmarkBufferedWrite(tc.recordSize, tc.numRecords, bufSize)
+			name := fmt.Sprintf("Buffered (%dK)", bufSize/1024)
+			printResults(name, duration, tc.recordSize, tc.numRecords)
+		}
+
+		// Page-aligned buffer benchmarks
+		for _, bufSize := range bufferSizes {
+			duration := benchmarkPageAlignedBuffer(tc.recordSize, tc.numRecords, bufSize)
+			name := fmt.Sprintf("Page-Aligned (%dK)", bufSize/1024)
+			printResults(name, duration, tc.recordSize, tc.numRecords)
+		}
+
+		// Memory-mapped benchmark (if total size is reasonable)
+		totalSize := int64(tc.recordSize * tc.numRecords)
+		if totalSize < 1024*1024*1024 { // Less than 1GB
+			duration := benchmarkMemoryMapped(tc.recordSize, tc.numRecords)
+			printResults("Memory Mapped", duration, tc.recordSize, tc.numRecords)
+		}
+	}
+
+	fmt.Println("\n=== Recommendations ===")
+	fmt.Println("1. For high-throughput workloads: Use page-aligned buffers with 16KB-64KB buffer sizes")
+	fmt.Println("2. For low-latency workloads: Use smaller buffers (4KB-8KB) with frequent flushing")
+	fmt.Println("3. For large sequential writes: Consider memory-mapped files")
+	fmt.Println("4. Always align buffer sizes to page boundaries for optimal performance")
+	fmt.Println("5. Use fdatasync instead of fsync when metadata updates aren't critical")
+}
+
+func main() {
+	runBenchmarks()
+}
diff --git a/flashring/pkg/async/context.go b/flashring/pkg/async/context.go
new file mode 100644
index 00000000..0c01bd35
--- /dev/null
+++ b/flashring/pkg/async/context.go
@@ -0,0 +1 @@
+package async
diff --git a/flashring/pkg/hierbitmap/map.go b/flashring/pkg/hierbitmap/map.go
new file mode 100644
index 00000000..18b2b180
--- /dev/null
+++ b/flashring/pkg/hierbitmap/map.go
@@ -0,0 +1,23 @@
+package hierbitmap
+
+type Bitmap64 [64]uint64
+
+type Level3 struct {
+	Leafs [64]interface{}
+	Sum   Bitmap64
+}
+
+type Level2 struct {
+	Nodes [64]Level3
+	Sum   Bitmap64
+}
+
+type Level1 struct {
+	Nodes [64]Level2
+	Sum   Bitmap64
+}
+
+type Level0 struct {
+	Level1 [64]Level1
+	Sum    Bitmap64
+}
diff --git a/flashring/pkg/ycsb/README.md b/flashring/pkg/ycsb/README.md
new file mode 100644
index 00000000..a31d76e9
--- /dev/null
+++ b/flashring/pkg/ycsb/README.md
@@ -0,0 +1,178 @@
+# YCSB Adapter for LRU Cache
+
+This package provides a Yahoo! Cloud Serving Benchmark (YCSB) adapter for the LRU cache implementation, enabling standardized performance testing and comparison with other storage systems.
+
+## Overview
+
+The YCSB adapter implements standard YCSB workloads for our LRU cache:
+
+- **Workload A**: Read/Update heavy (50%/50%) - Update heavy workload
+- **Workload B**: Read heavy (95%/5%) - Read mostly workload  
+- **Workload C**: Read only (100%) - Read only workload
+- **Workload D**: Read latest (95%/5%) - Read latest workload
+- **Workload F**: Read-modify-write (50%/50%) - Transaction workload
+
+## Features
+
+- ✅ Standard YCSB database interface implementation
+- ✅ Configurable cache capacity and eviction threshold
+- ✅ Multiple request distributions (uniform, zipfian, latest)
+- ✅ Comprehensive performance metrics
+- ✅ Cache hit rate tracking
+- ✅ Memory allocation profiling
+
+## Configuration
+
+```go
+config := YCSBConfig{
+    Capacity:          1000000, // 1M cache capacity
+    EvictionThreshold: 0.7,     // 70% eviction threshold
+    SlabSizes:         []int{64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384},
+}
+
+db, err := NewLRUCacheDB(config)
+```
+
+## Usage Examples
+
+### Basic Usage
+
+```go
+// Create database with default configuration
+db, err := NewLRUCacheDBWithDefaults()
+if err != nil {
+    log.Fatal(err)
+}
+
+// Insert a record
+ctx := context.Background()
+values := map[string][]byte{
+    "field0": []byte("test data"),
+}
+err = db.Insert(ctx, "table", "key1", values)
+
+// Read a record
+result, err := db.Read(ctx, "table", "key1", []string{"field0"})
+if err != nil {
+    log.Printf("Record not found: %v", err)
+} else {
+    fmt.Printf("Value: %s\n", result["field0"])
+}
+
+// Update a record
+err = db.Update(ctx, "table", "key1", values)
+
+// Get cache statistics
+stats := db.GetStats()
+fmt.Printf("Hit rate: %.2f%%\n", 
+    float64(stats.HitCount)/float64(stats.HitCount+stats.MissCount)*100)
+```
+
+## Running Benchmarks
+
+### All YCSB Workloads
+```bash
+cd ssd-cache
+go test -bench=BenchmarkYCSB_AllWorkloads -benchtime=1x -v ./pkg/ycsb/
+```
+
+### Individual Workloads
+```bash
+# Test read/update heavy workload
+go test -bench=BenchmarkYCSB_WorkloadA -benchtime=1x -v ./pkg/ycsb/
+
+# Test read-heavy workload  
+go test -bench=BenchmarkYCSB_WorkloadB -benchtime=1x -v ./pkg/ycsb/
+
+# Test read-only workload
+go test -bench=BenchmarkYCSB_WorkloadC -benchtime=1x -v ./pkg/ycsb/
+```
+
+### Custom Benchmark Parameters
+
+The benchmarks use these default parameters:
+- **Load Phase**: 1M records inserted
+- **Run Phase**: 500K operations executed
+- **Cache Capacity**: 500K (creating memory pressure)
+- **Record Size**: 1KB (100 bytes × 10 fields)
+
+## Benchmark Output
+
+Example output includes comprehensive metrics:
+
+```
+=== YCSB WorkloadA Benchmark Results ===
+Description: Read/Update heavy (50%/50%) - Update heavy workload
+
+--- Performance Metrics ---
+Load Throughput: 285,432.50 ops/sec
+Run Throughput: 892,145.23 ops/sec
+Average Latency: 1,120.45 ns/op
+
+--- Cache Statistics ---
+Cache Hit Rate: 78.45% (392,250/500,000)
+Final Cache Size: 350,000
+Eviction Events: 12
+Total Items Evicted: 840,000
+
+--- Memory Metrics ---
+Allocations per Operation: 3.24
+Bytes per Operation: 156.78
+```
+
+## Request Distributions
+
+### Uniform Distribution
+All keys have equal probability of being accessed.
+
+### Zipfian Distribution  
+Follows the 80/20 rule - 80% of requests target 20% of keys (hot data).
+
+### Latest Distribution
+Favors recently inserted keys (temporal locality).
+
+## Limitations
+
+- **Scan Operations**: Not supported (LRU cache doesn't maintain key ordering)
+- **Delete Operations**: Not explicitly supported (relies on LRU eviction)
+- **Range Queries**: Not applicable to key-value cache
+
+## Integration with go-ycsb
+
+To integrate with the official [go-ycsb](https://github.com/pingcap/go-ycsb) project:
+
+1. Register the database adapter:
+```go
+func init() {
+    RegisterDB("lru", func() DB { 
+        db, _ := NewLRUCacheDBWithDefaults()
+        return db 
+    })
+}
+```
+
+2. Use with go-ycsb CLI:
+```bash
+./go-ycsb load lru -P workloads/workloada
+./go-ycsb run lru -P workloads/workloada
+```
+
+## Performance Characteristics
+
+The LRU cache adapter demonstrates:
+
+- **High Throughput**: 500K+ ops/sec for mixed workloads
+- **Low Latency**: Sub-microsecond average latency
+- **Predictable Eviction**: LRU policy ensures consistent behavior
+- **Memory Efficiency**: Slab allocation reduces fragmentation
+
+## Comparison with Other Systems
+
+YCSB results can be directly compared with other storage systems tested using the same workloads, providing standardized performance benchmarks for:
+
+- **Redis/Memcached**: In-memory key-value stores
+- **RocksDB/LevelDB**: Persistent key-value stores  
+- **Cassandra/ScyllaDB**: Distributed databases
+- **MySQL/PostgreSQL**: Relational databases
+
+This enables objective performance comparisons and helps identify the LRU cache's optimal use cases. 
\ No newline at end of file
diff --git a/flashring/pkg/ycsb/bazel_workspace/BUILD.bazel b/flashring/pkg/ycsb/bazel_workspace/BUILD.bazel
new file mode 100644
index 00000000..b54e6c91
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/BUILD.bazel
@@ -0,0 +1,8 @@
+cc_binary(
+  name = "hello_world",
+  srcs = ["hello_world.cc"],
+  deps = [
+    "@abseil-cpp//absl/container:flat_hash_map",
+    "@abseil-cpp//absl/strings",
+  ],
+)
\ No newline at end of file
diff --git a/flashring/pkg/ycsb/bazel_workspace/MODULE.bazel b/flashring/pkg/ycsb/bazel_workspace/MODULE.bazel
new file mode 100644
index 00000000..74a4cb57
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/MODULE.bazel
@@ -0,0 +1,5 @@
+# MODULE.bazel
+
+# Choose the most recent version available at
+# https://registry.bazel.build/modules/abseil-cpp.
+bazel_dep(name = "abseil-cpp", version = "20240116.0")
\ No newline at end of file
diff --git a/flashring/pkg/ycsb/bazel_workspace/MODULE.bazel.lock b/flashring/pkg/ycsb/bazel_workspace/MODULE.bazel.lock
new file mode 100644
index 00000000..e44f7cdc
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/MODULE.bazel.lock
@@ -0,0 +1,205 @@
+{
+  "lockFileVersion": 18,
+  "registryFileHashes": {
+    "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497",
+    "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2",
+    "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230125.1/MODULE.bazel": "89047429cb0207707b2dface14ba7f8df85273d484c2572755be4bab7ce9c3a0",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.0.bcr.1/MODULE.bazel": "1c8cec495288dccd14fdae6e3f95f772c1c91857047a098fad772034264cc8cb",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.0/MODULE.bazel": "d253ae36a8bd9ee3c5955384096ccb6baf16a1b1e93e858370da0a3b94f77c16",
+    "https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915",
+    "https://bcr.bazel.build/modules/abseil-cpp/20240116.0/MODULE.bazel": "98dc378d64c12a4e4741ad3362f87fb737ee6a0886b2d90c3cdbb4d93ea3e0bf",
+    "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed",
+    "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/source.json": "9be551b8d4e3ef76875c0d744b5d6a504a27e3ae67bc6b28f46415fd2d2957da",
+    "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd",
+    "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8",
+    "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d",
+    "https://bcr.bazel.build/modules/bazel_features/1.17.0/MODULE.bazel": "039de32d21b816b47bd42c778e0454217e9c9caac4a3cf8e15c7231ee3ddee4d",
+    "https://bcr.bazel.build/modules/bazel_features/1.18.0/MODULE.bazel": "1be0ae2557ab3a72a57aeb31b29be347bcdc5d2b1eb1e70f39e3851a7e97041a",
+    "https://bcr.bazel.build/modules/bazel_features/1.19.0/MODULE.bazel": "59adcdf28230d220f0067b1f435b8537dd033bfff8db21335ef9217919c7fb58",
+    "https://bcr.bazel.build/modules/bazel_features/1.30.0/MODULE.bazel": "a14b62d05969a293b80257e72e597c2da7f717e1e69fa8b339703ed6731bec87",
+    "https://bcr.bazel.build/modules/bazel_features/1.30.0/source.json": "b07e17f067fe4f69f90b03b36ef1e08fe0d1f3cac254c1241a1818773e3423bc",
+    "https://bcr.bazel.build/modules/bazel_features/1.4.1/MODULE.bazel": "e45b6bb2350aff3e442ae1111c555e27eac1d915e77775f6fdc4b351b758b5d7",
+    "https://bcr.bazel.build/modules/bazel_features/1.9.1/MODULE.bazel": "8f679097876a9b609ad1f60249c49d68bfab783dd9be012faf9d82547b14815a",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.1.1/MODULE.bazel": "1add3e7d93ff2e6998f9e118022c84d163917d912f5afafb3058e3d2f1545b5e",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.2.0/MODULE.bazel": "44fe84260e454ed94ad326352a698422dbe372b21a1ac9f3eab76eb531223686",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.4.1/MODULE.bazel": "a0dcb779424be33100dcae821e9e27e4f2901d9dfd5333efe5ac6a8d7ab75e1d",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.4.2/MODULE.bazel": "3bd40978e7a1fac911d5989e6b09d8f64921865a45822d8b09e815eaa726a651",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.5.0/MODULE.bazel": "32880f5e2945ce6a03d1fbd588e9198c0a959bb42297b2cfaf1685b7bc32e138",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b",
+    "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/source.json": "f121b43eeefc7c29efbd51b83d08631e2347297c95aac9764a701f2a6a2bb953",
+    "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84",
+    "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8",
+    "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb",
+    "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4",
+    "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6",
+    "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/source.json": "41e9e129f80d8c8bf103a7acc337b76e54fad1214ac0a7084bf24f4cd924b8b4",
+    "https://bcr.bazel.build/modules/googletest/1.14.0/MODULE.bazel": "cfbcbf3e6eac06ef9d85900f64424708cc08687d1b527f0ef65aa7517af8118f",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.5/MODULE.bazel": "31271aedc59e815656f5736f282bb7509a97c7ecb43e927ac1a37966e0578075",
+    "https://bcr.bazel.build/modules/jsoncpp/1.9.5/source.json": "4108ee5085dd2885a341c7fab149429db457b3169b86eb081fa245eadf69169d",
+    "https://bcr.bazel.build/modules/libpfm/4.11.0/MODULE.bazel": "45061ff025b301940f1e30d2c16bea596c25b176c8b6b3087e92615adbd52902",
+    "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5",
+    "https://bcr.bazel.build/modules/platforms/0.0.11/MODULE.bazel": "0daefc49732e227caa8bfa834d65dc52e8cc18a2faf80df25e8caea151a9413f",
+    "https://bcr.bazel.build/modules/platforms/0.0.11/source.json": "f7e188b79ebedebfe75e9e1d098b8845226c7992b307e28e1496f23112e8fc29",
+    "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee",
+    "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37",
+    "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615",
+    "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814",
+    "https://bcr.bazel.build/modules/platforms/0.0.8/MODULE.bazel": "9f142c03e348f6d263719f5074b21ef3adf0b139ee4c5133e2aa35664da9eb2d",
+    "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7",
+    "https://bcr.bazel.build/modules/protobuf/27.0/MODULE.bazel": "7873b60be88844a0a1d8f80b9d5d20cfbd8495a689b8763e76c6372998d3f64c",
+    "https://bcr.bazel.build/modules/protobuf/27.1/MODULE.bazel": "703a7b614728bb06647f965264967a8ef1c39e09e8f167b3ca0bb1fd80449c0d",
+    "https://bcr.bazel.build/modules/protobuf/29.0-rc2/MODULE.bazel": "6241d35983510143049943fc0d57937937122baf1b287862f9dc8590fc4c37df",
+    "https://bcr.bazel.build/modules/protobuf/29.0/MODULE.bazel": "319dc8bf4c679ff87e71b1ccfb5a6e90a6dbc4693501d471f48662ac46d04e4e",
+    "https://bcr.bazel.build/modules/protobuf/29.0/source.json": "b857f93c796750eef95f0d61ee378f3420d00ee1dd38627b27193aa482f4f981",
+    "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/MODULE.bazel": "88af1c246226d87e65be78ed49ecd1e6f5e98648558c14ce99176da041dc378e",
+    "https://bcr.bazel.build/modules/pybind11_bazel/2.11.1/source.json": "be4789e951dd5301282729fe3d4938995dc4c1a81c2ff150afc9f1b0504c6022",
+    "https://bcr.bazel.build/modules/re2/2023-09-01/MODULE.bazel": "cb3d511531b16cfc78a225a9e2136007a48cf8a677e4264baeab57fe78a80206",
+    "https://bcr.bazel.build/modules/re2/2023-09-01/source.json": "e044ce89c2883cd957a2969a43e79f7752f9656f6b20050b62f90ede21ec6eb4",
+    "https://bcr.bazel.build/modules/rules_android/0.1.1/MODULE.bazel": "48809ab0091b07ad0182defb787c4c5328bd3a278938415c00a7b69b50c4d3a8",
+    "https://bcr.bazel.build/modules/rules_android/0.1.1/source.json": "e6986b41626ee10bdc864937ffb6d6bf275bb5b9c65120e6137d56e6331f089e",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.10/MODULE.bazel": "ec1705118f7eaedd6e118508d3d26deba2a4e76476ada7e0e3965211be012002",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.13/MODULE.bazel": "0e8529ed7b323dad0775ff924d2ae5af7640b23553dfcd4d34344c7e7a867191",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.14/MODULE.bazel": "5e343a3aac88b8d7af3b1b6d2093b55c347b8eefc2e7d1442f7a02dc8fea48ac",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.15/MODULE.bazel": "6704c35f7b4a72502ee81f61bf88706b54f06b3cbe5558ac17e2e14666cd5dcc",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.16/MODULE.bazel": "7661303b8fc1b4d7f532e54e9d6565771fea666fbdf839e0a86affcd02defe87",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.6/MODULE.bazel": "abf360251023dfe3efcef65ab9d56beefa8394d4176dd29529750e1c57eaa33f",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e",
+    "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5",
+    "https://bcr.bazel.build/modules/rules_cc/0.1.1/MODULE.bazel": "2f0222a6f229f0bf44cd711dc13c858dad98c62d52bd51d8fc3a764a83125513",
+    "https://bcr.bazel.build/modules/rules_cc/0.1.1/source.json": "d61627377bd7dd1da4652063e368d9366fc9a73920bfa396798ad92172cf645c",
+    "https://bcr.bazel.build/modules/rules_foreign_cc/0.9.0/MODULE.bazel": "c9e8c682bf75b0e7c704166d79b599f93b72cfca5ad7477df596947891feeef6",
+    "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/MODULE.bazel": "40c97d1144356f52905566c55811f13b299453a14ac7769dfba2ac38192337a8",
+    "https://bcr.bazel.build/modules/rules_fuzzing/0.5.2/source.json": "c8b1e2c717646f1702290959a3302a178fb639d987ab61d548105019f11e527e",
+    "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74",
+    "https://bcr.bazel.build/modules/rules_java/5.3.5/MODULE.bazel": "a4ec4f2db570171e3e5eb753276ee4b389bae16b96207e9d3230895c99644b86",
+    "https://bcr.bazel.build/modules/rules_java/6.0.0/MODULE.bazel": "8a43b7df601a7ec1af61d79345c17b31ea1fedc6711fd4abfd013ea612978e39",
+    "https://bcr.bazel.build/modules/rules_java/6.4.0/MODULE.bazel": "e986a9fe25aeaa84ac17ca093ef13a4637f6107375f64667a15999f77db6c8f6",
+    "https://bcr.bazel.build/modules/rules_java/6.5.2/MODULE.bazel": "1d440d262d0e08453fa0c4d8f699ba81609ed0e9a9a0f02cd10b3e7942e61e31",
+    "https://bcr.bazel.build/modules/rules_java/7.10.0/MODULE.bazel": "530c3beb3067e870561739f1144329a21c851ff771cd752a49e06e3dc9c2e71a",
+    "https://bcr.bazel.build/modules/rules_java/7.12.2/MODULE.bazel": "579c505165ee757a4280ef83cda0150eea193eed3bef50b1004ba88b99da6de6",
+    "https://bcr.bazel.build/modules/rules_java/7.2.0/MODULE.bazel": "06c0334c9be61e6cef2c8c84a7800cef502063269a5af25ceb100b192453d4ab",
+    "https://bcr.bazel.build/modules/rules_java/7.3.2/MODULE.bazel": "50dece891cfdf1741ea230d001aa9c14398062f2b7c066470accace78e412bc2",
+    "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe",
+    "https://bcr.bazel.build/modules/rules_java/8.12.0/MODULE.bazel": "8e6590b961f2defdfc2811c089c75716cb2f06c8a4edeb9a8d85eaa64ee2a761",
+    "https://bcr.bazel.build/modules/rules_java/8.12.0/source.json": "cbd5d55d9d38d4008a7d00bee5b5a5a4b6031fcd4a56515c9accbcd42c7be2ba",
+    "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.1/MODULE.bazel": "33f6f999e03183f7d088c9be518a63467dfd0be94a11d0055fe2d210f89aa909",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.2/MODULE.bazel": "d9351ba35217ad0de03816ef3ed63f89d411349353077348a45348b096615036",
+    "https://bcr.bazel.build/modules/rules_jvm_external/5.3/MODULE.bazel": "bf93870767689637164657731849fb887ad086739bd5d360d90007a581d5527d",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.1/MODULE.bazel": "75b5fec090dbd46cf9b7d8ea08cf84a0472d92ba3585b476f44c326eda8059c4",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.3/MODULE.bazel": "c998e060b85f71e00de5ec552019347c8bca255062c990ac02d051bb80a38df0",
+    "https://bcr.bazel.build/modules/rules_jvm_external/6.3/source.json": "6f5f5a5a4419ae4e37c35a5bb0a6ae657ed40b7abc5a5189111b47fcebe43197",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.0/MODULE.bazel": "ef85697305025e5a61f395d4eaede272a5393cee479ace6686dba707de804d59",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/MODULE.bazel": "d269a01a18ee74d0335450b10f62c9ed81f2321d7958a2934e44272fe82dcef3",
+    "https://bcr.bazel.build/modules/rules_kotlin/1.9.6/source.json": "2faa4794364282db7c06600b7e5e34867a564ae91bda7cae7c29c64e9466b7d5",
+    "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0",
+    "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d",
+    "https://bcr.bazel.build/modules/rules_license/1.0.0/MODULE.bazel": "a7fda60eefdf3d8c827262ba499957e4df06f659330bbe6cdbdb975b768bb65c",
+    "https://bcr.bazel.build/modules/rules_license/1.0.0/source.json": "a52c89e54cc311196e478f8382df91c15f7a2bfdf4c6cd0e2675cc2ff0b56efb",
+    "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc",
+    "https://bcr.bazel.build/modules/rules_pkg/1.0.1/MODULE.bazel": "5b1df97dbc29623bccdf2b0dcd0f5cb08e2f2c9050aab1092fd39a41e82686ff",
+    "https://bcr.bazel.build/modules/rules_pkg/1.0.1/source.json": "bd82e5d7b9ce2d31e380dd9f50c111d678c3bdaca190cb76b0e1c71b05e1ba8a",
+    "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06",
+    "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7",
+    "https://bcr.bazel.build/modules/rules_proto/6.0.2/MODULE.bazel": "ce916b775a62b90b61888052a416ccdda405212b6aaeb39522f7dc53431a5e73",
+    "https://bcr.bazel.build/modules/rules_proto/7.0.2/MODULE.bazel": "bf81793bd6d2ad89a37a40693e56c61b0ee30f7a7fdbaf3eabbf5f39de47dea2",
+    "https://bcr.bazel.build/modules/rules_proto/7.0.2/source.json": "1e5e7260ae32ef4f2b52fd1d0de8d03b606a44c91b694d2f1afb1d3b28a48ce1",
+    "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f",
+    "https://bcr.bazel.build/modules/rules_python/0.23.1/MODULE.bazel": "49ffccf0511cb8414de28321f5fcf2a31312b47c40cc21577144b7447f2bf300",
+    "https://bcr.bazel.build/modules/rules_python/0.25.0/MODULE.bazel": "72f1506841c920a1afec76975b35312410eea3aa7b63267436bfb1dd91d2d382",
+    "https://bcr.bazel.build/modules/rules_python/0.28.0/MODULE.bazel": "cba2573d870babc976664a912539b320cbaa7114cd3e8f053c720171cde331ed",
+    "https://bcr.bazel.build/modules/rules_python/0.31.0/MODULE.bazel": "93a43dc47ee570e6ec9f5779b2e64c1476a6ce921c48cc9a1678a91dd5f8fd58",
+    "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": "9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c",
+    "https://bcr.bazel.build/modules/rules_python/0.40.0/MODULE.bazel": "9d1a3cd88ed7d8e39583d9ffe56ae8a244f67783ae89b60caafc9f5cf318ada7",
+    "https://bcr.bazel.build/modules/rules_python/0.40.0/source.json": "939d4bd2e3110f27bfb360292986bb79fd8dcefb874358ccd6cdaa7bda029320",
+    "https://bcr.bazel.build/modules/rules_shell/0.2.0/MODULE.bazel": "fda8a652ab3c7d8fee214de05e7a9916d8b28082234e8d2c0094505c5268ed3c",
+    "https://bcr.bazel.build/modules/rules_shell/0.2.0/source.json": "7f27af3c28037d9701487c4744b5448d26537cc66cdef0d8df7ae85411f8de95",
+    "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8",
+    "https://bcr.bazel.build/modules/stardoc/0.5.3/MODULE.bazel": "c7f6948dae6999bf0db32c1858ae345f112cacf98f174c7a8bb707e41b974f1c",
+    "https://bcr.bazel.build/modules/stardoc/0.5.6/MODULE.bazel": "c43dabc564990eeab55e25ed61c07a1aadafe9ece96a4efabb3f8bf9063b71ef",
+    "https://bcr.bazel.build/modules/stardoc/0.7.0/MODULE.bazel": "05e3d6d30c099b6770e97da986c53bd31844d7f13d41412480ea265ac9e8079c",
+    "https://bcr.bazel.build/modules/stardoc/0.7.1/MODULE.bazel": "3548faea4ee5dda5580f9af150e79d0f6aea934fc60c1cc50f4efdd9420759e7",
+    "https://bcr.bazel.build/modules/stardoc/0.7.1/source.json": "b6500ffcd7b48cd72c29bb67bcac781e12701cc0d6d55d266a652583cfcdab01",
+    "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43",
+    "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/MODULE.bazel": "eec517b5bbe5492629466e11dae908d043364302283de25581e3eb944326c4ca",
+    "https://bcr.bazel.build/modules/zlib/1.3.1.bcr.5/source.json": "22bc55c47af97246cfc093d0acf683a7869377de362b5d1c552c2c2e16b7a806",
+    "https://bcr.bazel.build/modules/zlib/1.3.1/MODULE.bazel": "751c9940dcfe869f5f7274e1295422a34623555916eb98c174c1e945594bf198"
+  },
+  "selectedYankedVersions": {},
+  "moduleExtensions": {
+    "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": {
+      "general": {
+        "bzlTransitiveDigest": "hUTp2w+RUVdL7ma5esCXZJAFnX7vLbVfLd7FwnQI6bU=",
+        "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=",
+        "recordedFileInputs": {},
+        "recordedDirentsInputs": {},
+        "envVariables": {},
+        "generatedRepoSpecs": {
+          "com_github_jetbrains_kotlin_git": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_compiler_git_repository",
+            "attributes": {
+              "urls": [
+                "https://github.com/JetBrains/kotlin/releases/download/v1.9.23/kotlin-compiler-1.9.23.zip"
+              ],
+              "sha256": "93137d3aab9afa9b27cb06a824c2324195c6b6f6179d8a8653f440f5bd58be88"
+            }
+          },
+          "com_github_jetbrains_kotlin": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:compiler.bzl%kotlin_capabilities_repository",
+            "attributes": {
+              "git_repository_name": "com_github_jetbrains_kotlin_git",
+              "compiler_version": "1.9.23"
+            }
+          },
+          "com_github_google_ksp": {
+            "repoRuleId": "@@rules_kotlin+//src/main/starlark/core/repositories:ksp.bzl%ksp_compiler_plugin_repository",
+            "attributes": {
+              "urls": [
+                "https://github.com/google/ksp/releases/download/1.9.23-1.0.20/artifacts.zip"
+              ],
+              "sha256": "ee0618755913ef7fd6511288a232e8fad24838b9af6ea73972a76e81053c8c2d",
+              "strip_version": "1.9.23-1.0.20"
+            }
+          },
+          "com_github_pinterest_ktlint": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_file",
+            "attributes": {
+              "sha256": "01b2e0ef893383a50dbeb13970fe7fa3be36ca3e83259e01649945b09d736985",
+              "urls": [
+                "https://github.com/pinterest/ktlint/releases/download/1.3.0/ktlint"
+              ],
+              "executable": true
+            }
+          },
+          "rules_android": {
+            "repoRuleId": "@@bazel_tools//tools/build_defs/repo:http.bzl%http_archive",
+            "attributes": {
+              "sha256": "cd06d15dd8bb59926e4d65f9003bfc20f9da4b2519985c27e190cddc8b7a7806",
+              "strip_prefix": "rules_android-0.1.1",
+              "urls": [
+                "https://github.com/bazelbuild/rules_android/archive/v0.1.1.zip"
+              ]
+            }
+          }
+        },
+        "recordedRepoMappingEntries": [
+          [
+            "rules_kotlin+",
+            "bazel_tools",
+            "bazel_tools"
+          ]
+        ]
+      }
+    }
+  }
+}
diff --git a/flashring/pkg/ycsb/bazel_workspace/bazel-bazel_workspace b/flashring/pkg/ycsb/bazel_workspace/bazel-bazel_workspace
new file mode 120000
index 00000000..27644f1c
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/bazel-bazel_workspace
@@ -0,0 +1 @@
+/home/a0d00kc/.cache/bazel/_bazel_a0d00kc/ea88c144588668cbf32ba2f0c98bda83/execroot/_main
\ No newline at end of file
diff --git a/flashring/pkg/ycsb/bazel_workspace/bazel-bin b/flashring/pkg/ycsb/bazel_workspace/bazel-bin
new file mode 120000
index 00000000..ad7980a0
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/bazel-bin
@@ -0,0 +1 @@
+/home/a0d00kc/.cache/bazel/_bazel_a0d00kc/ea88c144588668cbf32ba2f0c98bda83/execroot/_main/bazel-out/k8-fastbuild/bin
\ No newline at end of file
diff --git a/flashring/pkg/ycsb/bazel_workspace/bazel-out b/flashring/pkg/ycsb/bazel_workspace/bazel-out
new file mode 120000
index 00000000..550ba267
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/bazel-out
@@ -0,0 +1 @@
+/home/a0d00kc/.cache/bazel/_bazel_a0d00kc/ea88c144588668cbf32ba2f0c98bda83/execroot/_main/bazel-out
\ No newline at end of file
diff --git a/flashring/pkg/ycsb/bazel_workspace/bazel-testlogs b/flashring/pkg/ycsb/bazel_workspace/bazel-testlogs
new file mode 120000
index 00000000..3af07959
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/bazel-testlogs
@@ -0,0 +1 @@
+/home/a0d00kc/.cache/bazel/_bazel_a0d00kc/ea88c144588668cbf32ba2f0c98bda83/execroot/_main/bazel-out/k8-fastbuild/testlogs
\ No newline at end of file
diff --git a/flashring/pkg/ycsb/bazel_workspace/hello_world.cc b/flashring/pkg/ycsb/bazel_workspace/hello_world.cc
new file mode 100644
index 00000000..28e07e08
--- /dev/null
+++ b/flashring/pkg/ycsb/bazel_workspace/hello_world.cc
@@ -0,0 +1,56 @@
+#include <iostream>
+#include <string>
+#include <chrono>
+#include <random>
+
+#include "absl/container/flat_hash_map.h"
+
+constexpr int kNumElements = 1'000'000;
+
+int main() {
+  absl::flat_hash_map<int, int> map;
+  map.reserve(kNumElements);
+
+
+  // Random number generator
+  std::mt19937 rng(42);
+  std::uniform_int_distribution<int> dist(1, kNumElements * 10);
+
+  std::vector<int> keys;
+  keys.reserve(kNumElements);
+  for (int i = 0; i < kNumElements; ++i) {
+    keys.push_back(dist(rng));
+  }
+
+  // Insertion benchmark
+  auto start_insert = std::chrono::high_resolution_clock::now();
+  for (int i = 0; i < kNumElements; ++i) {
+    map[keys[i]] = i;
+  }
+  auto end_insert = std::chrono::high_resolution_clock::now();
+  std::chrono::duration<double> insert_duration = end_insert - start_insert;
+  std::cout << "Insertion of " << kNumElements << " items took: " << insert_duration.count() << " seconds\n";
+
+  // Lookup benchmark
+  auto start_lookup = std::chrono::high_resolution_clock::now();
+  size_t found = 0;
+  for (int i = 0; i < kNumElements; ++i) {
+    if (map.find(keys[i]) != map.end()) {
+      ++found;
+    }
+  }
+  auto end_lookup = std::chrono::high_resolution_clock::now();
+  std::chrono::duration<double> lookup_duration = end_lookup - start_lookup;
+  std::cout << "Lookup of " << kNumElements << " items took: " << lookup_duration.count() << " seconds. Found: " << found << "\n";
+
+  // Optional: Deletion benchmark
+  auto start_erase = std::chrono::high_resolution_clock::now();
+  for (int i = 0; i < kNumElements; ++i) {
+    map.erase(keys[i]);
+  }
+  auto end_erase = std::chrono::high_resolution_clock::now();
+  std::chrono::duration<double> erase_duration = end_erase - start_erase;
+  std::cout << "Deletion of " << kNumElements << " items took: " << erase_duration.count() << " seconds\n";
+
+  return 0;
+}
diff --git a/flashring/pkg/ycsb/simdmap/match16_avx2_amd64.s b/flashring/pkg/ycsb/simdmap/match16_avx2_amd64.s
new file mode 100644
index 00000000..ede44804
--- /dev/null
+++ b/flashring/pkg/ycsb/simdmap/match16_avx2_amd64.s
@@ -0,0 +1,23 @@
+//go:build amd64 && avx2
+// +build amd64,avx2
+
+#include "textflag.h"
+
+// func match16_simd(ctrl *byte, h2 byte) uint16
+TEXT ·match16_simd(SB),NOSPLIT,$0-0
+    // DI = &ctrl[0];    SIL = h2 (byte parameter)
+
+    // Load 16 control bytes into Y0
+    VMOVDQU (DI), Y0
+
+    // Broadcast h2 from memory operand directly into Y1
+    VPBROADCASTB h2+8(FP), Y1
+
+    // Compare Y0 bytes with broadcasted h2
+    VPCMPEQB Y1, Y0, Y2
+
+    // Extract the MSBs of comparison result into AX as 16‑bit mask
+    VPMOVMSKB Y2, AX
+
+    VZEROUPPER
+    RET
diff --git a/flashring/pkg/ycsb/simdmap/match16_switch_avx2.go b/flashring/pkg/ycsb/simdmap/match16_switch_avx2.go
new file mode 100644
index 00000000..ea660045
--- /dev/null
+++ b/flashring/pkg/ycsb/simdmap/match16_switch_avx2.go
@@ -0,0 +1,7 @@
+//go:build amd64 && avx2
+// +build amd64,avx2
+
+package simdmap
+
+// Link‑time swap to SIMD fast‑path.
+func init() { match16 = match16_simd }
diff --git a/flashring/pkg/ycsb/simdmap/simdmap.go b/flashring/pkg/ycsb/simdmap/simdmap.go
new file mode 100644
index 00000000..6c53d120
--- /dev/null
+++ b/flashring/pkg/ycsb/simdmap/simdmap.go
@@ -0,0 +1,377 @@
+// // SPDX‑License‑Identifier: Apache‑2.0
+// // Package simdmap is a Swiss‑table open‑addressing hash map with an
+// // optional AVX2‑vectorised probe loop for amd64.  When the build tag
+// // `avx2` is *not* supplied or the CPU lacks AVX2, the implementation
+// // falls back to a tight scalar probe, keeping the package portable.
+// //
+// // Build (Go 1.22+):
+// //
+// //	$ go test -tags avx2 ./...    # AVX2 fast‑path on Intel/AMD ≥ Haswell/Zen1
+// //	$ go test          ./...     # scalar path (any GOARCH)
+// //
+// // The key type is fixed to uint64 (a 64‑bit fingerprint like xxhash).
+// // You will typically store metadata such as {Off uint64; Len uint32} as V.
+// package simdmap
+
+// import (
+// 	"math/bits"
+// 	"unsafe"
+// )
+
+// // --------------------------------------------------------------------- //
+// // Constants and tiny helpers
+// // --------------------------------------------------------------------- //
+
+// const (
+// 	groupSize  = 16 // 16 control bytes per Swiss group
+// 	ctrlEmpty  = 0x80
+// 	ctrlTomb   = 0xfe
+// 	loadFactor = 7 // 7/8 = 87.5 %
+// )
+
+// type entry[V any] struct {
+// 	hash uint64
+// 	val  V
+// }
+
+// type Map[V any] struct {
+// 	mask   uintptr
+// 	ctrl   []byte
+// 	slots  []entry[V]
+// 	size   uintptr
+// 	growth uintptr
+// }
+
+// // roundUpToGroups returns next power‑of‑two group count ≥ x.
+// func roundUpToGroups(x uintptr) uintptr {
+// 	if x < groupSize {
+// 		x = groupSize
+// 	}
+// 	return uintptr(1) << bits.Len(uint(x-1))
+// }
+
+// func New[V any](capacity int) *Map[V] {
+// 	groups := roundUpToGroups(uintptr(capacity))
+// 	n := groups * groupSize
+
+// 	m := &Map[V]{
+// 		mask:   n - 1,
+// 		ctrl:   make([]byte, n+groupSize), // sentinel group
+// 		slots:  make([]entry[V], n),
+// 		growth: (n * loadFactor) / 8,
+// 	}
+// 	for i := range m.ctrl {
+// 		m.ctrl[i] = ctrlEmpty
+// 	}
+// 	return m
+// }
+
+// // --------------------------------------------------------------------- //
+// // SIMD probe helpers
+// // --------------------------------------------------------------------- //
+
+// //go:noescape
+// func match16_simd(ctrl *byte, h2 byte) uint16 // provided in .s when avx2 tag
+
+// func match16_scalar(ctrl *byte, h2 byte) uint16 {
+// 	var m uint16
+// 	b := (*[groupSize]byte)(unsafe.Pointer(ctrl))
+// 	for i := 0; i < groupSize; i++ {
+// 		if b[i] == h2 {
+// 			m |= 1 << uint(i)
+// 		}
+// 	}
+// 	return m
+// }
+
+// // --------------------------------------------------------------------- //
+// // Build‑tag specific swap‑in of the SIMD fast‑path
+// // --------------------------------------------------------------------- //
+
+// var match16 = match16_scalar // overridden when the avx2 build‑tag is used
+
+// // --------------------------------------------------------------------- //
+// // Probe and API
+// // --------------------------------------------------------------------- //
+
+// func (m *Map[V]) findSlot(h uint64) (uintptr, bool) {
+// 	h1 := uintptr(h >> 7)
+// 	h2 := byte(h & 0x7f)
+// 	maskGroups := m.mask & ^uintptr(groupSize-1)
+
+// 	for {
+// 		grp := h1 & maskGroups
+// 		cptr := (*byte)(unsafe.Pointer(&m.ctrl[grp]))
+
+// 		if mask := match16(cptr, h2); mask != 0 {
+// 			for mask != 0 {
+// 				i := bits.TrailingZeros16(mask)
+// 				idx := grp + uintptr(i)
+// 				if m.slots[idx].hash == h {
+// 					return idx, true
+// 				}
+// 				mask &^= 1 << uint(i)
+// 			}
+// 		}
+// 		for i := 0; i < groupSize; i++ {
+// 			if *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(cptr)) + uintptr(i))) >= ctrlEmpty {
+// 				return grp + uintptr(i), false
+// 			}
+// 		}
+// 		h1 += groupSize
+// 	}
+// }
+
+// func (m *Map[V]) Get(hash uint64) (V, bool) {
+// 	var zero V
+// 	idx, ok := m.findSlot(hash)
+// 	if !ok {
+// 		return zero, false
+// 	}
+// 	return m.slots[idx].val, true
+// }
+
+// func (m *Map[V]) putEntry(hash uint64, v V) {
+// 	idx, found := m.findSlot(hash)
+// 	if !found {
+// 		m.size++
+// 	}
+// 	m.ctrl[idx] = byte(hash & 0x7f)
+// 	m.slots[idx] = entry[V]{hash: hash, val: v}
+// }
+
+// func (m *Map[V]) Put(hash uint64, v V) {
+// 	m.putEntry(hash, v)
+// 	if m.size >= m.growth {
+// 		m.rehash()
+// 	}
+// }
+
+// func (m *Map[V]) Delete(hash uint64) bool {
+// 	idx, ok := m.findSlot(hash)
+// 	if !ok {
+// 		return false
+// 	}
+// 	m.ctrl[idx] = ctrlTomb
+// 	m.size--
+// 	return true
+// }
+
+// // --------------------------------------------------------------------- //
+// // Resize
+// // --------------------------------------------------------------------- //
+
+// func (m *Map[V]) rehash() {
+// 	oldCtrl, oldSlots := m.ctrl, m.slots
+// 	newLen := uintptr(len(oldSlots) * 2)
+
+// 	m.ctrl = make([]byte, newLen+groupSize)
+// 	for i := range m.ctrl {
+// 		m.ctrl[i] = ctrlEmpty
+// 	}
+// 	m.slots = make([]entry[V], newLen)
+// 	m.mask = newLen - 1
+// 	m.size = 0
+// 	m.growth = (newLen * loadFactor) / 8
+
+// 	for i, c := range oldCtrl[:len(oldSlots)] {
+// 		if c < ctrlEmpty {
+// 			e := oldSlots[i]
+// 			m.putEntry(e.hash, e.val)
+// 		}
+// 	}
+// }
+
+// SPDX‑License‑Identifier: Apache‑2.0
+// Incremental‑rehash version of simdmap.
+// Only the growth logic has changed; probe loop and SIMD assembly are
+// untouched.  A single `Put` moves at most `migrateStep` live entries
+// from the old table to the new, flattening the latency spike to <5 µs.
+//
+// Build / tags unchanged:
+//
+//	go test -tags avx2 ./...
+package simdmap
+
+import (
+	"math/bits"
+	"unsafe"
+)
+
+const (
+	groupSize   = 16
+	ctrlEmpty   = 0x80
+	ctrlTomb    = 0xfe
+	loadFactor  = 7
+	migrateStep = 128 // live entries moved per mutation (tune!)
+)
+
+type entry[V any] struct {
+	hash uint64
+	val  V
+}
+
+type Map[V any] struct {
+	// active table
+	mask   uintptr
+	ctrl   []byte
+	slots  []entry[V]
+	size   uintptr
+	growth uintptr
+
+	// incremental‑rehash state (nil when not migrating)
+	oldCtrl  []byte
+	oldSlots []entry[V]
+	rehashAt uintptr // next index to migrate
+}
+
+// --- constructor unchanged -------------------------------------------------
+
+func roundUpToGroups(x uintptr) uintptr {
+	if x < groupSize {
+		x = groupSize
+	}
+	return uintptr(1) << bits.Len(uint(x-1))
+}
+
+func New[V any](capHint int) *Map[V] {
+	groups := roundUpToGroups(uintptr(capHint))
+	n := groups * groupSize
+	m := &Map[V]{
+		mask:   n - 1,
+		ctrl:   make([]byte, n+groupSize),
+		slots:  make([]entry[V], n),
+		growth: (n * loadFactor) / 8,
+	}
+	for i := range m.ctrl {
+		m.ctrl[i] = ctrlEmpty
+	}
+	return m
+}
+
+// --- SIMD probe machinery (unchanged) -------------------------------------
+
+//go:noescape
+func match16_simd(*byte, byte) uint16
+
+func match16_scalar(ctrl *byte, h2 byte) uint16 {
+	var m uint16
+	b := (*[groupSize]byte)(unsafe.Pointer(ctrl))
+	for i := 0; i < groupSize; i++ {
+		if b[i] == h2 {
+			m |= 1 << uint(i)
+		}
+	}
+	return m
+}
+
+var match16 = match16_scalar // overridden by build‑tag file
+
+func (m *Map[V]) findSlot(h uint64) (uintptr, bool) {
+	h1 := uintptr(h >> 7)
+	h2 := byte(h & 0x7f)
+	maskGroups := m.mask & ^uintptr(groupSize-1)
+	for {
+		grp := h1 & maskGroups
+		cptr := (*byte)(unsafe.Pointer(&m.ctrl[grp]))
+		if mask := match16(cptr, h2); mask != 0 {
+			for mask != 0 {
+				i := bits.TrailingZeros16(mask)
+				idx := grp + uintptr(i)
+				if m.slots[idx].hash == h {
+					return idx, true
+				}
+				mask &^= 1 << uint(i)
+			}
+		}
+		for i := 0; i < groupSize; i++ {
+			if *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(cptr)) + uintptr(i))) >= ctrlEmpty {
+				return grp + uintptr(i), false
+			}
+		}
+		h1 += groupSize
+	}
+}
+
+// ---------------- incremental migration helpers ---------------------------
+
+func (m *Map[V]) migrateSome() {
+	if m.oldCtrl == nil { // not in rehash
+		return
+	}
+	moved := 0
+	oldLen := uintptr(len(m.oldSlots))
+
+	for moved < migrateStep && m.rehashAt < oldLen {
+		c := m.oldCtrl[m.rehashAt]
+		if c < ctrlEmpty {
+			e := m.oldSlots[m.rehashAt]
+			m.putEntry(e.hash, e.val) // into new table
+			moved++
+		}
+		m.rehashAt++
+	}
+
+	// finished?
+	if m.rehashAt >= oldLen {
+		m.oldCtrl, m.oldSlots = nil, nil
+	}
+}
+
+func (m *Map[V]) startRehash() {
+	if m.oldCtrl != nil {
+		return // already running
+	}
+	m.oldCtrl, m.oldSlots = m.ctrl, m.slots
+
+	newLen := uintptr(len(m.oldSlots) * 2)
+	m.ctrl = make([]byte, newLen+groupSize)
+	for i := range m.ctrl {
+		m.ctrl[i] = ctrlEmpty
+	}
+	m.slots = make([]entry[V], newLen)
+	m.mask = newLen - 1
+	m.size = 0
+	m.growth = (newLen * loadFactor) / 8
+	m.rehashAt = 0
+}
+
+// ---------------- public API (Put/Get/Delete) -----------------------------
+
+func (m *Map[V]) Get(hash uint64) (V, bool) {
+	m.migrateSome()
+	var zero V
+	idx, ok := m.findSlot(hash)
+	if !ok {
+		return zero, false
+	}
+	return m.slots[idx].val, true
+}
+
+func (m *Map[V]) putEntry(hash uint64, v V) {
+	idx, found := m.findSlot(hash)
+	if !found {
+		m.size++
+	}
+	m.ctrl[idx] = byte(hash & 0x7f)
+	m.slots[idx] = entry[V]{hash: hash, val: v}
+}
+
+func (m *Map[V]) Put(hash uint64, v V) {
+	m.migrateSome()
+	m.putEntry(hash, v)
+	if m.size >= m.growth {
+		m.startRehash()
+	}
+}
+
+func (m *Map[V]) Delete(hash uint64) bool {
+	m.migrateSome()
+	idx, ok := m.findSlot(hash)
+	if !ok {
+		return false
+	}
+	m.ctrl[idx] = ctrlTomb
+	m.size--
+	return true
+}
diff --git a/flashring/pkg/ycsb/simdmap/simdmap_test.go b/flashring/pkg/ycsb/simdmap/simdmap_test.go
new file mode 100644
index 00000000..39ab13e6
--- /dev/null
+++ b/flashring/pkg/ycsb/simdmap/simdmap_test.go
@@ -0,0 +1,156 @@
+package simdmap
+
+import (
+	crand "crypto/rand"
+	"encoding/binary"
+	"math/rand"
+	"testing"
+)
+
+func TestPutGet(t *testing.T) {
+	m := New[int](1 << 10)
+
+	// Insert 10 000 random keys.
+	kvs := make([]uint64, 10_000)
+	for i := range kvs {
+		_ = binary.Read(crand.Reader, binary.LittleEndian, &kvs[i])
+		m.Put(kvs[i], int(i))
+	}
+
+	// Verify all keys are present.
+	for i, k := range kvs {
+		v, ok := m.Get(k)
+		if !ok || v != i {
+			t.Fatalf("key %d lost: got (%d,%v)", k, v, ok)
+		}
+	}
+
+	// Delete half, ensure they’re gone.
+	for i := 0; i < len(kvs); i += 2 {
+		m.Delete(kvs[i])
+		if _, ok := m.Get(kvs[i]); ok {
+			t.Fatalf("key %d should have been deleted", kvs[i])
+		}
+	}
+}
+
+func BenchmarkMixed_SIMDMap(b *testing.B) {
+
+	//m := map[uint64]struct{}{}
+	sm := New[struct{}](1_000_000)
+	b.Run("simdmap-put", func(b *testing.B) {
+
+		var h uint64
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			_ = binary.Read(crand.Reader, binary.LittleEndian, &h)
+			sm.Put(h, struct{}{})
+		}
+		b.StopTimer()
+		b.ReportAllocs()
+	})
+
+	b.Run("simdmap-get", func(b *testing.B) {
+		var h uint64
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			_ = binary.Read(crand.Reader, binary.LittleEndian, &h)
+			_, _ = sm.Get(h)
+		}
+		b.StopTimer()
+		b.ReportAllocs()
+	})
+
+}
+
+func BenchmarkMixed_GOMap(b *testing.B) {
+	m := make(map[uint64]struct{}, 1_000_000)
+	b.Run("map-put", func(b *testing.B) {
+		var h uint64
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			_ = binary.Read(crand.Reader, binary.LittleEndian, &h)
+			m[h] = struct{}{}
+		}
+		b.StopTimer()
+		b.ReportAllocs()
+	})
+
+	b.Run("map-get", func(b *testing.B) {
+
+		var h uint64
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			_ = binary.Read(crand.Reader, binary.LittleEndian, &h)
+			_, _ = m[h]
+		}
+		b.StopTimer()
+		b.ReportAllocs()
+	})
+}
+
+func BenchmarkGet_Hit(b *testing.B) {
+	m := New[struct{}](1 << 20)
+
+	// Fill the map with 1 M random keys
+	keys := make([]uint64, 1<<20)
+	for i := range keys {
+		_ = binary.Read(crand.Reader, binary.LittleEndian, &keys[i])
+		m.Put(keys[i], struct{}{})
+	}
+
+	// Deterministic PRNG for benchmark loop
+	rng := rand.New(rand.NewSource(42))
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		k := keys[rng.Intn(len(keys))]
+		_, _ = m.Get(k)
+	}
+}
+
+// -------- ultra‑cheap 64‑bit key generator (SplitMix64) -------------
+var x uint64 = 0x9e3779b97f4a7c15
+
+func next() uint64 {
+	z := x + 0x9e3779b97f4a7c15
+	x = z
+	z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9
+	z = (z ^ (z >> 27)) * 0x94d049bb133111eb
+	return z ^ (z >> 31)
+}
+
+// ------------ reusable key slice: *zero* cost in hot loop -----------
+const N = 1 << 20 // 1 048 576 keys
+var keys [N]uint64
+
+func init() {
+	for i := range keys {
+		keys[i] = next()
+	}
+}
+
+// ----------------------- benchmarks ---------------------------------
+func BenchmarkPutGet_SIMD(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		m := New[struct{}](N) // capacity == live set
+		for _, k := range keys {
+			m.Put(k, struct{}{})
+		}
+		for _, k := range keys {
+			_, _ = m.Get(k)
+		}
+	}
+}
+
+func BenchmarkPutGet_Go(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		m := make(map[uint64]struct{}, N) // same load‑factor
+		for _, k := range keys {
+			m[k] = struct{}{}
+		}
+		for _, k := range keys {
+			_, _ = m[k]
+		}
+	}
+}
diff --git a/flashring/pkg/ycsb/ycsb_bench_test.go b/flashring/pkg/ycsb/ycsb_bench_test.go
new file mode 100644
index 00000000..03665d86
--- /dev/null
+++ b/flashring/pkg/ycsb/ycsb_bench_test.go
@@ -0,0 +1,354 @@
+package ycsb
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"runtime"
+	"testing"
+	"time"
+)
+
+// YCSB Workload configurations based on standard YCSB workloads
+type WorkloadConfig struct {
+	Name                string
+	ReadProportion      float64
+	UpdateProportion    float64
+	InsertProportion    float64
+	ScanProportion      float64
+	ReadModifyWriteProp float64
+	RequestDistribution string // "uniform", "zipfian", "latest"
+	Description         string
+}
+
+// Standard YCSB Workloads
+var (
+	WorkloadA = WorkloadConfig{
+		Name:                "WorkloadA",
+		ReadProportion:      0.5,
+		UpdateProportion:    0.5,
+		InsertProportion:    0.0,
+		ScanProportion:      0.0,
+		ReadModifyWriteProp: 0.0,
+		RequestDistribution: "zipfian",
+		Description:         "Read/Update heavy (50%/50%) - Update heavy workload",
+	}
+
+	WorkloadB = WorkloadConfig{
+		Name:                "WorkloadB",
+		ReadProportion:      0.95,
+		UpdateProportion:    0.05,
+		InsertProportion:    0.0,
+		ScanProportion:      0.0,
+		ReadModifyWriteProp: 0.0,
+		RequestDistribution: "zipfian",
+		Description:         "Read heavy (95%/5%) - Read mostly workload",
+	}
+
+	WorkloadC = WorkloadConfig{
+		Name:                "WorkloadC",
+		ReadProportion:      1.0,
+		UpdateProportion:    0.0,
+		InsertProportion:    0.0,
+		ScanProportion:      0.0,
+		ReadModifyWriteProp: 0.0,
+		RequestDistribution: "zipfian",
+		Description:         "Read only (100%) - Read only workload",
+	}
+
+	WorkloadD = WorkloadConfig{
+		Name:                "WorkloadD",
+		ReadProportion:      0.95,
+		UpdateProportion:    0.0,
+		InsertProportion:    0.05,
+		ScanProportion:      0.0,
+		ReadModifyWriteProp: 0.0,
+		RequestDistribution: "latest",
+		Description:         "Read latest (95%/5%) - Read latest workload",
+	}
+
+	WorkloadF = WorkloadConfig{
+		Name:                "WorkloadF",
+		ReadProportion:      0.5,
+		UpdateProportion:    0.0,
+		InsertProportion:    0.0,
+		ScanProportion:      0.0,
+		ReadModifyWriteProp: 0.5,
+		RequestDistribution: "zipfian",
+		Description:         "Read-modify-write (50%/50%) - Transaction workload",
+	}
+)
+
+// BenchmarkYCSB_AllWorkloads runs all standard YCSB workloads
+func BenchmarkYCSB_AllWorkloads(b *testing.B) {
+	workloads := []WorkloadConfig{WorkloadA, WorkloadB, WorkloadC, WorkloadD, WorkloadF}
+
+	for _, workload := range workloads {
+		b.Run(workload.Name, func(b *testing.B) {
+			benchmarkYCSBWorkload(b, workload)
+		})
+	}
+}
+
+// BenchmarkYCSB_WorkloadA tests read/update heavy workload
+func BenchmarkYCSB_WorkloadA(b *testing.B) {
+	benchmarkYCSBWorkload(b, WorkloadA)
+}
+
+// BenchmarkYCSB_WorkloadB tests read heavy workload
+func BenchmarkYCSB_WorkloadB(b *testing.B) {
+	benchmarkYCSBWorkload(b, WorkloadB)
+}
+
+// BenchmarkYCSB_WorkloadC tests read only workload
+func BenchmarkYCSB_WorkloadC(b *testing.B) {
+	benchmarkYCSBWorkload(b, WorkloadC)
+}
+
+func benchmarkYCSBWorkload(b *testing.B, workload WorkloadConfig) {
+	const (
+		recordCount    = 1000000 // 1M records for load phase
+		operationCount = 500000  // 500K operations for run phase
+		fieldLength    = 100     // 100 bytes per field
+		fieldCount     = 10      // 10 fields per record
+	)
+
+	// Create YCSB configuration
+	config := YCSBConfig{
+		Capacity:          500000, // 500K capacity (half of record count)
+		EvictionThreshold: 0.7,    // 70% eviction threshold
+		SlabSizes:         []int{64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384},
+	}
+
+	// Create test data
+	testValue := make([]byte, fieldLength*fieldCount)
+	for i := range testValue {
+		testValue[i] = byte(i % 256)
+	}
+
+	// Initialize random seed
+	rand.Seed(time.Now().UnixNano())
+
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		b.StopTimer()
+
+		// Create fresh database for each iteration
+		db, err := NewLRUCacheDB(config)
+		if err != nil {
+			b.Fatalf("Failed to create LRU cache DB: %v", err)
+		}
+
+		var memStatsBefore, memStatsAfter runtime.MemStats
+		runtime.GC()
+		runtime.ReadMemStats(&memStatsBefore)
+
+		// Counters for operation tracking
+		var readOps, updateOps, insertOps, rmwOps int64
+		var readHits, readMisses int64
+
+		b.StartTimer()
+		startTime := time.Now()
+
+		// Load phase: Insert initial records
+		ctx := context.Background()
+		for i := 0; i < recordCount; i++ {
+			key := fmt.Sprintf("user%010d", i)
+			values := map[string][]byte{
+				"field0": testValue,
+			}
+			err := db.Insert(ctx, "usertable", key, values)
+			if err != nil {
+				b.Fatalf("Insert failed: %v", err)
+			}
+		}
+
+		loadDuration := time.Since(startTime)
+
+		// Run phase: Execute workload operations
+		runStartTime := time.Now()
+		for i := 0; i < operationCount; i++ {
+			key := generateKey(i, recordCount, workload.RequestDistribution)
+			operation := selectOperation(workload)
+
+			switch operation {
+			case "read":
+				_, err := db.Read(ctx, "usertable", key, []string{"field0"})
+				if err != nil {
+					readMisses++
+				} else {
+					readHits++
+				}
+				readOps++
+
+			case "update":
+				values := map[string][]byte{
+					"field0": testValue,
+				}
+				err := db.Update(ctx, "usertable", key, values)
+				if err != nil {
+					b.Errorf("Update failed: %v", err)
+				}
+				updateOps++
+
+			case "insert":
+				// For insert operations, use a new key
+				newKey := fmt.Sprintf("user%010d", recordCount+i)
+				values := map[string][]byte{
+					"field0": testValue,
+				}
+				err := db.Insert(ctx, "usertable", newKey, values)
+				if err != nil {
+					b.Errorf("Insert failed: %v", err)
+				}
+				insertOps++
+
+			case "readmodifywrite":
+				// Read-modify-write operation
+				_, err := db.Read(ctx, "usertable", key, []string{"field0"})
+				if err != nil {
+					readMisses++
+				} else {
+					readHits++
+					// Modify and write back
+					values := map[string][]byte{
+						"field0": testValue,
+					}
+					err = db.Update(ctx, "usertable", key, values)
+					if err != nil {
+						b.Errorf("Read-modify-write update failed: %v", err)
+					}
+				}
+				rmwOps++
+			}
+		}
+
+		runDuration := time.Since(runStartTime)
+		totalDuration := time.Since(startTime)
+
+		b.StopTimer()
+
+		runtime.GC()
+		runtime.ReadMemStats(&memStatsAfter)
+
+		// Get cache statistics
+		stats := db.GetStats()
+
+		// Calculate metrics
+		totalOps := readOps + updateOps + insertOps + rmwOps
+		throughput := float64(totalOps) / runDuration.Seconds()
+		loadThroughput := float64(recordCount) / loadDuration.Seconds()
+
+		// Calculate hit rates
+		cacheHitRate := float64(stats.HitCount) / float64(stats.HitCount+stats.MissCount) * 100
+		workloadHitRate := float64(readHits) / float64(readHits+readMisses) * 100
+
+		// Calculate memory metrics
+		allocsPerOp := float64(memStatsAfter.Mallocs-memStatsBefore.Mallocs) / float64(totalOps+recordCount)
+		bytesPerOp := float64(memStatsAfter.TotalAlloc-memStatsBefore.TotalAlloc) / float64(totalOps+recordCount)
+
+		// Report benchmark metrics
+		b.ReportMetric(throughput, "ops/sec")
+		b.ReportMetric(float64(runDuration.Nanoseconds())/float64(totalOps), "ns/op")
+		b.ReportMetric(workloadHitRate, "hit_rate_%")
+		b.ReportMetric(allocsPerOp, "allocs/op")
+		b.ReportMetric(bytesPerOp, "B/op")
+
+		// Log detailed stats on first iteration
+		if n == 0 {
+			b.Logf("\n=== YCSB %s Benchmark Results ===", workload.Name)
+			b.Logf("Description: %s", workload.Description)
+			b.Logf("\n--- Workload Configuration ---")
+			b.Logf("Read Proportion: %.1f%%", workload.ReadProportion*100)
+			b.Logf("Update Proportion: %.1f%%", workload.UpdateProportion*100)
+			b.Logf("Insert Proportion: %.1f%%", workload.InsertProportion*100)
+			b.Logf("Read-Modify-Write Proportion: %.1f%%", workload.ReadModifyWriteProp*100)
+			b.Logf("Request Distribution: %s", workload.RequestDistribution)
+
+			b.Logf("\n--- Performance Metrics ---")
+			b.Logf("Load Throughput: %.2f ops/sec", loadThroughput)
+			b.Logf("Run Throughput: %.2f ops/sec", throughput)
+			b.Logf("Average Latency: %.2f ns/op", float64(runDuration.Nanoseconds())/float64(totalOps))
+
+			b.Logf("\n--- Operation Breakdown ---")
+			b.Logf("Read Operations: %d (%.1f%%)", readOps, float64(readOps)/float64(totalOps)*100)
+			b.Logf("Update Operations: %d (%.1f%%)", updateOps, float64(updateOps)/float64(totalOps)*100)
+			b.Logf("Insert Operations: %d (%.1f%%)", insertOps, float64(insertOps)/float64(totalOps)*100)
+			b.Logf("Read-Modify-Write Operations: %d (%.1f%%)", rmwOps, float64(rmwOps)/float64(totalOps)*100)
+
+			b.Logf("\n--- Cache Statistics ---")
+			b.Logf("Cache Hit Rate: %.2f%% (%d/%d)", cacheHitRate, stats.HitCount, stats.HitCount+stats.MissCount)
+			b.Logf("Workload Hit Rate: %.2f%% (%d/%d)", workloadHitRate, readHits, readHits+readMisses)
+			b.Logf("Final Cache Size: %d", stats.Size)
+			b.Logf("Cache Capacity: %d", stats.Capacity)
+			b.Logf("Eviction Events: %d", stats.EvictCount)
+			b.Logf("Total Items Evicted: %d", stats.EvictItemCount)
+
+			b.Logf("\n--- Timing Breakdown ---")
+			b.Logf("Load Phase Duration: %v", loadDuration)
+			b.Logf("Run Phase Duration: %v", runDuration)
+			b.Logf("Total Duration: %v", totalDuration)
+
+			b.Logf("\n--- Memory Metrics ---")
+			b.Logf("Allocations per Operation: %.2f", allocsPerOp)
+			b.Logf("Bytes per Operation: %.2f", bytesPerOp)
+		}
+	}
+}
+
+// selectOperation selects an operation based on workload proportions
+func selectOperation(workload WorkloadConfig) string {
+	r := rand.Float64()
+
+	if r < workload.ReadProportion {
+		return "read"
+	}
+	r -= workload.ReadProportion
+
+	if r < workload.UpdateProportion {
+		return "update"
+	}
+	r -= workload.UpdateProportion
+
+	if r < workload.InsertProportion {
+		return "insert"
+	}
+	r -= workload.InsertProportion
+
+	if r < workload.ReadModifyWriteProp {
+		return "readmodifywrite"
+	}
+
+	// Default to read if something goes wrong
+	return "read"
+}
+
+// generateKey generates a key based on the request distribution
+func generateKey(operationIndex, recordCount int, distribution string) string {
+	var keyIndex int
+
+	switch distribution {
+	case "uniform":
+		keyIndex = rand.Intn(recordCount)
+	case "zipfian":
+		// Simplified Zipfian: 80% of requests go to 20% of keys
+		if rand.Float64() < 0.8 {
+			keyIndex = rand.Intn(recordCount / 5) // Top 20% of keys
+		} else {
+			keyIndex = recordCount/5 + rand.Intn(recordCount*4/5) // Bottom 80% of keys
+		}
+	case "latest":
+		// Latest distribution: favor recently inserted keys
+		if rand.Float64() < 0.8 {
+			// 80% chance to access the most recent 10% of keys
+			keyIndex = recordCount*9/10 + rand.Intn(recordCount/10)
+		} else {
+			keyIndex = rand.Intn(recordCount * 9 / 10)
+		}
+	default:
+		keyIndex = rand.Intn(recordCount)
+	}
+
+	return fmt.Sprintf("user%010d", keyIndex)
+}
diff --git a/flashring/prep_ssd.sh b/flashring/prep_ssd.sh
new file mode 100644
index 00000000..f8e33b3e
--- /dev/null
+++ b/flashring/prep_ssd.sh
@@ -0,0 +1,202 @@
+#!/usr/bin/env bash
+# Mount all non-root NVMe SSDs (/dev/nvme*n1) as ext4 under /mnt/localssd1, /mnt/localssd2, ...
+# Uses hourly fstrim (systemd timer or cron fallback). Safe to re-run.
+set -euo pipefail
+
+MOUNT_BASE="/mnt/localssd"
+
+log() { echo "[$(date +'%F %T')] $*"; }
+trap 'log "ERROR: Command failed: $BASH_COMMAND (line $LINENO)"' ERR
+
+# ---------- Helpers ----------
+fs_type() { lsblk -ndo FSTYPE "$1" 2>/dev/null | tr -d ' '; }
+is_mounted_anywhere() { findmnt -S "$1" >/dev/null 2>&1; }
+current_mountpoint() { findmnt -S "$1" -no TARGET 2>/dev/null || true; }
+
+root_source() { findmnt -no SOURCE / 2>/dev/null || true; }
+parent_of() {
+  local s="$1"
+  [[ "$s" =~ ^/dev/nvme[0-9]+n[0-9]+p[0-9]+$ ]] && { echo "${s%p*}"; return; }
+  [[ "$s" =~ ^/dev/sd[a-z][0-9]+$ ]] && { echo "${s%[0-9]}"; return; }
+  echo "$s"
+}
+is_boot_dev() {
+  local dev="$1"
+  local rsrc; rsrc="$(root_source)"
+  [[ -z "$rsrc" ]] && return 1
+  local rparent; rparent="$(parent_of "$rsrc")"
+  [[ "$dev" == "$rsrc" || "$dev" == "$rparent" ]]
+}
+
+next_mountpoint() {
+  local n=1
+  while :; do
+    local mp="${MOUNT_BASE}${n}"
+    if ! mountpoint -q "$mp"; then
+      echo "$mp"
+      return 0
+    fi
+    ((n+=1))
+  done
+}
+
+ensure_fstab_entry() {
+  local uuid="$1" mp="$2"
+  local line="UUID=${uuid}  ${mp}  ext4  defaults,nofail,noatime,nodiratime  0  2"
+  sed -i -E "/^UUID=${uuid}[[:space:]]/d" /etc/fstab 2>/dev/null || true
+  grep -q "UUID=${uuid}  ${mp}  ext4" /etc/fstab 2>/dev/null || echo "$line" >> /etc/fstab
+}
+
+sanitize_fstab_discard() {
+  if grep -Eq '/mnt/localssd[0-9]+[[:space:]]+ext4' /etc/fstab 2>/dev/null; then
+    log "Sanitizing /etc/fstab to remove ',discard' on /mnt/localssd* entries"
+    sed -i -E '/\/mnt\/localssd[0-9]+[[:space:]]+ext4/ s/,?discard//g' /etc/fstab
+  fi
+}
+
+remount_localssd_no_discard() {
+  mapfile -t MPS < <(findmnt -no TARGET | grep -E "^${MOUNT_BASE}[0-9]+$" || true)
+  for mp in "${MPS[@]:-}"; do
+    log "Remounting $mp without 'discard'"
+    mount -o remount,noatime,nodiratime "$mp" || true
+  done
+}
+
+setup_fstrim_hourly() {
+  if command -v systemctl >/dev/null 2>&1 && command -v fstrim >/dev/null 2>&1; then
+    log "Configuring systemd fstrim.timer to run hourly"
+    mkdir -p /etc/systemd/system/fstrim.timer.d
+    cat >/etc/systemd/system/fstrim.timer.d/override.conf <<'EOF'
+[Timer]
+OnCalendar=hourly
+Persistent=true
+EOF
+    systemctl daemon-reload
+    systemctl enable --now fstrim.timer
+    systemctl status fstrim.timer --no-pager -l || true
+  else
+    if command -v fstrim >/dev/null 2>&1; then
+      log "Configuring cron.hourly for fstrim (systemd not available)"
+      mkdir -p /etc/cron.hourly
+      cat >/etc/cron.hourly/fstrim-localssd <<'EOF'
+#!/bin/sh
+/sbin/fstrim --all --quiet || /usr/sbin/fstrim --all --quiet || true
+EOF
+      chmod +x /etc/cron.hourly/fstrim-localssd
+    else
+      log "WARN: fstrim not found; install util-linux to enable trimming."
+    fi
+  fi
+}
+
+# ---------- Modes ----------
+umount_mode() {
+  log "Unmounting /mnt/localssd* and cleaning /etc/fstab entries"
+  mapfile -t MPS < <(findmnt -no TARGET | grep -E "^${MOUNT_BASE}[0-9]+$" || true)
+  for mp in "${MPS[@]:-}"; do
+    log "Umount $mp"
+    umount "$mp" || true
+  done
+  sed -i -E '/\/mnt\/localssd[0-9]+[[:space:]]+ext4/d' /etc/fstab || true
+  systemctl daemon-reload || true
+  log "Done. Re-run this script to mount afresh."
+  exit 0
+}
+
+status_mode() {
+  log "Current localssd mounts:"
+  findmnt -no TARGET,SOURCE,FSTYPE | grep -E "^${MOUNT_BASE}[0-9]+" || echo "None"
+  exit 0
+}
+
+usage() {
+  echo "Usage: $0 [--umount|--status]"
+  exit 1
+}
+
+case "${1:-}" in
+  --umount) umount_mode ;;
+  --status) status_mode ;;
+  "") ;;
+  *) usage ;;
+esac
+
+# ---------- Preconditions ----------
+command -v lsblk >/dev/null || { echo "lsblk not found"; exit 1; }
+command -v blkid >/dev/null || { echo "blkid not found"; exit 1; }
+command -v mkfs.ext4 >/dev/null || { echo "mkfs.ext4 not found"; exit 1; }
+
+# Sync systemd with current fstab before mounts
+systemctl daemon-reload || true
+
+# Enumerate all NVMe namespaces deterministically
+mapfile -t NVME_DEVS < <(ls /dev/nvme*n1 2>/dev/null | sort || true)
+if [[ ${#NVME_DEVS[@]} -eq 0 ]]; then
+  log "No NVMe namespaces (/dev/nvme*n1) found."
+  exit 0
+fi
+log "Scanning devices: ${NVME_DEVS[*]}"
+
+processed=0
+
+for dev in "${NVME_DEVS[@]}"; do
+  [[ -e "$dev" ]] || { log "$dev not found at runtime — skipping."; continue; }
+
+  if is_boot_dev "$dev"; then
+    log "Skipping boot/root device: $dev"
+    continue
+  fi
+
+  log "Found $dev"
+
+  if is_mounted_anywhere "$dev"; then
+    mp_now="$(current_mountpoint "$dev")"
+    log "$dev already mounted at $mp_now — leaving as-is."
+    uuid="$(blkid -s UUID -o value "$dev" || true)"
+    [[ -n "$uuid" ]] && ensure_fstab_entry "$uuid" "$mp_now"
+    ((processed+=1))
+    continue
+  fi
+
+  fstype="$(fs_type "$dev")"
+  if [[ -z "$fstype" ]]; then
+    log "Formatting $dev as ext4"
+    mkfs.ext4 -F -m 0 -E lazy_itable_init=1,lazy_journal_init=1 "$dev"
+    fstype="ext4"
+  else
+    log "$dev already has filesystem: $fstype"
+  fi
+
+  if [[ "$fstype" != "ext4" ]]; then
+    log "Skipping $dev (unsupported fs: $fstype)."
+    continue
+  fi
+
+  mp="$(next_mountpoint)"
+  mkdir -p "$mp"
+  log "Mounting $dev at $mp (no 'discard')"
+  mount -o noatime,nodiratime "$dev" "$mp"
+
+  uuid="$(blkid -s UUID -o value "$dev" || true)"
+  if [[ -n "$uuid" ]]; then
+    ensure_fstab_entry "$uuid" "$mp"
+    systemctl daemon-reload || true
+  else
+    log "WARN: Could not read UUID for $dev; skipping fstab entry."
+  fi
+
+  ((processed+=1))
+done
+
+sanitize_fstab_discard
+remount_localssd_no_discard
+
+systemctl daemon-reload || true
+setup_fstrim_hourly
+
+if [[ "$processed" -eq 0 ]]; then
+  log "No devices processed. Existing mounts sanitized and hourly fstrim scheduled (if available)."
+else
+  log "Done. Processed $processed device(s). Current mounts:"
+  findmnt -no TARGET,SOURCE | grep "$MOUNT_BASE" || true
+fi