From 3b723871dd805d1426939c19e1752f28da066229 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Wed, 24 Dec 2025 08:48:55 +0000 Subject: [PATCH 01/53] externaize the package for cache so it can be imported --- flashring/cmd/flashringtest/plan_badger.go | 2 +- flashring/cmd/flashringtest/plan_freecache.go | 2 +- flashring/cmd/flashringtest/plan_lockless.go | 2 +- flashring/cmd/flashringtest/plan_random_gausian.go | 2 +- flashring/cmd/flashringtest/plan_readthrough_gausian.go | 2 +- flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go | 2 +- flashring/{internal => pkg}/cache/badger.go | 0 flashring/{internal => pkg}/cache/cache.go | 0 flashring/{internal => pkg}/cache/freecache.go | 0 9 files changed, 6 insertions(+), 6 deletions(-) rename flashring/{internal => pkg}/cache/badger.go (100%) rename flashring/{internal => pkg}/cache/cache.go (100%) rename flashring/{internal => pkg}/cache/freecache.go (100%) diff --git a/flashring/cmd/flashringtest/plan_badger.go b/flashring/cmd/flashringtest/plan_badger.go index 4ba266d4..1e06f8fa 100644 --- a/flashring/cmd/flashringtest/plan_badger.go +++ b/flashring/cmd/flashringtest/plan_badger.go @@ -10,7 +10,7 @@ import ( "strings" "sync" - cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache" + cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/cmd/flashringtest/plan_freecache.go b/flashring/cmd/flashringtest/plan_freecache.go index 0fe6a297..be46daf9 100644 --- a/flashring/cmd/flashringtest/plan_freecache.go +++ b/flashring/cmd/flashringtest/plan_freecache.go @@ -11,7 +11,7 @@ import ( "strings" "sync" - cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache" + cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/cmd/flashringtest/plan_lockless.go b/flashring/cmd/flashringtest/plan_lockless.go index 
e946c9af..a15aed95 100644 --- a/flashring/cmd/flashringtest/plan_lockless.go +++ b/flashring/cmd/flashringtest/plan_lockless.go @@ -13,7 +13,7 @@ import ( "sync" "time" - cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache" + cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/cmd/flashringtest/plan_random_gausian.go b/flashring/cmd/flashringtest/plan_random_gausian.go index 3fbaf849..d9333210 100644 --- a/flashring/cmd/flashringtest/plan_random_gausian.go +++ b/flashring/cmd/flashringtest/plan_random_gausian.go @@ -12,7 +12,7 @@ import ( "sync" "time" - cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache" + cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index 56c6da3d..6bea81c8 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -13,7 +13,7 @@ import ( "sync" "time" - cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache" + cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go index fd33e06a..ceb628cb 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go @@ -13,7 +13,7 @@ import ( "sync" "time" - cachepkg "github.com/Meesho/BharatMLStack/flashring/internal/cache" + cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/internal/cache/badger.go b/flashring/pkg/cache/badger.go similarity 
index 100% rename from flashring/internal/cache/badger.go rename to flashring/pkg/cache/badger.go diff --git a/flashring/internal/cache/cache.go b/flashring/pkg/cache/cache.go similarity index 100% rename from flashring/internal/cache/cache.go rename to flashring/pkg/cache/cache.go diff --git a/flashring/internal/cache/freecache.go b/flashring/pkg/cache/freecache.go similarity index 100% rename from flashring/internal/cache/freecache.go rename to flashring/pkg/cache/freecache.go From b8ffbaf81e2f7809f61081f72d6a6a4f3a503f57 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Fri, 9 Jan 2026 09:13:07 +0000 Subject: [PATCH 02/53] do not start lockless functions if lockless not enabled --- flashring/internal/shard/shard_cache.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 78e19deb..8fb77b3a 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -64,6 +64,9 @@ type ShardCacheConfig struct { EnableBatching bool BatchWindow time.Duration MaxBatchSize int + + //lockless + EnableLockless bool } func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { @@ -120,10 +123,13 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { }, sc, sl) } + if config.EnableLockless { + sc.ReadCh = make(chan *ReadRequestV2, 500) sc.WriteCh = make(chan *WriteRequestV2, 500) go sc.startReadWriteRoutines() + } return sc } From bfd133757a00f276a3b4ce9282493a9b47301df7 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Fri, 9 Jan 2026 09:45:38 +0000 Subject: [PATCH 03/53] should rewrite logic off for now --- flashring/pkg/cache/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 74755251..e2094524 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -440,7 +440,7 @@ func (wc *WrapCache) 
Get(key string) ([]byte, bool, bool) { wc.stats[shardIdx].Expired.Add(1) } wc.stats[shardIdx].TotalGets.Add(1) - if shouldReWrite { + if false && shouldReWrite { wc.stats[shardIdx].ReWrites.Add(1) wc.putLocked(shardIdx, h32, key, val, remainingTTL) } From 90cae0fd5e586095dd0615f2ef664fde7aeb535e Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Fri, 9 Jan 2026 10:09:42 +0000 Subject: [PATCH 04/53] change shard cache metric maps to sync maps --- flashring/internal/shard/shard_cache.go | 100 ++++++++++++++---------- 1 file changed, 60 insertions(+), 40 deletions(-) diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 8fb77b3a..9e28b17d 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -4,6 +4,7 @@ import ( "fmt" "hash/crc32" "sync" + "sync/atomic" "time" "github.com/Meesho/BharatMLStack/flashring/internal/allocators" @@ -33,20 +34,41 @@ type ShardCache struct { } type Stats struct { - KeyNotFoundCount int - KeyExpiredCount int - BadDataCount int - BadLengthCount int - BadCR32Count int - BadKeyCount int - MemIdCount map[uint32]int - LastDeletedMemId uint32 - DeletedKeyCount int - BadCRCMemIds map[uint32]int - BadKeyMemIds map[uint32]int + KeyNotFoundCount atomic.Int64 + KeyExpiredCount atomic.Int64 + BadDataCount atomic.Int64 + BadLengthCount atomic.Int64 + BadCR32Count atomic.Int64 + BadKeyCount atomic.Int64 + MemIdCount sync.Map // key: uint32, value: *atomic.Int64 + LastDeletedMemId atomic.Uint32 + DeletedKeyCount atomic.Int64 + BadCRCMemIds sync.Map // key: uint32, value: *atomic.Int64 + BadKeyMemIds sync.Map // key: uint32, value: *atomic.Int64 BatchTracker *BatchTracker } +// Helper method to increment a counter in a sync.Map +func (s *Stats) incMapCounter(m *sync.Map, key uint32) { + val, _ := m.LoadOrStore(key, &atomic.Int64{}) + val.(*atomic.Int64).Add(1) +} + +// IncMemIdCount atomically increments the counter for the given memId +func (s *Stats) 
IncMemIdCount(memId uint32) { + s.incMapCounter(&s.MemIdCount, memId) +} + +// IncBadCRCMemIds atomically increments the bad CRC counter for the given memId +func (s *Stats) IncBadCRCMemIds(memId uint32) { + s.incMapCounter(&s.BadCRCMemIds, memId) +} + +// IncBadKeyMemIds atomically increments the bad key counter for the given memId +func (s *Stats) IncBadKeyMemIds(memId uint32) { + s.incMapCounter(&s.BadKeyMemIds, memId) +} + type ShardCacheConfig struct { Rounds int RbInitial int @@ -108,9 +130,7 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { predictor: config.Predictor, startAt: time.Now().Unix(), Stats: &Stats{ - MemIdCount: make(map[uint32]int), - BadCRCMemIds: make(map[uint32]int), - BadKeyMemIds: make(map[uint32]int), + // sync.Map fields have zero values that are ready to use BatchTracker: NewBatchTracker(), }, } @@ -125,10 +145,10 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { if config.EnableLockless { - sc.ReadCh = make(chan *ReadRequestV2, 500) - sc.WriteCh = make(chan *WriteRequestV2, 500) + sc.ReadCh = make(chan *ReadRequestV2, 500) + sc.WriteCh = make(chan *WriteRequestV2, 500) - go sc.startReadWriteRoutines() + go sc.startReadWriteRoutines() } return sc @@ -169,19 +189,19 @@ func (fc *ShardCache) Put(key string, value []byte, ttlMinutes uint16) error { indices.ByteOrder.PutUint32(buf[0:4], crc) fc.keyIndex.Put(key, length, ttlMinutes, mtId, uint32(offset)) fc.dm.IncMemtableKeyCount(mtId) - fc.Stats.MemIdCount[mtId]++ + fc.Stats.IncMemIdCount(mtId) return nil } func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - fc.Stats.KeyNotFoundCount++ + fc.Stats.KeyNotFoundCount.Add(1) return false, nil, 0, false, false } if status == indices.StatusExpired { - fc.Stats.KeyExpiredCount++ + fc.Stats.KeyExpiredCount.Add(1) return false, nil, 0, true, false 
} @@ -202,7 +222,7 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDisk(int64(fileOffset), length, buf) if n != int(length) { - fc.Stats.BadLengthCount++ + fc.Stats.BadLengthCount.Add(1) return false, nil, 0, false, shouldReWrite } } else { @@ -215,13 +235,13 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { computedCR32 := crc32.ChecksumIEEE(buf[4:]) gotKey := string(buf[4 : 4+len(key)]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count++ - fc.Stats.BadCRCMemIds[memId]++ + fc.Stats.BadCR32Count.Add(1) + fc.Stats.IncBadCRCMemIds(memId) return false, nil, 0, false, shouldReWrite } if gotKey != key { - fc.Stats.BadKeyCount++ - fc.Stats.BadKeyMemIds[memId]++ + fc.Stats.BadKeyCount.Add(1) + fc.Stats.IncBadKeyMemIds(memId) return false, nil, 0, false, shouldReWrite } valLen := int(length) - 4 - len(key) @@ -234,12 +254,12 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - fc.Stats.KeyNotFoundCount++ + fc.Stats.KeyNotFoundCount.Add(1) return false, nil, 0, false, false // needsSlowPath = false (not found) } if status == indices.StatusExpired { - fc.Stats.KeyExpiredCount++ + fc.Stats.KeyExpiredCount.Add(1) return false, nil, 0, true, false // needsSlowPath = false (expired) } @@ -260,8 +280,8 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) computedCR32 := crc32.ChecksumIEEE(buf[4:]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count++ - fc.Stats.BadCRCMemIds[memId]++ + fc.Stats.BadCR32Count.Add(1) + fc.Stats.IncBadCRCMemIds(memId) _, currMemId, _ := fc.mm.GetMemtable() shouldReWrite := 
fc.predictor.Predict(uint64(freq), uint64(lastAccess), memId, currMemId) _ = shouldReWrite // Not returning shouldReWrite in fast path for simplicity @@ -270,8 +290,8 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) gotKey := string(buf[4 : 4+len(key)]) if gotKey != key { - fc.Stats.BadKeyCount++ - fc.Stats.BadKeyMemIds[memId]++ + fc.Stats.BadKeyCount.Add(1) + fc.Stats.IncBadKeyMemIds(memId) return false, nil, 0, false, false } @@ -284,12 +304,12 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - fc.Stats.KeyNotFoundCount++ + fc.Stats.KeyNotFoundCount.Add(1) return false, nil, 0, false, false } if status == indices.StatusExpired { - fc.Stats.KeyExpiredCount++ + fc.Stats.KeyExpiredCount.Add(1) return false, nil, 0, true, false } @@ -314,7 +334,7 @@ func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDisk(int64(fileOffset), length, buf) if n != int(length) { - fc.Stats.BadLengthCount++ + fc.Stats.BadLengthCount.Add(1) return false, nil, 0, false, shouldReWrite } @@ -326,15 +346,15 @@ func (fc *ShardCache) validateAndReturnBuffer(key string, buf []byte, length uin gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) computedCR32 := crc32.ChecksumIEEE(buf[4:]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count++ - fc.Stats.BadCRCMemIds[memId]++ + fc.Stats.BadCR32Count.Add(1) + fc.Stats.IncBadCRCMemIds(memId) return false, nil, 0, false, shouldReWrite } gotKey := string(buf[4 : 4+len(key)]) if gotKey != key { - fc.Stats.BadKeyCount++ - fc.Stats.BadKeyMemIds[memId]++ + fc.Stats.BadKeyCount.Add(1) + fc.Stats.IncBadKeyMemIds(memId) return false, nil, 0, false, shouldReWrite } @@ -366,11 +386,11 
@@ func (fc *ShardCache) processBuffer(key string, buf []byte, length uint16) ReadR gotKey := string(buf[4 : 4+len(key)]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count++ + fc.Stats.BadCR32Count.Add(1) return ReadResult{Found: false, Error: fmt.Errorf("crc mismatch")} } if gotKey != key { - fc.Stats.BadKeyCount++ + fc.Stats.BadKeyCount.Add(1) return ReadResult{Found: false, Error: fmt.Errorf("key mismatch")} } From 19887a9a8fdb347d27872f55090a9e203b7492ee Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Mon, 12 Jan 2026 12:44:30 +0000 Subject: [PATCH 05/53] improve metrics package --- flashring/cmd/flashringtest/plan_lockless.go | 33 +- .../cmd/flashringtest/plan_random_gausian.go | 19 +- .../flashringtest/plan_readthrough_gausian.go | 32 +- .../plan_readthrough_gausian_batched.go | 30 +- flashring/go.mod | 17 + flashring/go.sum | 68 ++++ flashring/internal/metrics/console_logger.go | 44 +++ flashring/internal/metrics/csv_logger.go | 170 +++++++++ flashring/internal/metrics/metric.go | 109 ++++++ .../internal/metrics/metrics_averager.go | 57 +++ flashring/internal/metrics/runmetrics.go | 350 ++++++++++++++++++ flashring/internal/metrics/statsd_logger.go | 55 +++ flashring/internal/metrics/tag.go | 55 +++ flashring/pkg/cache/cache.go | 173 +++------ 14 files changed, 1056 insertions(+), 156 deletions(-) create mode 100644 flashring/internal/metrics/console_logger.go create mode 100644 flashring/internal/metrics/csv_logger.go create mode 100644 flashring/internal/metrics/metric.go create mode 100644 flashring/internal/metrics/metrics_averager.go create mode 100644 flashring/internal/metrics/runmetrics.go create mode 100644 flashring/internal/metrics/statsd_logger.go create mode 100644 flashring/internal/metrics/tag.go diff --git a/flashring/cmd/flashringtest/plan_lockless.go b/flashring/cmd/flashringtest/plan_lockless.go index a15aed95..100ebf01 100644 --- a/flashring/cmd/flashringtest/plan_lockless.go +++ b/flashring/cmd/flashringtest/plan_lockless.go 
@@ -13,6 +13,7 @@ import ( "sync" "time" + metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" @@ -35,7 +36,7 @@ func planLockless() { cpuProfile string ) - flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/", "data directory for shard files") + flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme", "data directory for shard files") flag.IntVar(&numShards, "shards", 500, "number of shards") flag.IntVar(&keysPerShard, "keys-per-shard", 10_00_00, "keys per shard") flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB") @@ -94,22 +95,24 @@ func planLockless() { ReWriteScoreThreshold: 0.8, GridSearchEpsilon: 0.0001, SampleDuration: time.Duration(sampleSecs) * time.Second, - - // Pass the metrics collector to record cache metrics - MetricsRecorder: InitMetricsCollector(), } - // Set additional input parameters that the cache doesn't know about - metricsCollector.SetShards(numShards) - metricsCollector.SetKeysPerShard(keysPerShard) - metricsCollector.SetReadWorkers(readWorkers) - metricsCollector.SetWriteWorkers(writeWorkers) - metricsCollector.SetPlan("lockless") - - // Start background goroutine to wait for shutdown signal and export CSV - go RunmetricsWaitForShutdown() - - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats) + metricsConfig := metrics.MetricsCollectorConfig{ + StatsEnabled: true, + CsvLogging: true, + ConsoleLogging: true, + StatsdLogging: true, + InstantMetrics: true, + AveragedMetrics: true, + Metadata: map[string]any{ + "shards": numShards, + "keys-per-shard": keysPerShard, + "read-workers": readWorkers, + "write-workers": writeWorkers, + "plan": "lockless"}, + } + metricsCollector := metrics.InitMetricsCollector(metricsConfig) + pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) if err != nil { panic(err) } diff --git a/flashring/cmd/flashringtest/plan_random_gausian.go 
b/flashring/cmd/flashringtest/plan_random_gausian.go index d9333210..1d9477c3 100644 --- a/flashring/cmd/flashringtest/plan_random_gausian.go +++ b/flashring/cmd/flashringtest/plan_random_gausian.go @@ -12,6 +12,7 @@ import ( "sync" "time" + metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" @@ -95,7 +96,23 @@ func planRandomGaussian() { SampleDuration: time.Duration(sampleSecs) * time.Second, } - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats) + metricsConfig := metrics.MetricsCollectorConfig{ + StatsEnabled: true, + CsvLogging: true, + ConsoleLogging: true, + StatsdLogging: false, + InstantMetrics: false, + AveragedMetrics: true, + Metadata: map[string]any{ + "shards": numShards, + "keys-per-shard": keysPerShard, + "read-workers": readWorkers, + "write-workers": writeWorkers, + "plan": "random-gausian"}, + } + metricsCollector := metrics.InitMetricsCollector(metricsConfig) + + pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) if err != nil { panic(err) } diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index 6bea81c8..6ceb053b 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -16,6 +16,8 @@ import ( cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" + + metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" ) func planReadthroughGaussian() { @@ -86,6 +88,21 @@ func planReadthroughGaussian() { memtableSizeInBytes := int32(memtableMB) * 1024 * 1024 fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes) + metricsConfig := metrics.MetricsCollectorConfig{ + StatsEnabled: true, + CsvLogging: true, + ConsoleLogging: true, + StatsdLogging: true, + 
InstantMetrics: false, + AveragedMetrics: true, + Metadata: map[string]any{ + "shards": numShards, + "keys-per-shard": keysPerShard, + "read-workers": readWorkers, + "write-workers": writeWorkers, + "plan": "readthrough"}, + } + cfg := cachepkg.WrapCacheConfig{ NumShards: numShards, KeysPerShard: keysPerShard, @@ -94,22 +111,11 @@ func planReadthroughGaussian() { ReWriteScoreThreshold: 0.8, GridSearchEpsilon: 0.0001, SampleDuration: time.Duration(sampleSecs) * time.Second, - - // Pass the metrics collector to record cache metrics - MetricsRecorder: InitMetricsCollector(), } - // Set additional input parameters that the cache doesn't know about - metricsCollector.SetShards(numShards) - metricsCollector.SetKeysPerShard(keysPerShard) - metricsCollector.SetReadWorkers(readWorkers) - metricsCollector.SetWriteWorkers(writeWorkers) - metricsCollector.SetPlan("readthrough") - - // Start background goroutine to wait for shutdown signal and export CSV - go RunmetricsWaitForShutdown() + metricsCollector := metrics.InitMetricsCollector(metricsConfig) - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats) + pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) if err != nil { panic(err) } diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go index ceb628cb..0b875b65 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go @@ -13,6 +13,7 @@ import ( "sync" "time" + metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" @@ -108,22 +109,25 @@ func planReadthroughGaussianBatched() { EnableBatching: enableBatching, BatchWindowMicros: batchWindowMicros, MaxBatchSize: maxBatchSize, - - // Pass the metrics collector to record cache metrics - MetricsRecorder: 
InitMetricsCollector(), } - // Set additional input parameters that the cache doesn't know about - metricsCollector.SetShards(numShards) - metricsCollector.SetKeysPerShard(keysPerShard) - metricsCollector.SetReadWorkers(readWorkers) - metricsCollector.SetWriteWorkers(writeWorkers) - metricsCollector.SetPlan("readthrough-batched") - - // Start background goroutine to wait for shutdown signal and export CSV - go RunmetricsWaitForShutdown() + metricsConfig := metrics.MetricsCollectorConfig{ + StatsEnabled: true, + CsvLogging: true, + ConsoleLogging: true, + StatsdLogging: false, + InstantMetrics: false, + AveragedMetrics: true, + Metadata: map[string]any{ + "shards": numShards, + "keys-per-shard": keysPerShard, + "read-workers": readWorkers, + "write-workers": writeWorkers, + "plan": "readthrough-batched"}, + } + metricsCollector := metrics.InitMetricsCollector(metricsConfig) - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, logStats) + pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) if err != nil { panic(err) } diff --git a/flashring/go.mod b/flashring/go.mod index f02d9663..288dd765 100644 --- a/flashring/go.mod +++ b/flashring/go.mod @@ -13,6 +13,22 @@ require ( ) require ( + github.com/Microsoft/go-winio v0.5.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/go-viper/mapstructure/v2 v2.4.0 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect + github.com/sagikazarmark/locafero v0.11.0 // indirect + github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 // indirect + github.com/spf13/afero v1.15.0 // indirect + github.com/spf13/cast v1.10.0 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/subosito/gotenv v1.6.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/text v0.28.0 // indirect +) + +require ( + github.com/DataDog/datadog-go/v5 v5.8.2 github.com/dgraph-io/badger/v4 v4.9.0 // indirect github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect 
github.com/dustin/go-humanize v1.0.1 // indirect @@ -23,6 +39,7 @@ require ( github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/mattn/go-colorable v0.1.14 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/spf13/viper v1.21.0 go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/otel v1.37.0 // indirect go.opentelemetry.io/otel/metric v1.37.0 // indirect diff --git a/flashring/go.sum b/flashring/go.sum index 6c22ab66..701011b5 100644 --- a/flashring/go.sum +++ b/flashring/go.sum @@ -1,21 +1,32 @@ +github.com/DataDog/datadog-go/v5 v5.8.2 h1:9IEfH1Mw9AjWwhAMqCAkhbxjuJeMxm2ARX2VdgL+ols= +github.com/DataDog/datadog-go/v5 v5.8.2/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= +github.com/Microsoft/go-winio v0.5.0 h1:Elr9Wn+sGKPlkaBvwu4mTrxtmOp3F3yV9qhaHbXGjwU= +github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/5M= github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger/v4 v4.9.0 h1:tpqWb0NewSrCYqTvywbcXOhQdWcqephkVkbBmaaqHzc= github.com/dgraph-io/badger/v4 v4.9.0/go.mod h1:5/MEx97uzdPUHR4KtkNt8asfI2T4JiEiQlV7kWUo8c0= github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= 
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= +github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= @@ -33,10 +44,36 @@ github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APP github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors 
v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= +github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc= +github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= +github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U= +github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= +github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg= +github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY= +github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= +github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.7.1/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= @@ -49,8 +86,25 @@ go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/Wgbsd go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= golang.org/x/net v0.43.0/go.mod 
h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -58,5 +112,19 @@ golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod 
h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/flashring/internal/metrics/console_logger.go b/flashring/internal/metrics/console_logger.go new file mode 100644 index 00000000..fcc7f7ef --- /dev/null +++ b/flashring/internal/metrics/console_logger.go @@ -0,0 +1,44 @@ +package metrics + +import ( + "time" + + "github.com/rs/zerolog/log" +) + +func RunConsoleLogger(metricsCollector *MetricsCollector) { + + // start a ticker to log the metrics every 30 seconds + + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for { + select { + case <-metricsCollector.stopCh: + return + case <-ticker.C: + currentMetrics = metricsCollector.GetMetrics() + + rp99 := currentMetrics.AveragedMetrics.RP99.Milliseconds() + rp50 := currentMetrics.AveragedMetrics.RP50.Milliseconds() + rp25 := currentMetrics.AveragedMetrics.RP25.Milliseconds() + wp99 := 
currentMetrics.AveragedMetrics.WP99.Milliseconds() + wp50 := currentMetrics.AveragedMetrics.WP50.Milliseconds() + wp25 := currentMetrics.AveragedMetrics.WP25.Milliseconds() + + rThroughput := currentMetrics.AveragedMetrics.RThroughput + hitRate := currentMetrics.AveragedMetrics.HitRate + + log.Info().Msgf("RP99: %vms", rp99) + log.Info().Msgf("RP50: %vms", rp50) + log.Info().Msgf("RP25: %vms", rp25) + log.Info().Msgf("WP99: %vms", wp99) + log.Info().Msgf("WP50: %vms", wp50) + log.Info().Msgf("WP25: %vms", wp25) + log.Info().Msgf("RThroughput: %v", rThroughput) + log.Info().Msgf("HitRate: %v", hitRate) + + } + } +} diff --git a/flashring/internal/metrics/csv_logger.go b/flashring/internal/metrics/csv_logger.go new file mode 100644 index 00000000..d5ae6cd3 --- /dev/null +++ b/flashring/internal/metrics/csv_logger.go @@ -0,0 +1,170 @@ +package metrics + +import ( + "bufio" + "encoding/csv" + "fmt" + "log" + "os" + "os/signal" + "runtime" + "strconv" + "strings" + "syscall" + "time" +) + +// --- CSV Configuration --- +const CSVFileName = "performance_results.csv" + +// RunCSVLoggerWaitForShutdown waits for shutdown signal and logs final metrics to CSV +func RunCSVLoggerWaitForShutdown() { + // --- Set up Signal Handling --- + stopChan := make(chan os.Signal, 1) + signal.Notify(stopChan, syscall.SIGINT, syscall.SIGTERM) + + fmt.Println("Program running. Press Ctrl+C to stop and log results to CSV...") + + // --- Wait for Stop Signal --- + <-stopChan + fmt.Println("\nTermination signal received. 
Stopping work and logging results...") + + // Stop the metrics collector + if metricsCollector != nil { + metricsCollector.Stop() + + // Get final averaged metrics + currentMetrics = metricsCollector.GetMetrics() + } + + // --- Log Data to CSV --- + if err := LogResultsToCSV(metricsCollector.Config.Metadata); err != nil { + log.Fatalf("FATAL: Failed to log results to CSV: %v", err) + } + + fmt.Printf("Successfully logged results to %s.\n", CSVFileName) + + // Exit the program since we're running in a goroutine + os.Exit(0) +} + +func LogResultsToCSV(metadata map[string]any) error { + // 1. Check if the file exists to determine if we need a header row. + file, err := os.OpenFile(CSVFileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return fmt.Errorf("failed to open CSV file: %w", err) + } + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() // Crucial to ensure data is written to the file before exiting. + + // The list of all your column headers (per-shard metrics) + header := []string{ + "SHARDS", "KEYS_PER_SHARD", "READ_WORKERS", "WRITE_WORKERS", "PLAN", + "R_THROUGHPUT", "R_P99", "R_P50", "R_P25", "W_THROUGHPUT", "W_P99", "W_P50", "W_P25", + "HIT_RATE", "CPU", "MEMORY", "TIME", + } + + // Determine if the file is new (or empty) and needs the header + fileInfo, _ := file.Stat() + if fileInfo.Size() == 0 { + if err := writer.Write(header); err != nil { + return fmt.Errorf("error writing CSV header: %w", err) + } + } + + timestamp := time.Now().In(time.FixedZone("IST", 5*60*60+30*60)).Format("2006-01-02 15:04:05") + + dataRow := []string{ + // Input Parameters + strconv.Itoa(metadata["shards"].(int)), + strconv.Itoa(metadata["keys_per_shard"].(int)), + strconv.Itoa(metadata["read_workers"].(int)), + strconv.Itoa(metadata["write_workers"].(int)), + metadata["plan"].(string), + + // averaged observation parameters + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.RThroughput), + fmt.Sprintf("%v", 
currentMetrics.AveragedMetrics.RP99), + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.RP50), + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.RP25), + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WThroughput), + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WP99), + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WP50), + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WP25), + fmt.Sprintf("%v", currentMetrics.AveragedMetrics.HitRate), + fmt.Sprintf("%v", getCPUUsagePercent()), + fmt.Sprintf("%v", getMemoryUsageMB()), + timestamp, + } + + if err := writer.Write(dataRow); err != nil { + return fmt.Errorf("error writing CSV data row: %w", err) + } + + return nil +} + +// getMemoryUsageMB returns the current memory usage of this process in MB +func getMemoryUsageMB() float64 { + var m runtime.MemStats + runtime.ReadMemStats(&m) + // Alloc is bytes of allocated heap objects + return float64(m.Alloc) / 1024 / 1024 +} + +// getCPUUsagePercent returns the CPU usage percentage for this process +// It measures CPU usage over a short interval +func getCPUUsagePercent() float64 { + // Read initial CPU stats + idle1, total1 := getCPUStats() + time.Sleep(100 * time.Millisecond) + // Read CPU stats again + idle2, total2 := getCPUStats() + + idleDelta := float64(idle2 - idle1) + totalDelta := float64(total2 - total1) + + if totalDelta == 0 { + return 0 + } + + cpuUsage := (1.0 - idleDelta/totalDelta) * 100.0 + return cpuUsage +} + +// getCPUStats reads /proc/stat and returns idle and total CPU time +func getCPUStats() (idle, total uint64) { + file, err := os.Open("/proc/stat") + if err != nil { + return 0, 0 + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if strings.HasPrefix(line, "cpu ") { + fields := strings.Fields(line) + if len(fields) < 5 { + return 0, 0 + } + // fields: cpu user nice system idle iowait irq softirq steal guest guest_nice + var values []uint64 + for _, field := range fields[1:] { 
+ val, err := strconv.ParseUint(field, 10, 64) + if err != nil { + continue + } + values = append(values, val) + total += val + } + if len(values) >= 4 { + idle = values[3] // idle is the 4th value + } + break + } + } + return idle, total +} diff --git a/flashring/internal/metrics/metric.go b/flashring/internal/metrics/metric.go new file mode 100644 index 00000000..263a4e69 --- /dev/null +++ b/flashring/internal/metrics/metric.go @@ -0,0 +1,109 @@ +package metrics + +import ( + "sync" + "time" + + "github.com/DataDog/datadog-go/v5/statsd" + "github.com/rs/zerolog/log" + "github.com/spf13/viper" +) + +const ( + ApiRequestCount = "api_request_count" + ApiRequestLatency = "api_request_latency" + ExternalApiRequestCount = "external_api_request_count" + ExternalApiRequestLatency = "external_api_request_latency" + DBCallLatency = "db_call_latency" + DBCallCount = "db_call_count" + MethodLatency = "method_latency" + MethodCount = "method_count" +) + +var ( + // it is safe to use one client from multiple goroutines simultaneously + statsDClient = getDefaultClient() + // by default full sampling + samplingRate = 0.0 + telegrafAddress = "localhost:8125" + appName = "" + initialized = false + once sync.Once +) + +// Init initializes the metrics client +func Init() { + if initialized { + log.Debug().Msgf("Metrics already initialized!") + return + } + once.Do(func() { + var err error + samplingRate = viper.GetFloat64("APP_METRIC_SAMPLING_RATE") + appName = viper.GetString("APP_NAME") + globalTags := getGlobalTags() + + statsDClient, err = statsd.New( + telegrafAddress, + statsd.WithTags(globalTags), + ) + + if err != nil { + log.Panic().AnErr("StatsD client initialization failed", err) + } + log.Info().Msgf("Metrics client initialized with telegraf address - %s, global tags - %v, and "+ + "sampling rate - %f", telegrafAddress, globalTags, samplingRate) + initialized = true + }) +} + +func getDefaultClient() *statsd.Client { + client, _ := statsd.New("localhost:8125") + return 
client +} + +func getGlobalTags() []string { + env := viper.GetString("APP_ENV") + if len(env) == 0 { + log.Warn().Msg("APP_ENV is not set") + } + service := viper.GetString("APP_NAME") + if len(service) == 0 { + log.Warn().Msg("APP_NAME is not set") + } + return []string{ + TagAsString(TagEnv, env), + TagAsString(TagService, service), + } +} + +// Timing sends timing information +func Timing(name string, value time.Duration, tags []string) { + tags = append(tags, TagAsString(TagService, appName)) + err := statsDClient.Timing(name, value, tags, samplingRate) + if err != nil { + log.Warn().AnErr("Error occurred while doing statsd timing", err) + } +} + +// Count Increases metric counter by value +func Count(name string, value int64, tags []string) { + tags = append(tags, TagAsString(TagService, appName)) + err := statsDClient.Count(name, value, tags, samplingRate) + if err != nil { + log.Warn().AnErr("Error occurred while doing statsd count", err) + } +} + +// Incr Increases metric counter by 1 +func Incr(name string, tags []string) { + Count(name, 1, tags) +} + +func Gauge(name string, value float64, tags []string) { + tags = append(tags, TagAsString(TagService, appName)) + err := statsDClient.Gauge(name, value, tags, samplingRate) + if err != nil { + log.Warn().AnErr("Error occurred while doing statsd gauge", err) + } +} diff --git a/flashring/internal/metrics/metrics_averager.go b/flashring/internal/metrics/metrics_averager.go new file mode 100644 index 00000000..f5346ced --- /dev/null +++ b/flashring/internal/metrics/metrics_averager.go @@ -0,0 +1,57 @@ +package metrics + +import ( + "sync" + "time" +) + +// MetricAverager maintains running averages for a metric +type MetricAverager struct { + mu sync.RWMutex + sum float64 + count int64 + lastValue float64 +} + +func (ma *MetricAverager) Add(value float64) { + if value == 0 { + return // Ignore zero values + } + ma.mu.Lock() + defer ma.mu.Unlock() + ma.sum += value + ma.count++ + ma.lastValue = value +} + +func 
(ma *MetricAverager) AddDuration(value time.Duration) { + if value == 0 { + return // Ignore zero values + } + ma.mu.Lock() + defer ma.mu.Unlock() + ma.sum += float64(value) + ma.count++ +} + +func (ma *MetricAverager) Average() float64 { + ma.mu.RLock() + defer ma.mu.RUnlock() + if ma.count == 0 { + return 0 + } + return ma.sum / float64(ma.count) +} + +func (ma *MetricAverager) Latest() float64 { + ma.mu.RLock() + defer ma.mu.RUnlock() + return ma.lastValue +} + +func (ma *MetricAverager) Reset() { + ma.mu.Lock() + defer ma.mu.Unlock() + ma.sum = 0 + ma.count = 0 +} diff --git a/flashring/internal/metrics/runmetrics.go b/flashring/internal/metrics/runmetrics.go new file mode 100644 index 00000000..238ddce7 --- /dev/null +++ b/flashring/internal/metrics/runmetrics.go @@ -0,0 +1,350 @@ +package metrics + +import ( + "sync" + "time" +) + +// Global variable to hold runtime data +var currentMetrics RunMetrics +var metricsCollector *MetricsCollector + +// MetricsRecorder is an interface for recording metrics from the cache +// Implement this interface to receive per-shard metrics from the cache layer +type MetricsRecorder interface { + + // Per-shard observation metrics + RecordRP99(shardIdx int, value time.Duration) + RecordRP50(shardIdx int, value time.Duration) + RecordRP25(shardIdx int, value time.Duration) + RecordWP99(shardIdx int, value time.Duration) + RecordWP50(shardIdx int, value time.Duration) + RecordWP25(shardIdx int, value time.Duration) + RecordRThroughput(shardIdx int, value float64) + RecordWThroughput(shardIdx int, value float64) + RecordHitRate(shardIdx int, value float64) +} + +type MetricsCollectorConfig struct { + StatsEnabled bool //Stats enabled - global flag + + CsvLogging bool //Log to CSV enabled + ConsoleLogging bool //Log to console enabled + StatsdLogging bool //Log to Statsd enabled + + InstantMetrics bool //Metrics at every instant + AveragedMetrics bool //Metrics averaged over a period of time + + // Metadata for external systems to 
use + // must include shards, keys_per_shard, read_workers, write_workers, plan + Metadata map[string]any +} + +// ShardMetrics holds observation metrics for a single shard +type ShardMetrics struct { + RP99 time.Duration + RP50 time.Duration + RP25 time.Duration + WP99 time.Duration + WP50 time.Duration + WP25 time.Duration + RThroughput float64 + WThroughput float64 + HitRate float64 +} + +// Define your parameter structure +type RunMetrics struct { + // Per-shard observation parameters + ShardMetrics []ShardMetrics + + // Averaged metrics over all shards + AveragedMetrics ShardMetrics +} + +// ShardMetricValue represents a metric value for a specific shard +type ShardMetricValue struct { + ShardIdx int + Value float64 +} + +// ShardDurationValue represents a duration metric value for a specific shard +type ShardDurationValue struct { + ShardIdx int + Value time.Duration +} + +// MetricChannels holds separate channels for each metric type (per-shard) +type MetricChannels struct { + RP99 chan ShardDurationValue + RP50 chan ShardDurationValue + RP25 chan ShardDurationValue + WP99 chan ShardDurationValue + WP50 chan ShardDurationValue + WP25 chan ShardDurationValue + RThroughput chan ShardMetricValue + WThroughput chan ShardMetricValue + HitRate chan ShardMetricValue +} + +// MetricsCollector collects and averages all metrics (per-shard) +type MetricsCollector struct { + Config MetricsCollectorConfig + channels MetricChannels //channels for each metric type (per-shard) + averagedMetrics map[string]*MetricAverager // metricName -> averager + instantMetrics map[int]map[string]*MetricAverager // shardIdx -> metricName -> averager + stopCh chan struct{} //channel to stop the collector when running from console + wg sync.WaitGroup + mu sync.RWMutex +} + +// InitMetricsCollector creates and starts the metrics collector, returning it +// so it can be passed to other components (e.g., cache config) +func InitMetricsCollector(config MetricsCollectorConfig) *MetricsCollector 
{ + Init() + metricsCollector = NewMetricsCollector(config, 100) + + shouldLog := config.StatsEnabled && (config.CsvLogging || config.ConsoleLogging || config.StatsdLogging) + + if shouldLog { + metricsCollector.Start() + } + + if config.CsvLogging { + go RunCSVLoggerWaitForShutdown() + } + + if config.StatsdLogging { + go RunStatsdLogger(metricsCollector) + } + + if config.ConsoleLogging { + go RunConsoleLogger(metricsCollector) + } + + return metricsCollector +} + +// NewMetricsCollector creates a new metrics collector with channels +func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *MetricsCollector { + mc := &MetricsCollector{ + Config: config, + channels: MetricChannels{ + RP99: make(chan ShardDurationValue, bufferSize), + RP50: make(chan ShardDurationValue, bufferSize), + RP25: make(chan ShardDurationValue, bufferSize), + WP99: make(chan ShardDurationValue, bufferSize), + WP50: make(chan ShardDurationValue, bufferSize), + WP25: make(chan ShardDurationValue, bufferSize), + RThroughput: make(chan ShardMetricValue, bufferSize), + WThroughput: make(chan ShardMetricValue, bufferSize), + HitRate: make(chan ShardMetricValue, bufferSize), + }, + averagedMetrics: make(map[string]*MetricAverager), + instantMetrics: make(map[int]map[string]*MetricAverager), + stopCh: make(chan struct{}), + } + + // Initialize averagedMetrics with MetricAverager instances + metricNames := []string{"RP99", "RP50", "RP25", "WP99", "WP50", "WP25", "RThroughput", "WThroughput", "HitRate"} + for _, name := range metricNames { + mc.averagedMetrics[name] = &MetricAverager{} + } + + // Initialize instantMetrics for each shard with MetricAverager instances + shards := config.Metadata["shards"].(int) + for shardIdx := 0; shardIdx < shards; shardIdx++ { + mc.instantMetrics[shardIdx] = make(map[string]*MetricAverager) + for _, name := range metricNames { + mc.instantMetrics[shardIdx][name] = &MetricAverager{} + } + } + + return mc +} + +// Start begins collecting metrics from 
all channels +func (mc *MetricsCollector) Start() { + // Start a goroutine for each metric channel + mc.wg.Add(9) + + go mc.collectShardDuration(mc.channels.RP99, "RP99") + go mc.collectShardDuration(mc.channels.RP50, "RP50") + go mc.collectShardDuration(mc.channels.RP25, "RP25") + go mc.collectShardDuration(mc.channels.WP99, "WP99") + go mc.collectShardDuration(mc.channels.WP50, "WP50") + go mc.collectShardDuration(mc.channels.WP25, "WP25") + go mc.collectShardMetric(mc.channels.RThroughput, "RThroughput") + go mc.collectShardMetric(mc.channels.WThroughput, "WThroughput") + go mc.collectShardMetric(mc.channels.HitRate, "HitRate") +} + +func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name string) { + defer mc.wg.Done() + for { + select { + case <-mc.stopCh: + return + case sv, ok := <-ch: + if !ok { + return + } + instants := mc.instantMetrics[sv.ShardIdx] + instants[name].Add(sv.Value) + mc.averagedMetrics[name].Add(sv.Value) + + } + } +} + +func (mc *MetricsCollector) collectShardDuration(ch chan ShardDurationValue, name string) { + defer mc.wg.Done() + for { + select { + case <-mc.stopCh: + return + case sv, ok := <-ch: + if !ok { + return + } + instants := mc.instantMetrics[sv.ShardIdx] + instants[name].AddDuration(sv.Value) + mc.averagedMetrics[name].AddDuration(sv.Value) + } + } +} + +// RecordRP99 sends a value to the RP99 channel for a specific shard +func (mc *MetricsCollector) RecordRP99(shardIdx int, value time.Duration) { + select { + case mc.channels.RP99 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + default: // Don't block if channel is full + } +} + +// RecordRP50 sends a value to the RP50 channel for a specific shard +func (mc *MetricsCollector) RecordRP50(shardIdx int, value time.Duration) { + select { + case mc.channels.RP50 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +// RecordRP25 sends a value to the RP25 channel for a specific shard +func (mc *MetricsCollector) 
RecordRP25(shardIdx int, value time.Duration) { + select { + case mc.channels.RP25 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +// RecordWP99 sends a value to the WP99 channel for a specific shard +func (mc *MetricsCollector) RecordWP99(shardIdx int, value time.Duration) { + select { + case mc.channels.WP99 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +// RecordWP50 sends a value to the WP50 channel for a specific shard +func (mc *MetricsCollector) RecordWP50(shardIdx int, value time.Duration) { + select { + case mc.channels.WP50 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +// RecordWP25 sends a value to the WP25 channel for a specific shard +func (mc *MetricsCollector) RecordWP25(shardIdx int, value time.Duration) { + select { + case mc.channels.WP25 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +// RecordRThroughput sends a value to the RThroughput channel for a specific shard +func (mc *MetricsCollector) RecordRThroughput(shardIdx int, value float64) { + select { + case mc.channels.RThroughput <- ShardMetricValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +// RecordWThroughput sends a value to the WThroughput channel for a specific shard +func (mc *MetricsCollector) RecordWThroughput(shardIdx int, value float64) { + select { + case mc.channels.WThroughput <- ShardMetricValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +// RecordHitRate sends a value to the HitRate channel for a specific shard +func (mc *MetricsCollector) RecordHitRate(shardIdx int, value float64) { + select { + case mc.channels.HitRate <- ShardMetricValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + +func (mc *MetricsCollector) GetMetrics() RunMetrics { + mc.mu.RLock() + defer mc.mu.RUnlock() + + shards := mc.Config.Metadata["shards"].(int) + + // Build per-shard metrics + shardMetrics := make([]ShardMetrics, shards) + for shardIdx := 
0; shardIdx < shards; shardIdx++ { + if instants, exists := mc.instantMetrics[shardIdx]; exists { + shardMetrics[shardIdx] = ShardMetrics{ + RP99: time.Duration(instants["RP99"].Latest()), + RP50: time.Duration(instants["RP50"].Latest()), + RP25: time.Duration(instants["RP25"].Latest()), + WP99: time.Duration(instants["WP99"].Latest()), + WP50: time.Duration(instants["WP50"].Latest()), + WP25: time.Duration(instants["WP25"].Latest()), + RThroughput: instants["RThroughput"].Latest(), + WThroughput: instants["WThroughput"].Latest(), + HitRate: instants["HitRate"].Latest(), + } + } + } + + averagedMetrics := ShardMetrics{ + RP99: time.Duration(mc.averagedMetrics["RP99"].Average()), + RP50: time.Duration(mc.averagedMetrics["RP50"].Average()), + RP25: time.Duration(mc.averagedMetrics["RP25"].Average()), + WP99: time.Duration(mc.averagedMetrics["WP99"].Average()), + WP50: time.Duration(mc.averagedMetrics["WP50"].Average()), + WP25: time.Duration(mc.averagedMetrics["WP25"].Average()), + RThroughput: mc.averagedMetrics["RThroughput"].Average(), + WThroughput: mc.averagedMetrics["WThroughput"].Average(), + HitRate: mc.averagedMetrics["HitRate"].Average(), + } + + return RunMetrics{ + ShardMetrics: shardMetrics, + AveragedMetrics: averagedMetrics, + } +} + +// ResetAverages resets all averagers to start fresh +func (mc *MetricsCollector) ResetAverages() { + mc.mu.Lock() + defer mc.mu.Unlock() + + for _, shardInstant := range mc.instantMetrics { + for _, instantMetric := range shardInstant { + instantMetric.Reset() // Reset the instant metric + } + } + for _, averagedMetric := range mc.averagedMetrics { + averagedMetric.Reset() // Reset the averaged metric + } +} + +// Stop stops all collector goroutines +func (mc *MetricsCollector) Stop() { + close(mc.stopCh) + mc.wg.Wait() +} diff --git a/flashring/internal/metrics/statsd_logger.go b/flashring/internal/metrics/statsd_logger.go new file mode 100644 index 00000000..28d741db --- /dev/null +++ 
b/flashring/internal/metrics/statsd_logger.go @@ -0,0 +1,55 @@ +package metrics + +import ( + "strconv" + "time" +) + +const ( + KEY_READ_LATENCY = "flashringread_latency" + KEY_WRITE_LATENCY = "flashringwrite_latency" + KEY_RTHROUGHPUT = "flashring_rthroughput" + KEY_WTHROUGHPUT = "flashring_wthroughput" + KEY_HITRATE = "flashring_hitrate" + + TAG_LATENCY_PERCENTILE = "latency_percentile" + TAG_VALUE_P25 = "p25" + TAG_VALUE_P50 = "p50" + TAG_VALUE_P99 = "p99" + + TAG_SHARD_IDX = "shard_idx" +) + +func RunStatsdLogger(metricsCollector *MetricsCollector) { + + // start a ticker to log the metrics every 30 seconds + + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for { + select { + case <-metricsCollector.stopCh: + return + case <-ticker.C: + currentMetrics = metricsCollector.GetMetrics() + + for idx, shard := range currentMetrics.ShardMetrics { + + shardIdx := strconv.Itoa(idx) + + Timing(KEY_READ_LATENCY, shard.RP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), NewTag(TAG_SHARD_IDX, shardIdx))) + Timing(KEY_READ_LATENCY, shard.RP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), NewTag(TAG_SHARD_IDX, shardIdx))) + Timing(KEY_READ_LATENCY, shard.RP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), NewTag(TAG_SHARD_IDX, shardIdx))) + Timing(KEY_WRITE_LATENCY, shard.WP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), NewTag(TAG_SHARD_IDX, shardIdx))) + Timing(KEY_WRITE_LATENCY, shard.WP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), NewTag(TAG_SHARD_IDX, shardIdx))) + Timing(KEY_WRITE_LATENCY, shard.WP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), NewTag(TAG_SHARD_IDX, shardIdx))) + Gauge(KEY_RTHROUGHPUT, shard.RThroughput, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) + Gauge(KEY_WTHROUGHPUT, shard.WThroughput, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) + Gauge(KEY_HITRATE, shard.HitRate, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) + } + + } + + } +} diff --git 
a/flashring/internal/metrics/tag.go b/flashring/internal/metrics/tag.go new file mode 100644 index 00000000..d77ac38e --- /dev/null +++ b/flashring/internal/metrics/tag.go @@ -0,0 +1,55 @@ +package metrics + +// Tag constants +const ( + TagEnv = "env" + TagService = "service" + TagPath = "path" + TagMethod = "method" + TagHttpStatusCode = "http_status_code" + TagGrpcStatusCode = "grpc_status_code" + TagExternalService = "external_service" + TagExternalServicePath = "external_service_path" + TagExternalServiceMethod = "external_service_method" + TagExternalServiceStatusCode = "external_service_status_code" + TagZkRealtimeTotalUpdateEvent = "zk_realtime_total_update_event" + TagZkRealtimeFailureEvent = "zk_realtime_failure_event" + TagZkRealtimeSuccessEvent = "zk_realtime_success_event" + TagZkRealtimeEventUpdateLatency = "zk_realtime_event_update_latency" + TagCommunicationProtocol = "communication_protocol" + TagUserContext = "user_context" + + TagValueCommunicationProtocolHttp = "http" + TagValueCommunicationProtocolGrpc = "grpc" +) + +type Tag struct { + Name string + Value string +} + +func NewTag(name, value string) Tag { + return Tag{ + Name: name, + Value: value, + } +} + +// BuildTag builds a tag from the given name and value +func BuildTag(tags ...Tag) []string { + allTags := make([]string, 0) + for _, tag := range tags { + allTags = append(allTags, TagAsString(tag.Name, tag.Value)) + } + return allTags +} + +func TagAsString(name string, value string) string { + return name + ":" + value +} + +func UpdateTags(tags *[]string, newTags ...Tag) { + for _, tag := range newTags { + *tags = append(*tags, TagAsString(tag.Name, tag.Value)) + } +} diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index e2094524..27468542 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -11,6 +11,8 @@ import ( filecache "github.com/Meesho/BharatMLStack/flashring/internal/shard" "github.com/cespare/xxhash/v2" 
"github.com/rs/zerolog/log" + + metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" ) /* @@ -37,11 +39,11 @@ var ( ) type WrapCache struct { - shards []*filecache.ShardCache - shardLocks []sync.RWMutex - predictor *maths.Predictor - stats []*CacheStats - metricsRecorder MetricsRecorder + shards []*filecache.ShardCache + shardLocks []sync.RWMutex + predictor *maths.Predictor + stats []*CacheStats + metricsCollector *metrics.MetricsCollector } type CacheStats struct { @@ -55,28 +57,6 @@ type CacheStats struct { BatchTracker *filecache.BatchTracker } -// MetricsRecorder is an interface for recording metrics from the cache -// Implement this interface to receive metrics from the cache layer -type MetricsRecorder interface { - // Input parameters - SetShards(value int) - SetKeysPerShard(value int) - SetReadWorkers(value int) - SetWriteWorkers(value int) - SetPlan(value string) - - // Observation metrics - RecordRP99(value time.Duration) - RecordRP50(value time.Duration) - RecordRP25(value time.Duration) - RecordWP99(value time.Duration) - RecordWP50(value time.Duration) - RecordWP25(value time.Duration) - RecordRThroughput(value float64) - RecordWThroughput(value float64) - RecordHitRate(value float64) -} - type WrapCacheConfig struct { NumShards int KeysPerShard int @@ -92,13 +72,13 @@ type WrapCacheConfig struct { MaxBatchSize int // Optional metrics recorder - MetricsRecorder MetricsRecorder + MetricsRecorder metrics.MetricsRecorder //Badger MountPoint string } -func NewWrapCache(config WrapCacheConfig, mountPoint string, logStats bool) (*WrapCache, error) { +func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *metrics.MetricsCollector) (*WrapCache, error) { if config.NumShards <= 0 { return nil, ErrNumShardLessThan1 } @@ -200,109 +180,74 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, logStats bool) (*Wr stats[i] = &CacheStats{LatencyTracker: filecache.NewLatencyTracker(), BatchTracker: 
filecache.NewBatchTracker()} } wc := &WrapCache{ - shards: shards, - shardLocks: shardLocks, - predictor: predictor, - stats: stats, - metricsRecorder: config.MetricsRecorder, + shards: shards, + shardLocks: shardLocks, + predictor: predictor, + stats: stats, + metricsCollector: metricsCollector, } - if logStats { + + if metricsCollector.Config.StatsEnabled { go func() { sleepDuration := 10 * time.Second - // perShardPrevTotalGets := make([]uint64, config.NumShards) - // perShardPrevTotalPuts := make([]uint64, config.NumShards) - combinedPrevTotalGets := uint64(0) - combinedPrevTotalPuts := uint64(0) + perShardPrevTotalGets := make([]uint64, config.NumShards) + perShardPrevTotalPuts := make([]uint64, config.NumShards) + for { time.Sleep(sleepDuration) - combinedTotalGets := uint64(0) - combinedTotalPuts := uint64(0) - combinedHits := uint64(0) - combinedReWrites := uint64(0) - combinedExpired := uint64(0) - combinedShardWiseActiveEntries := uint64(0) - for i := 0; i < config.NumShards; i++ { - combinedTotalGets += wc.stats[i].TotalGets.Load() - combinedTotalPuts += wc.stats[i].TotalPuts.Load() - combinedHits += wc.stats[i].Hits.Load() - combinedReWrites += wc.stats[i].ReWrites.Load() - combinedExpired += wc.stats[i].Expired.Load() - combinedShardWiseActiveEntries += wc.stats[i].ShardWiseActiveEntries.Load() - } - - combinedHitRate := float64(0) - if combinedTotalGets > 0 { - combinedHitRate = float64(combinedHits) / float64(combinedTotalGets) - } - - log.Info().Msgf("Combined HitRate: %v", combinedHitRate) - log.Info().Msgf("Combined ReWrites: %v", combinedReWrites) - log.Info().Msgf("Combined Expired: %v", combinedExpired) - log.Info().Msgf("Combined Total: %v", combinedTotalGets) - log.Info().Msgf("Combined Puts/sec: %v", float64(combinedTotalPuts-combinedPrevTotalPuts)/float64(sleepDuration.Seconds())) - log.Info().Msgf("Combined Gets/sec: %v", float64(combinedTotalGets-combinedPrevTotalGets)/float64(sleepDuration.Seconds())) - log.Info().Msgf("Combined 
ShardWiseActiveEntries: %v", combinedShardWiseActiveEntries) - - combinedGetP25, combinedGetP50, combinedGetP99 := wc.stats[0].LatencyTracker.GetLatencyPercentiles() - combinedPutP25, combinedPutP50, combinedPutP99 := wc.stats[0].LatencyTracker.PutLatencyPercentiles() - - log.Info().Msgf("Combined Get Count: %v", combinedTotalGets) - log.Info().Msgf("Combined Put Count: %v", combinedTotalPuts) - log.Info().Msgf("Combined Get Latencies - P25: %v, P50: %v, P99: %v", combinedGetP25, combinedGetP50, combinedGetP99) - log.Info().Msgf("Combined Put Latencies - P25: %v, P50: %v, P99: %v", combinedPutP25, combinedPutP50, combinedPutP99) - - combinedGetBatchP25, combinedGetBatchP50, combinedGetBatchP99 := wc.shards[0].Stats.BatchTracker.GetBatchSizePercentiles() - log.Info().Msgf("Combined Get Batch Sizes - P25: %v, P50: %v, P99: %v", combinedGetBatchP25, combinedGetBatchP50, combinedGetBatchP99) - - // Send metrics to the recorder if configured - if wc.metricsRecorder != nil { - rThroughput := float64(combinedTotalGets-combinedPrevTotalGets) / sleepDuration.Seconds() - wThroughput := float64(combinedTotalPuts-combinedPrevTotalPuts) / sleepDuration.Seconds() - - wc.metricsRecorder.RecordRP25(combinedGetP25) - wc.metricsRecorder.RecordRP50(combinedGetP50) - wc.metricsRecorder.RecordRP99(combinedGetP99) - wc.metricsRecorder.RecordWP25(combinedPutP25) - wc.metricsRecorder.RecordWP50(combinedPutP50) - wc.metricsRecorder.RecordWP99(combinedPutP99) - wc.metricsRecorder.RecordRThroughput(rThroughput) - wc.metricsRecorder.RecordWThroughput(wThroughput) - wc.metricsRecorder.RecordHitRate(combinedHitRate) - } - - combinedPrevTotalGets = combinedTotalGets - combinedPrevTotalPuts = combinedTotalPuts - - /* disabling per shard stats for now for i := 0; i < config.NumShards; i++ { log.Info().Msgf("Shard %d has %d active entries", i, wc.stats[i].ShardWiseActiveEntries.Load()) total := wc.stats[i].TotalGets.Load() - hits := wc.stats[i].Hits.Load() - hitRate := float64(0) - if total > 0 { - 
hitRate = float64(hits) / float64(total) - } - log.Info().Msgf("Shard %d HitRate: %v", i, hitRate) - log.Info().Msgf("Shard %d ReWrites: %v", i, wc.stats[i].ReWrites.Load()) - log.Info().Msgf("Shard %d Expired: %v", i, wc.stats[i].Expired.Load()) - log.Info().Msgf("Shard %d Total: %v", i, total) - log.Info().Msgf("Gets/sec: %v", float64(total-perShardPrevTotalGets[i])/float64(sleepDuration.Seconds())) - log.Info().Msgf("Puts/sec: %v", float64(wc.stats[i].TotalPuts.Load()-perShardPrevTotalPuts[i])/float64(sleepDuration.Seconds())) + // hits := wc.stats[i].Hits.Load() + // hitRate := float64(0) + // if total > 0 { + // hitRate = float64(hits) / float64(total) + // } + // log.Info().Msgf("Shard %d HitRate: %v", i, hitRate) + // log.Info().Msgf("Shard %d ReWrites: %v", i, wc.stats[i].ReWrites.Load()) + // log.Info().Msgf("Shard %d Expired: %v", i, wc.stats[i].Expired.Load()) + // log.Info().Msgf("Shard %d Total: %v", i, total) + // log.Info().Msgf("Gets/sec: %v", float64(total-perShardPrevTotalGets[i])/float64(sleepDuration.Seconds())) + // log.Info().Msgf("Puts/sec: %v", float64(wc.stats[i].TotalPuts.Load()-perShardPrevTotalPuts[i])/float64(sleepDuration.Seconds())) perShardPrevTotalGets[i] = total perShardPrevTotalPuts[i] = wc.stats[i].TotalPuts.Load() getP25, getP50, getP99 := wc.stats[i].LatencyTracker.GetLatencyPercentiles() putP25, putP50, putP99 := wc.stats[i].LatencyTracker.PutLatencyPercentiles() - log.Info().Msgf("Get Count: %v", wc.stats[i].TotalGets.Load()) - log.Info().Msgf("Put Count: %v", wc.stats[i].TotalPuts.Load()) - log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99) - log.Info().Msgf("Put Latencies - P25: %v, P50: %v, P99: %v", putP25, putP50, putP99) + // log.Info().Msgf("Get Count: %v", wc.stats[i].TotalGets.Load()) + // log.Info().Msgf("Put Count: %v", wc.stats[i].TotalPuts.Load()) + // log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99) + // log.Info().Msgf("Put Latencies - P25: 
%v, P50: %v, P99: %v", putP25, putP50, putP99) + + shardGets := wc.stats[i].TotalGets.Load() + shardPuts := wc.stats[i].TotalPuts.Load() + shardHits := wc.stats[i].Hits.Load() + + // Calculate per-shard throughput + rThroughput := float64(shardGets) / sleepDuration.Seconds() + wThroughput := float64(shardPuts) / sleepDuration.Seconds() + + // Calculate per-shard hit rate + shardHitRate := float64(0) + if shardGets > 0 { + shardHitRate = float64(shardHits) / float64(shardGets) + } + + wc.metricsCollector.RecordRP25(i, getP25) + wc.metricsCollector.RecordRP50(i, getP50) + wc.metricsCollector.RecordRP99(i, getP99) + wc.metricsCollector.RecordWP25(i, putP25) + wc.metricsCollector.RecordWP50(i, putP50) + wc.metricsCollector.RecordWP99(i, putP99) + wc.metricsCollector.RecordRThroughput(i, rThroughput) + wc.metricsCollector.RecordWThroughput(i, wThroughput) + wc.metricsCollector.RecordHitRate(i, shardHitRate) } - */ + log.Info().Msgf("GridSearchActive: %v", wc.predictor.GridSearchEstimator.IsGridSearchActive()) } }() From 0d014c49b1a01607fd8063051c460153c0757c3c Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Mon, 12 Jan 2026 13:37:21 +0000 Subject: [PATCH 06/53] move metrics to pkg --- flashring/cmd/flashringtest/plan_lockless.go | 7 +- .../cmd/flashringtest/plan_random_gausian.go | 2 +- .../flashringtest/plan_readthrough_gausian.go | 2 +- .../plan_readthrough_gausian_batched.go | 2 +- flashring/cmd/flashringtest/runmetrics.go | 515 ------------------ flashring/internal/metrics/console_logger.go | 44 -- flashring/pkg/cache/cache.go | 29 +- flashring/pkg/metrics/console_logger.go | 47 ++ .../{internal => pkg}/metrics/csv_logger.go | 0 flashring/{internal => pkg}/metrics/metric.go | 0 .../metrics/metrics_averager.go | 7 +- .../{internal => pkg}/metrics/runmetrics.go | 108 ++-- .../metrics/statsd_logger.go | 12 +- flashring/{internal => pkg}/metrics/tag.go | 0 14 files changed, 135 insertions(+), 640 deletions(-) delete mode 100644 
flashring/cmd/flashringtest/runmetrics.go delete mode 100644 flashring/internal/metrics/console_logger.go create mode 100644 flashring/pkg/metrics/console_logger.go rename flashring/{internal => pkg}/metrics/csv_logger.go (100%) rename flashring/{internal => pkg}/metrics/metric.go (100%) rename flashring/{internal => pkg}/metrics/metrics_averager.go (89%) rename flashring/{internal => pkg}/metrics/runmetrics.go (75%) rename flashring/{internal => pkg}/metrics/statsd_logger.go (82%) rename flashring/{internal => pkg}/metrics/tag.go (100%) diff --git a/flashring/cmd/flashringtest/plan_lockless.go b/flashring/cmd/flashringtest/plan_lockless.go index 100ebf01..3203db23 100644 --- a/flashring/cmd/flashringtest/plan_lockless.go +++ b/flashring/cmd/flashringtest/plan_lockless.go @@ -13,8 +13,8 @@ import ( "sync" "time" - metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" + metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -37,7 +37,7 @@ func planLockless() { ) flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme", "data directory for shard files") - flag.IntVar(&numShards, "shards", 500, "number of shards") + flag.IntVar(&numShards, "shards", 100, "number of shards") flag.IntVar(&keysPerShard, "keys-per-shard", 10_00_00, "keys per shard") flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB") flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 2, "file size in GiB per shard") @@ -95,6 +95,9 @@ func planLockless() { ReWriteScoreThreshold: 0.8, GridSearchEpsilon: 0.0001, SampleDuration: time.Duration(sampleSecs) * time.Second, + + //lockless mode for PutLL/GetLL + EnableLockless: true, } metricsConfig := metrics.MetricsCollectorConfig{ diff --git a/flashring/cmd/flashringtest/plan_random_gausian.go b/flashring/cmd/flashringtest/plan_random_gausian.go index 1d9477c3..ffa493a6 100644 --- 
a/flashring/cmd/flashringtest/plan_random_gausian.go +++ b/flashring/cmd/flashringtest/plan_random_gausian.go @@ -12,8 +12,8 @@ import ( "sync" "time" - metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" + metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index 6ceb053b..29885572 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -17,7 +17,7 @@ import ( "github.com/rs/zerolog" "github.com/rs/zerolog/log" - metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" + metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" ) func planReadthroughGaussian() { diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go index 0b875b65..21e6c0d2 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go @@ -13,8 +13,8 @@ import ( "sync" "time" - metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" + metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) diff --git a/flashring/cmd/flashringtest/runmetrics.go b/flashring/cmd/flashringtest/runmetrics.go deleted file mode 100644 index 5e1aabec..00000000 --- a/flashring/cmd/flashringtest/runmetrics.go +++ /dev/null @@ -1,515 +0,0 @@ -package main - -import ( - "bufio" - "encoding/csv" - "fmt" - "log" - "os" - "os/signal" - "runtime" - "strconv" - "strings" - "sync" - "syscall" - "time" -) - -// Define your parameter structure -type RunMetrics struct 
{ - // Input Parameters - Shards int - KeysPerShard int - ReadWorkers int - WriteWorkers int - Plan string - - // Observation Parameters - RP99 time.Duration - RP50 time.Duration - RP25 time.Duration - WP99 time.Duration - WP50 time.Duration - WP25 time.Duration - RThroughput float64 - WThroughput float64 - HitRate float64 - CPUUsage float64 - MemoryUsage float64 -} - -// MetricChannels holds separate channels for each metric type -type MetricChannels struct { - RP99 chan time.Duration - RP50 chan time.Duration - RP25 chan time.Duration - WP99 chan time.Duration - WP50 chan time.Duration - WP25 chan time.Duration - RThroughput chan float64 - WThroughput chan float64 - HitRate chan float64 - CPUUsage chan float64 - MemoryUsage chan float64 -} - -// MetricAverager maintains running averages for a metric -type MetricAverager struct { - mu sync.RWMutex - sum float64 - count int64 - lastValue float64 -} - -func (ma *MetricAverager) Add(value float64) { - if value == 0 { - return // Ignore zero values - } - ma.mu.Lock() - defer ma.mu.Unlock() - ma.sum += value - ma.count++ - ma.lastValue = value -} - -func (ma *MetricAverager) AddDuration(value time.Duration) { - if value == 0 { - return // Ignore zero values - } - ma.mu.Lock() - defer ma.mu.Unlock() - ma.sum += float64(value) - ma.count++ -} - -func (ma *MetricAverager) Average() float64 { - ma.mu.RLock() - defer ma.mu.RUnlock() - if ma.count == 0 { - return 0 - } - return ma.sum / float64(ma.count) -} - -func (ma *MetricAverager) Latest() float64 { - ma.mu.RLock() - defer ma.mu.RUnlock() - return ma.lastValue -} - -func (ma *MetricAverager) Reset() { - ma.mu.Lock() - defer ma.mu.Unlock() - ma.sum = 0 - ma.count = 0 -} - -// MetricsCollector collects and averages all metrics -type MetricsCollector struct { - channels MetricChannels - averagers map[string]*MetricAverager - stopCh chan struct{} - wg sync.WaitGroup - - // Input parameters (set once) - Shards int - KeysPerShard int - ReadWorkers int - WriteWorkers int - 
Plan string -} - -// NewMetricsCollector creates a new metrics collector with channels -func NewMetricsCollector(bufferSize int) *MetricsCollector { - mc := &MetricsCollector{ - channels: MetricChannels{ - RP99: make(chan time.Duration, bufferSize), - RP50: make(chan time.Duration, bufferSize), - RP25: make(chan time.Duration, bufferSize), - WP99: make(chan time.Duration, bufferSize), - WP50: make(chan time.Duration, bufferSize), - WP25: make(chan time.Duration, bufferSize), - RThroughput: make(chan float64, bufferSize), - WThroughput: make(chan float64, bufferSize), - HitRate: make(chan float64, bufferSize), - CPUUsage: make(chan float64, bufferSize), - MemoryUsage: make(chan float64, bufferSize), - }, - averagers: make(map[string]*MetricAverager), - stopCh: make(chan struct{}), - } - - // Initialize averagers for each metric - metricNames := []string{"RThroughput", "RP99", "RP50", "RP25", "WThroughput", "WP99", "WP50", "WP25", "HitRate", "CPUUsage", "MemoryUsage"} - for _, name := range metricNames { - mc.averagers[name] = &MetricAverager{} - } - - return mc -} - -// Start begins collecting metrics from all channels -func (mc *MetricsCollector) Start() { - // Start a goroutine for each metric channel - mc.wg.Add(11) - - go mc.collectMetricDuration(mc.channels.RP99, "RP99") - go mc.collectMetricDuration(mc.channels.RP50, "RP50") - go mc.collectMetricDuration(mc.channels.RP25, "RP25") - go mc.collectMetricDuration(mc.channels.WP99, "WP99") - go mc.collectMetricDuration(mc.channels.WP50, "WP50") - go mc.collectMetricDuration(mc.channels.WP25, "WP25") - go mc.collectMetric(mc.channels.RThroughput, "RThroughput") - go mc.collectMetric(mc.channels.WThroughput, "WThroughput") - go mc.collectMetric(mc.channels.HitRate, "HitRate") - go mc.collectMetric(mc.channels.CPUUsage, "CPUUsage") - go mc.collectMetric(mc.channels.MemoryUsage, "MemoryUsage") -} - -func (mc *MetricsCollector) collectMetric(ch chan float64, name string) { - defer mc.wg.Done() - for { - select { - case 
<-mc.stopCh: - return - case value, ok := <-ch: - if !ok { - return - } - mc.averagers[name].Add(value) - } - } -} - -func (mc *MetricsCollector) collectMetricDuration(ch chan time.Duration, name string) { - defer mc.wg.Done() - for { - select { - case <-mc.stopCh: - return - case value, ok := <-ch: - if !ok { - return - } - mc.averagers[name].AddDuration(value) - } - } -} - -// RecordRP99 sends a value to the RP99 channel -func (mc *MetricsCollector) RecordRP99(value time.Duration) { - select { - case mc.channels.RP99 <- value: - default: // Don't block if channel is full - } -} - -// RecordRP50 sends a value to the RP50 channel -func (mc *MetricsCollector) RecordRP50(value time.Duration) { - select { - case mc.channels.RP50 <- value: - default: - } -} - -// RecordRP25 sends a value to the RP25 channel -func (mc *MetricsCollector) RecordRP25(value time.Duration) { - select { - case mc.channels.RP25 <- value: - default: - } -} - -// RecordWP99 sends a value to the WP99 channel -func (mc *MetricsCollector) RecordWP99(value time.Duration) { - select { - case mc.channels.WP99 <- value: - default: - } -} - -// RecordWP50 sends a value to the WP50 channel -func (mc *MetricsCollector) RecordWP50(value time.Duration) { - select { - case mc.channels.WP50 <- value: - default: - } -} - -// RecordWP25 sends a value to the WP25 channel -func (mc *MetricsCollector) RecordWP25(value time.Duration) { - select { - case mc.channels.WP25 <- value: - default: - } -} - -// RecordRThroughput sends a value to the RThroughput channel -func (mc *MetricsCollector) RecordRThroughput(value float64) { - select { - case mc.channels.RThroughput <- value: - default: - } -} - -// RecordWThroughput sends a value to the WThroughput channel -func (mc *MetricsCollector) RecordWThroughput(value float64) { - select { - case mc.channels.WThroughput <- value: - default: - } -} - -// RecordHitRate sends a value to the HitRate channel -func (mc *MetricsCollector) RecordHitRate(value float64) { - select { - 
case mc.channels.HitRate <- value: - default: - } -} - -// GetAveragedMetrics returns the current averaged metrics -func (mc *MetricsCollector) GetAveragedMetrics() RunMetrics { - return RunMetrics{ - Shards: mc.Shards, - KeysPerShard: mc.KeysPerShard, - ReadWorkers: mc.ReadWorkers, - WriteWorkers: mc.WriteWorkers, - Plan: mc.Plan, - RP99: time.Duration(mc.averagers["RP99"].Average()), - RP50: time.Duration(mc.averagers["RP50"].Average()), - RP25: time.Duration(mc.averagers["RP25"].Average()), - WP99: time.Duration(mc.averagers["WP99"].Average()), - WP50: time.Duration(mc.averagers["WP50"].Average()), - WP25: time.Duration(mc.averagers["WP25"].Average()), - RThroughput: mc.averagers["RThroughput"].Latest(), - WThroughput: mc.averagers["WThroughput"].Latest(), - HitRate: mc.averagers["HitRate"].Average(), - CPUUsage: mc.averagers["CPUUsage"].Average(), - MemoryUsage: mc.averagers["MemoryUsage"].Average(), - } -} - -// ResetAverages resets all averagers to start fresh -func (mc *MetricsCollector) ResetAverages() { - for _, avg := range mc.averagers { - avg.Reset() - } -} - -// Stop stops all collector goroutines -func (mc *MetricsCollector) Stop() { - close(mc.stopCh) - mc.wg.Wait() -} - -// SetShards sets the number of shards (input parameter) -func (mc *MetricsCollector) SetShards(value int) { - mc.Shards = value -} - -// SetKeysPerShard sets the keys per shard (input parameter) -func (mc *MetricsCollector) SetKeysPerShard(value int) { - mc.KeysPerShard = value -} - -// SetReadWorkers sets the number of read workers (input parameter) -func (mc *MetricsCollector) SetReadWorkers(value int) { - mc.ReadWorkers = value -} - -// SetWriteWorkers sets the number of write workers (input parameter) -func (mc *MetricsCollector) SetWriteWorkers(value int) { - mc.WriteWorkers = value -} - -// SetPlan sets the plan name (input parameter) -func (mc *MetricsCollector) SetPlan(value string) { - mc.Plan = value -} - -// Global variable to hold runtime data -var currentMetrics 
RunMetrics -var metricsCollector *MetricsCollector - -// --- CSV Configuration --- -const CSVFileName = "performance_results.csv" - -// InitMetricsCollector creates and starts the metrics collector, returning it -// so it can be passed to other components (e.g., cache config) -func InitMetricsCollector() *MetricsCollector { - metricsCollector = NewMetricsCollector(100) - metricsCollector.Start() - return metricsCollector -} - -// RunmetricsWaitForShutdown waits for shutdown signal and logs final metrics to CSV -func RunmetricsWaitForShutdown() { - // --- Set up Signal Handling --- - stopChan := make(chan os.Signal, 1) - signal.Notify(stopChan, syscall.SIGINT, syscall.SIGTERM) - - fmt.Println("Program running. Press Ctrl+C to stop and log results to CSV...") - - // --- Wait for Stop Signal --- - <-stopChan - fmt.Println("\nTermination signal received. Stopping work and logging results...") - - // Stop the metrics collector - if metricsCollector != nil { - metricsCollector.Stop() - - // Get final averaged metrics - currentMetrics = metricsCollector.GetAveragedMetrics() - } - - // Get memory usage and CPU usage at this instant - currentMetrics.MemoryUsage = getMemoryUsageMB() - currentMetrics.CPUUsage = getCPUUsagePercent() - - // --- Log Data to CSV --- - if err := logResultsToCSV(); err != nil { - log.Fatalf("FATAL: Failed to log results to CSV: %v", err) - } - - fmt.Printf("Successfully logged results to %s.\n", CSVFileName) - - // Exit the program since we're running in a goroutine - os.Exit(0) -} - -// RunmetricsInit initializes metrics and waits for shutdown (convenience function) -func RunmetricsInit() { - InitMetricsCollector() - RunmetricsWaitForShutdown() -} - -func logResultsToCSV() error { - // 1. Check if the file exists to determine if we need a header row. 
- file, err := os.OpenFile(CSVFileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return fmt.Errorf("failed to open CSV file: %w", err) - } - defer file.Close() - - writer := csv.NewWriter(file) - defer writer.Flush() // Crucial to ensure data is written to the file before exiting. - - // The list of all your column headers - header := []string{ - "SHARDS", "KEYS_PER_SHARD", "READ_WORKERS", "WRITE_WORKERS", "PLAN", - "R_THROUGHPUT", "R_P99", "R_P50", "R_P25", "W_THROUGHPUT", "W_P99", "W_P50", "W_P25", - "HIT_RATE", "CPU", "MEMORY", "TIME", - } - - // Determine if the file is new (or empty) and needs the header - fileInfo, _ := file.Stat() - if fileInfo.Size() == 0 { - if err := writer.Write(header); err != nil { - return fmt.Errorf("error writing CSV header: %w", err) - } - } - - // Convert your struct fields into a slice of strings for the CSV writer - dataRow := []string{ - // Input Parameters - strconv.Itoa(currentMetrics.Shards), - strconv.Itoa(currentMetrics.KeysPerShard), - strconv.Itoa(currentMetrics.ReadWorkers), // Convert int to string - strconv.Itoa(currentMetrics.WriteWorkers), - currentMetrics.Plan, - - // Observation Parameters (convert floats to strings) - fmt.Sprintf("%v", currentMetrics.RThroughput), - fmt.Sprintf("%v", currentMetrics.RP99), - fmt.Sprintf("%v", currentMetrics.RP50), - fmt.Sprintf("%v", currentMetrics.RP25), - - fmt.Sprintf("%v", currentMetrics.WThroughput), - fmt.Sprintf("%v", currentMetrics.WP99), - fmt.Sprintf("%v", currentMetrics.WP50), - fmt.Sprintf("%v", currentMetrics.WP25), - - fmt.Sprintf("%v", currentMetrics.HitRate), - fmt.Sprintf("%v", currentMetrics.CPUUsage), - fmt.Sprintf("%v", currentMetrics.MemoryUsage), - fmt.Sprintf("%v", time.Now().In(time.FixedZone("IST", 5*60*60+30*60)).Format("2006-01-02 15:04:05")), - } - - if err := writer.Write(dataRow); err != nil { - return fmt.Errorf("error writing CSV data row: %w", err) - } - - return nil -} - -// getMemoryUsageMB returns the current memory usage 
of this process in MB -func getMemoryUsageMB() float64 { - var m runtime.MemStats - runtime.ReadMemStats(&m) - // Alloc is bytes of allocated heap objects - return float64(m.Alloc) / 1024 / 1024 -} - -// getSystemMemoryUsageMB returns the total system memory used by this process in MB -func getSystemMemoryUsageMB() float64 { - var m runtime.MemStats - runtime.ReadMemStats(&m) - // Sys is the total bytes of memory obtained from the OS - return float64(m.Sys) / 1024 / 1024 -} - -// getCPUUsagePercent returns the CPU usage percentage for this process -// It measures CPU usage over a short interval -func getCPUUsagePercent() float64 { - // Read initial CPU stats - idle1, total1 := getCPUStats() - time.Sleep(100 * time.Millisecond) - // Read CPU stats again - idle2, total2 := getCPUStats() - - idleDelta := float64(idle2 - idle1) - totalDelta := float64(total2 - total1) - - if totalDelta == 0 { - return 0 - } - - cpuUsage := (1.0 - idleDelta/totalDelta) * 100.0 - return cpuUsage -} - -// getCPUStats reads /proc/stat and returns idle and total CPU time -func getCPUStats() (idle, total uint64) { - file, err := os.Open("/proc/stat") - if err != nil { - return 0, 0 - } - defer file.Close() - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - if strings.HasPrefix(line, "cpu ") { - fields := strings.Fields(line) - if len(fields) < 5 { - return 0, 0 - } - // fields: cpu user nice system idle iowait irq softirq steal guest guest_nice - var values []uint64 - for _, field := range fields[1:] { - val, err := strconv.ParseUint(field, 10, 64) - if err != nil { - continue - } - values = append(values, val) - total += val - } - if len(values) >= 4 { - idle = values[3] // idle is the 4th value - } - break - } - } - return idle, total -} diff --git a/flashring/internal/metrics/console_logger.go b/flashring/internal/metrics/console_logger.go deleted file mode 100644 index fcc7f7ef..00000000 --- a/flashring/internal/metrics/console_logger.go +++ /dev/null 
@@ -1,44 +0,0 @@ -package metrics - -import ( - "time" - - "github.com/rs/zerolog/log" -) - -func RunConsoleLogger(metricsCollector *MetricsCollector) { - - // start a ticker to log the metrics every 30 seconds - - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() - - for { - select { - case <-metricsCollector.stopCh: - return - case <-ticker.C: - currentMetrics = metricsCollector.GetMetrics() - - rp99 := currentMetrics.AveragedMetrics.RP99.Milliseconds() - rp50 := currentMetrics.AveragedMetrics.RP50.Milliseconds() - rp25 := currentMetrics.AveragedMetrics.RP25.Milliseconds() - wp99 := currentMetrics.AveragedMetrics.WP99.Milliseconds() - wp50 := currentMetrics.AveragedMetrics.WP50.Milliseconds() - wp25 := currentMetrics.AveragedMetrics.WP25.Milliseconds() - - rThroughput := currentMetrics.AveragedMetrics.RThroughput - hitRate := currentMetrics.AveragedMetrics.HitRate - - log.Info().Msgf("RP99: %vms", rp99) - log.Info().Msgf("RP50: %vms", rp50) - log.Info().Msgf("RP25: %vms", rp25) - log.Info().Msgf("WP99: %vms", wp99) - log.Info().Msgf("WP50: %vms", wp50) - log.Info().Msgf("WP25: %vms", wp25) - log.Info().Msgf("RThroughput: %v", rThroughput) - log.Info().Msgf("HitRate: %v", hitRate) - - } - } -} diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 27468542..b1a0f48d 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -12,7 +12,7 @@ import ( "github.com/cespare/xxhash/v2" "github.com/rs/zerolog/log" - metrics "github.com/Meesho/BharatMLStack/flashring/internal/metrics" + metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" ) /* @@ -71,6 +71,9 @@ type WrapCacheConfig struct { BatchWindowMicros int // in microseconds MaxBatchSize int + //lockless mode for PutLL/GetLL + EnableLockless bool + // Optional metrics recorder MetricsRecorder metrics.MetricsRecorder @@ -172,6 +175,9 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m EnableBatching: 
config.EnableBatching, BatchWindow: batchWindow, MaxBatchSize: config.MaxBatchSize, + + //lockless mode for PutLL/GetLL + EnableLockless: config.EnableLockless, }, &shardLocks[i]) } @@ -198,30 +204,16 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m time.Sleep(sleepDuration) for i := 0; i < config.NumShards; i++ { - log.Info().Msgf("Shard %d has %d active entries", i, wc.stats[i].ShardWiseActiveEntries.Load()) total := wc.stats[i].TotalGets.Load() - // hits := wc.stats[i].Hits.Load() - // hitRate := float64(0) - // if total > 0 { - // hitRate = float64(hits) / float64(total) - // } - // log.Info().Msgf("Shard %d HitRate: %v", i, hitRate) - // log.Info().Msgf("Shard %d ReWrites: %v", i, wc.stats[i].ReWrites.Load()) - // log.Info().Msgf("Shard %d Expired: %v", i, wc.stats[i].Expired.Load()) - // log.Info().Msgf("Shard %d Total: %v", i, total) - // log.Info().Msgf("Gets/sec: %v", float64(total-perShardPrevTotalGets[i])/float64(sleepDuration.Seconds())) - // log.Info().Msgf("Puts/sec: %v", float64(wc.stats[i].TotalPuts.Load()-perShardPrevTotalPuts[i])/float64(sleepDuration.Seconds())) + + activeEntries := float64(wc.stats[i].ShardWiseActiveEntries.Load()) + perShardPrevTotalGets[i] = total perShardPrevTotalPuts[i] = wc.stats[i].TotalPuts.Load() getP25, getP50, getP99 := wc.stats[i].LatencyTracker.GetLatencyPercentiles() putP25, putP50, putP99 := wc.stats[i].LatencyTracker.PutLatencyPercentiles() - // log.Info().Msgf("Get Count: %v", wc.stats[i].TotalGets.Load()) - // log.Info().Msgf("Put Count: %v", wc.stats[i].TotalPuts.Load()) - // log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99) - // log.Info().Msgf("Put Latencies - P25: %v, P50: %v, P99: %v", putP25, putP50, putP99) - shardGets := wc.stats[i].TotalGets.Load() shardPuts := wc.stats[i].TotalPuts.Load() shardHits := wc.stats[i].Hits.Load() @@ -245,6 +237,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m 
wc.metricsCollector.RecordRThroughput(i, rThroughput) wc.metricsCollector.RecordWThroughput(i, wThroughput) wc.metricsCollector.RecordHitRate(i, shardHitRate) + wc.metricsCollector.RecordActiveEntries(i, activeEntries) } diff --git a/flashring/pkg/metrics/console_logger.go b/flashring/pkg/metrics/console_logger.go new file mode 100644 index 00000000..f4d51acb --- /dev/null +++ b/flashring/pkg/metrics/console_logger.go @@ -0,0 +1,47 @@ +package metrics + +import ( + "time" + + "github.com/rs/zerolog/log" +) + +func RunConsoleLogger(metricsCollector *MetricsCollector) { + + // start a ticker to log the metrics every 30 seconds + + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for { + select { + case <-metricsCollector.stopCh: + return + case <-ticker.C: + currentMetrics = metricsCollector.GetMetrics() + + rp99 := currentMetrics.AveragedMetrics.RP99 + rp50 := currentMetrics.AveragedMetrics.RP50 + rp25 := currentMetrics.AveragedMetrics.RP25 + wp99 := currentMetrics.AveragedMetrics.WP99 + wp50 := currentMetrics.AveragedMetrics.WP50 + wp25 := currentMetrics.AveragedMetrics.WP25 + + rThroughput := currentMetrics.AveragedMetrics.RThroughput + hitRate := currentMetrics.AveragedMetrics.HitRate + wThroughput := currentMetrics.AveragedMetrics.WThroughput + activeEntries := currentMetrics.AveragedMetrics.ActiveEntries + + log.Info().Msgf("RP99: %v", rp99) + log.Info().Msgf("RP50: %v", rp50) + log.Info().Msgf("RP25: %v", rp25) + log.Info().Msgf("WP99: %v", wp99) + log.Info().Msgf("WP50: %v", wp50) + log.Info().Msgf("WP25: %v", wp25) + log.Info().Msgf("RThroughput: %v/s", rThroughput) + log.Info().Msgf("WThroughput: %v/s", wThroughput) + log.Info().Msgf("HitRate: %v", hitRate) + log.Info().Msgf("ActiveEntries: %v", activeEntries) + } + } +} diff --git a/flashring/internal/metrics/csv_logger.go b/flashring/pkg/metrics/csv_logger.go similarity index 100% rename from flashring/internal/metrics/csv_logger.go rename to flashring/pkg/metrics/csv_logger.go diff 
--git a/flashring/internal/metrics/metric.go b/flashring/pkg/metrics/metric.go similarity index 100% rename from flashring/internal/metrics/metric.go rename to flashring/pkg/metrics/metric.go diff --git a/flashring/internal/metrics/metrics_averager.go b/flashring/pkg/metrics/metrics_averager.go similarity index 89% rename from flashring/internal/metrics/metrics_averager.go rename to flashring/pkg/metrics/metrics_averager.go index f5346ced..955cf1cb 100644 --- a/flashring/internal/metrics/metrics_averager.go +++ b/flashring/pkg/metrics/metrics_averager.go @@ -14,9 +14,6 @@ type MetricAverager struct { } func (ma *MetricAverager) Add(value float64) { - if value == 0 { - return // Ignore zero values - } ma.mu.Lock() defer ma.mu.Unlock() ma.sum += value @@ -25,13 +22,11 @@ func (ma *MetricAverager) Add(value float64) { } func (ma *MetricAverager) AddDuration(value time.Duration) { - if value == 0 { - return // Ignore zero values - } ma.mu.Lock() defer ma.mu.Unlock() ma.sum += float64(value) ma.count++ + ma.lastValue = float64(value) } func (ma *MetricAverager) Average() float64 { diff --git a/flashring/internal/metrics/runmetrics.go b/flashring/pkg/metrics/runmetrics.go similarity index 75% rename from flashring/internal/metrics/runmetrics.go rename to flashring/pkg/metrics/runmetrics.go index 238ddce7..f3dee4c6 100644 --- a/flashring/internal/metrics/runmetrics.go +++ b/flashring/pkg/metrics/runmetrics.go @@ -42,15 +42,16 @@ type MetricsCollectorConfig struct { // ShardMetrics holds observation metrics for a single shard type ShardMetrics struct { - RP99 time.Duration - RP50 time.Duration - RP25 time.Duration - WP99 time.Duration - WP50 time.Duration - WP25 time.Duration - RThroughput float64 - WThroughput float64 - HitRate float64 + RP99 time.Duration + RP50 time.Duration + RP25 time.Duration + WP99 time.Duration + WP50 time.Duration + WP25 time.Duration + RThroughput float64 + WThroughput float64 + HitRate float64 + ActiveEntries float64 } // Define your parameter 
structure @@ -76,15 +77,16 @@ type ShardDurationValue struct { // MetricChannels holds separate channels for each metric type (per-shard) type MetricChannels struct { - RP99 chan ShardDurationValue - RP50 chan ShardDurationValue - RP25 chan ShardDurationValue - WP99 chan ShardDurationValue - WP50 chan ShardDurationValue - WP25 chan ShardDurationValue - RThroughput chan ShardMetricValue - WThroughput chan ShardMetricValue - HitRate chan ShardMetricValue + RP99 chan ShardDurationValue + RP50 chan ShardDurationValue + RP25 chan ShardDurationValue + WP99 chan ShardDurationValue + WP50 chan ShardDurationValue + WP25 chan ShardDurationValue + RThroughput chan ShardMetricValue + WThroughput chan ShardMetricValue + HitRate chan ShardMetricValue + ActiveEntries chan ShardMetricValue } // MetricsCollector collects and averages all metrics (per-shard) @@ -130,15 +132,16 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics mc := &MetricsCollector{ Config: config, channels: MetricChannels{ - RP99: make(chan ShardDurationValue, bufferSize), - RP50: make(chan ShardDurationValue, bufferSize), - RP25: make(chan ShardDurationValue, bufferSize), - WP99: make(chan ShardDurationValue, bufferSize), - WP50: make(chan ShardDurationValue, bufferSize), - WP25: make(chan ShardDurationValue, bufferSize), - RThroughput: make(chan ShardMetricValue, bufferSize), - WThroughput: make(chan ShardMetricValue, bufferSize), - HitRate: make(chan ShardMetricValue, bufferSize), + RP99: make(chan ShardDurationValue, bufferSize), + RP50: make(chan ShardDurationValue, bufferSize), + RP25: make(chan ShardDurationValue, bufferSize), + WP99: make(chan ShardDurationValue, bufferSize), + WP50: make(chan ShardDurationValue, bufferSize), + WP25: make(chan ShardDurationValue, bufferSize), + RThroughput: make(chan ShardMetricValue, bufferSize), + WThroughput: make(chan ShardMetricValue, bufferSize), + HitRate: make(chan ShardMetricValue, bufferSize), + ActiveEntries: make(chan 
ShardMetricValue, bufferSize), }, averagedMetrics: make(map[string]*MetricAverager), instantMetrics: make(map[int]map[string]*MetricAverager), @@ -146,7 +149,7 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics } // Initialize averagedMetrics with MetricAverager instances - metricNames := []string{"RP99", "RP50", "RP25", "WP99", "WP50", "WP25", "RThroughput", "WThroughput", "HitRate"} + metricNames := []string{"RP99", "RP50", "RP25", "WP99", "WP50", "WP25", "RThroughput", "WThroughput", "HitRate", "ActiveEntries"} for _, name := range metricNames { mc.averagedMetrics[name] = &MetricAverager{} } @@ -166,7 +169,7 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics // Start begins collecting metrics from all channels func (mc *MetricsCollector) Start() { // Start a goroutine for each metric channel - mc.wg.Add(9) + mc.wg.Add(10) go mc.collectShardDuration(mc.channels.RP99, "RP99") go mc.collectShardDuration(mc.channels.RP50, "RP50") @@ -177,6 +180,7 @@ func (mc *MetricsCollector) Start() { go mc.collectShardMetric(mc.channels.RThroughput, "RThroughput") go mc.collectShardMetric(mc.channels.WThroughput, "WThroughput") go mc.collectShardMetric(mc.channels.HitRate, "HitRate") + go mc.collectShardMetric(mc.channels.ActiveEntries, "ActiveEntries") } func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name string) { @@ -286,6 +290,14 @@ func (mc *MetricsCollector) RecordHitRate(shardIdx int, value float64) { } } +// RecordActiveEntries sends a value to the ActiveEntries channel for a specific shard +func (mc *MetricsCollector) RecordActiveEntries(shardIdx int, value float64) { + select { + case mc.channels.ActiveEntries <- ShardMetricValue{ShardIdx: shardIdx, Value: value}: + default: + } +} + func (mc *MetricsCollector) GetMetrics() RunMetrics { mc.mu.RLock() defer mc.mu.RUnlock() @@ -297,29 +309,31 @@ func (mc *MetricsCollector) GetMetrics() RunMetrics { for shardIdx := 0; shardIdx < 
shards; shardIdx++ { if instants, exists := mc.instantMetrics[shardIdx]; exists { shardMetrics[shardIdx] = ShardMetrics{ - RP99: time.Duration(instants["RP99"].Latest()), - RP50: time.Duration(instants["RP50"].Latest()), - RP25: time.Duration(instants["RP25"].Latest()), - WP99: time.Duration(instants["WP99"].Latest()), - WP50: time.Duration(instants["WP50"].Latest()), - WP25: time.Duration(instants["WP25"].Latest()), - RThroughput: instants["RThroughput"].Latest(), - WThroughput: instants["WThroughput"].Latest(), - HitRate: instants["HitRate"].Latest(), + RP99: time.Duration(instants["RP99"].Latest()), + RP50: time.Duration(instants["RP50"].Latest()), + RP25: time.Duration(instants["RP25"].Latest()), + WP99: time.Duration(instants["WP99"].Latest()), + WP50: time.Duration(instants["WP50"].Latest()), + WP25: time.Duration(instants["WP25"].Latest()), + RThroughput: instants["RThroughput"].Latest(), + WThroughput: instants["WThroughput"].Latest(), + HitRate: instants["HitRate"].Latest(), + ActiveEntries: instants["ActiveEntries"].Latest(), } } } averagedMetrics := ShardMetrics{ - RP99: time.Duration(mc.averagedMetrics["RP99"].Average()), - RP50: time.Duration(mc.averagedMetrics["RP50"].Average()), - RP25: time.Duration(mc.averagedMetrics["RP25"].Average()), - WP99: time.Duration(mc.averagedMetrics["WP99"].Average()), - WP50: time.Duration(mc.averagedMetrics["WP50"].Average()), - WP25: time.Duration(mc.averagedMetrics["WP25"].Average()), - RThroughput: mc.averagedMetrics["RThroughput"].Average(), - WThroughput: mc.averagedMetrics["WThroughput"].Average(), - HitRate: mc.averagedMetrics["HitRate"].Average(), + RP99: time.Duration(mc.averagedMetrics["RP99"].Average()), + RP50: time.Duration(mc.averagedMetrics["RP50"].Average()), + RP25: time.Duration(mc.averagedMetrics["RP25"].Average()), + WP99: time.Duration(mc.averagedMetrics["WP99"].Average()), + WP50: time.Duration(mc.averagedMetrics["WP50"].Average()), + WP25: time.Duration(mc.averagedMetrics["WP25"].Average()), + 
RThroughput: mc.averagedMetrics["RThroughput"].Average(), + WThroughput: mc.averagedMetrics["WThroughput"].Average(), + HitRate: mc.averagedMetrics["HitRate"].Average(), + ActiveEntries: mc.averagedMetrics["ActiveEntries"].Average(), } return RunMetrics{ diff --git a/flashring/internal/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go similarity index 82% rename from flashring/internal/metrics/statsd_logger.go rename to flashring/pkg/metrics/statsd_logger.go index 28d741db..19d6f7bd 100644 --- a/flashring/internal/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -6,11 +6,12 @@ import ( ) const ( - KEY_READ_LATENCY = "flashringread_latency" - KEY_WRITE_LATENCY = "flashringwrite_latency" - KEY_RTHROUGHPUT = "flashring_rthroughput" - KEY_WTHROUGHPUT = "flashring_wthroughput" - KEY_HITRATE = "flashring_hitrate" + KEY_READ_LATENCY = "flashringread_latency" + KEY_WRITE_LATENCY = "flashringwrite_latency" + KEY_RTHROUGHPUT = "flashring_rthroughput" + KEY_WTHROUGHPUT = "flashring_wthroughput" + KEY_HITRATE = "flashring_hitrate" + KEY_ACTIVE_ENTRIES = "flashring_active_entries" TAG_LATENCY_PERCENTILE = "latency_percentile" TAG_VALUE_P25 = "p25" @@ -47,6 +48,7 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { Gauge(KEY_RTHROUGHPUT, shard.RThroughput, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) Gauge(KEY_WTHROUGHPUT, shard.WThroughput, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) Gauge(KEY_HITRATE, shard.HitRate, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) + Gauge(KEY_ACTIVE_ENTRIES, shard.ActiveEntries, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) } } diff --git a/flashring/internal/metrics/tag.go b/flashring/pkg/metrics/tag.go similarity index 100% rename from flashring/internal/metrics/tag.go rename to flashring/pkg/metrics/tag.go From 8a56a25c9882871e1af8326437405078f89385f2 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Tue, 13 Jan 2026 08:29:22 +0000 Subject: [PATCH 07/53] full sampling rate --- 
flashring/pkg/metrics/metric.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flashring/pkg/metrics/metric.go b/flashring/pkg/metrics/metric.go index 263a4e69..495514e2 100644 --- a/flashring/pkg/metrics/metric.go +++ b/flashring/pkg/metrics/metric.go @@ -23,8 +23,8 @@ const ( var ( // it is safe to use one client from multiple goroutines simultaneously statsDClient = getDefaultClient() - // by default full sampling - samplingRate = 0.0 + // by default full sampling (1.0 = 100%) + samplingRate = 1.0 telegrafAddress = "localhost:8125" appName = "" initialized = false From f565b32758d436dd6c2bfb7d99442203469a40ec Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Tue, 13 Jan 2026 12:50:49 +0000 Subject: [PATCH 08/53] fix metrics --- flashring/pkg/cache/cache.go | 32 +-- flashring/pkg/metrics/console_logger.go | 63 +++++- flashring/pkg/metrics/csv_logger.go | 153 ++++++++++++-- flashring/pkg/metrics/runmetrics.go | 265 +++++++++++------------- flashring/pkg/metrics/statsd_logger.go | 46 ++-- 5 files changed, 346 insertions(+), 213 deletions(-) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index b1a0f48d..729001f5 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -197,19 +197,11 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m go func() { sleepDuration := 10 * time.Second - perShardPrevTotalGets := make([]uint64, config.NumShards) - perShardPrevTotalPuts := make([]uint64, config.NumShards) for { time.Sleep(sleepDuration) for i := 0; i < config.NumShards; i++ { - total := wc.stats[i].TotalGets.Load() - - activeEntries := float64(wc.stats[i].ShardWiseActiveEntries.Load()) - - perShardPrevTotalGets[i] = total - perShardPrevTotalPuts[i] = wc.stats[i].TotalPuts.Load() getP25, getP50, getP99 := wc.stats[i].LatencyTracker.GetLatencyPercentiles() putP25, putP50, putP99 := wc.stats[i].LatencyTracker.PutLatencyPercentiles() @@ -217,16 +209,9 @@ func 
NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m shardGets := wc.stats[i].TotalGets.Load() shardPuts := wc.stats[i].TotalPuts.Load() shardHits := wc.stats[i].Hits.Load() - - // Calculate per-shard throughput - rThroughput := float64(shardGets) / sleepDuration.Seconds() - wThroughput := float64(shardPuts) / sleepDuration.Seconds() - - // Calculate per-shard hit rate - shardHitRate := float64(0) - if shardGets > 0 { - shardHitRate = float64(shardHits) / float64(shardGets) - } + shardExpired := wc.stats[i].Expired.Load() + shardReWrites := wc.stats[i].ReWrites.Load() + shardActiveEntries := wc.stats[i].ShardWiseActiveEntries.Load() wc.metricsCollector.RecordRP25(i, getP25) wc.metricsCollector.RecordRP50(i, getP50) @@ -234,10 +219,13 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m wc.metricsCollector.RecordWP25(i, putP25) wc.metricsCollector.RecordWP50(i, putP50) wc.metricsCollector.RecordWP99(i, putP99) - wc.metricsCollector.RecordRThroughput(i, rThroughput) - wc.metricsCollector.RecordWThroughput(i, wThroughput) - wc.metricsCollector.RecordHitRate(i, shardHitRate) - wc.metricsCollector.RecordActiveEntries(i, activeEntries) + + wc.metricsCollector.RecordActiveEntries(i, int64(shardActiveEntries)) + wc.metricsCollector.RecordExpiredEntries(i, int64(shardExpired)) + wc.metricsCollector.RecordRewrites(i, int64(shardReWrites)) + wc.metricsCollector.RecordGets(i, int64(shardGets)) + wc.metricsCollector.RecordPuts(i, int64(shardPuts)) + wc.metricsCollector.RecordHits(i, int64(shardHits)) } diff --git a/flashring/pkg/metrics/console_logger.go b/flashring/pkg/metrics/console_logger.go index f4d51acb..6635f247 100644 --- a/flashring/pkg/metrics/console_logger.go +++ b/flashring/pkg/metrics/console_logger.go @@ -13,6 +13,15 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { ticker := time.NewTicker(30 * time.Second) defer ticker.Stop() + shards := metricsCollector.Config.Metadata["shards"].(int) + + 
prevGetsTotal := uint64(0) + prevPutsTotal := uint64(0) + prevHitsTotal := uint64(0) + prevExpiredTotal := uint64(0) + prevReWritesTotal := uint64(0) + prevActiveEntriesTotal := uint64(0) + for { select { case <-metricsCollector.stopCh: @@ -20,17 +29,49 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { case <-ticker.C: currentMetrics = metricsCollector.GetMetrics() - rp99 := currentMetrics.AveragedMetrics.RP99 - rp50 := currentMetrics.AveragedMetrics.RP50 - rp25 := currentMetrics.AveragedMetrics.RP25 - wp99 := currentMetrics.AveragedMetrics.WP99 - wp50 := currentMetrics.AveragedMetrics.WP50 - wp25 := currentMetrics.AveragedMetrics.WP25 + getsTotal := uint64(0) + putsTotal := uint64(0) + hitsTotal := uint64(0) + expiredTotal := uint64(0) + reWritesTotal := uint64(0) + activeEntriesTotal := uint64(0) + + rp99 := time.Duration(0) + rp50 := time.Duration(0) + rp25 := time.Duration(0) + wp99 := time.Duration(0) + wp50 := time.Duration(0) + wp25 := time.Duration(0) + + for _, shard := range currentMetrics.ShardMetrics { + getsTotal += uint64(shard.Gets) + putsTotal += uint64(shard.Puts) + hitsTotal += uint64(shard.Hits) + expiredTotal += uint64(shard.ExpiredEntries) + reWritesTotal += uint64(shard.Rewrites) + activeEntriesTotal += uint64(shard.ActiveEntries) + + rp99 += shard.RP99 + rp50 += shard.RP50 + rp25 += shard.RP25 + wp99 += shard.WP99 + wp50 += shard.WP50 + wp25 += shard.WP25 + } + + rp99 = rp99 / time.Duration(shards) + rp50 = rp50 / time.Duration(shards) + rp25 = rp25 / time.Duration(shards) + wp99 = wp99 / time.Duration(shards) + wp50 = wp50 / time.Duration(shards) + wp25 = wp25 / time.Duration(shards) - rThroughput := currentMetrics.AveragedMetrics.RThroughput - hitRate := currentMetrics.AveragedMetrics.HitRate - wThroughput := currentMetrics.AveragedMetrics.WThroughput - activeEntries := currentMetrics.AveragedMetrics.ActiveEntries + rThroughput := float64(getsTotal-prevGetsTotal) / float64(30) + wThroughput := float64(putsTotal-prevPutsTotal) / 
float64(30) + hitRate := float64(hitsTotal-prevHitsTotal) / float64(getsTotal-prevGetsTotal) + activeEntries := float64(activeEntriesTotal - prevActiveEntriesTotal) + expiredEntries := float64(expiredTotal - prevExpiredTotal) + reWrites := float64(reWritesTotal - prevReWritesTotal) log.Info().Msgf("RP99: %v", rp99) log.Info().Msgf("RP50: %v", rp50) @@ -42,6 +83,8 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { log.Info().Msgf("WThroughput: %v/s", wThroughput) log.Info().Msgf("HitRate: %v", hitRate) log.Info().Msgf("ActiveEntries: %v", activeEntries) + log.Info().Msgf("ExpiredEntries: %v", expiredEntries) + log.Info().Msgf("ReWrites: %v", reWrites) } } } diff --git a/flashring/pkg/metrics/csv_logger.go b/flashring/pkg/metrics/csv_logger.go index d5ae6cd3..95c54fea 100644 --- a/flashring/pkg/metrics/csv_logger.go +++ b/flashring/pkg/metrics/csv_logger.go @@ -17,8 +17,115 @@ import ( // --- CSV Configuration --- const CSVFileName = "performance_results.csv" +type CsvLogger struct { + prevGetsTotal uint64 + prevPutsTotal uint64 + prevHitsTotal uint64 + prevExpiredTotal uint64 + prevReWritesTotal uint64 + prevActiveEntriesTotal uint64 + + samplesRthroguhput []float64 + samplesWthroguhput []float64 + samplesHitRate []float64 + samplesActiveEntries []float64 + samplesExpiredEntries []float64 + samplesReWrites []float64 + samplesRP99 []time.Duration + samplesRP50 []time.Duration + samplesRP25 []time.Duration + samplesWP99 []time.Duration + samplesWP50 []time.Duration + samplesWP25 []time.Duration + + totalSamples int + + metricsCollector *MetricsCollector +} + +func (c *CsvLogger) collectMetrics() *time.Ticker { + + //tickered every 30 seconds + ticker := time.NewTicker(30 * time.Second) + defer ticker.Stop() + + for range ticker.C { + shards := metricsCollector.Config.Metadata["shards"].(int) + currentMetrics = metricsCollector.GetMetrics() + + getsTotal := uint64(0) + putsTotal := uint64(0) + hitsTotal := uint64(0) + expiredTotal := uint64(0) + 
reWritesTotal := uint64(0) + activeEntriesTotal := uint64(0) + + rp99 := time.Duration(0) + rp50 := time.Duration(0) + rp25 := time.Duration(0) + wp99 := time.Duration(0) + wp50 := time.Duration(0) + wp25 := time.Duration(0) + + for _, shard := range currentMetrics.ShardMetrics { + getsTotal += uint64(shard.Gets) + putsTotal += uint64(shard.Puts) + hitsTotal += uint64(shard.Hits) + expiredTotal += uint64(shard.ExpiredEntries) + reWritesTotal += uint64(shard.Rewrites) + activeEntriesTotal += uint64(shard.ActiveEntries) + + rp99 += shard.RP99 + rp50 += shard.RP50 + rp25 += shard.RP25 + wp99 += shard.WP99 + wp50 += shard.WP50 + wp25 += shard.WP25 + } + + rThroughput := float64(getsTotal-c.prevGetsTotal) / float64(30) + wThroughput := float64(putsTotal-c.prevPutsTotal) / float64(30) + hitRate := float64(hitsTotal-c.prevHitsTotal) / float64(getsTotal-c.prevGetsTotal) + activeEntries := float64(activeEntriesTotal - c.prevActiveEntriesTotal) + expiredEntries := float64(expiredTotal - c.prevExpiredTotal) + reWrites := float64(reWritesTotal - c.prevReWritesTotal) + + rp99 = rp99 / time.Duration(shards) + rp50 = rp50 / time.Duration(shards) + rp25 = rp25 / time.Duration(shards) + wp99 = wp99 / time.Duration(shards) + wp50 = wp50 / time.Duration(shards) + wp25 = wp25 / time.Duration(shards) + + c.samplesRthroguhput = append(c.samplesRthroguhput, rThroughput) + c.samplesWthroguhput = append(c.samplesWthroguhput, wThroughput) + c.samplesHitRate = append(c.samplesHitRate, hitRate) + c.samplesActiveEntries = append(c.samplesActiveEntries, activeEntries) + c.samplesExpiredEntries = append(c.samplesExpiredEntries, expiredEntries) + c.samplesReWrites = append(c.samplesReWrites, reWrites) + c.samplesRP99 = append(c.samplesRP99, rp99) + c.samplesRP50 = append(c.samplesRP50, rp50) + c.samplesRP25 = append(c.samplesRP25, rp25) + c.samplesWP99 = append(c.samplesWP99, wp99) + c.samplesWP50 = append(c.samplesWP50, wp50) + c.samplesWP25 = append(c.samplesWP25, wp25) + + c.prevGetsTotal = 
getsTotal + c.prevPutsTotal = putsTotal + c.prevHitsTotal = hitsTotal + c.prevExpiredTotal = expiredTotal + c.prevReWritesTotal = reWritesTotal + c.prevActiveEntriesTotal = activeEntriesTotal + } + + return ticker + +} + // RunCSVLoggerWaitForShutdown waits for shutdown signal and logs final metrics to CSV -func RunCSVLoggerWaitForShutdown() { +func (c *CsvLogger) RunCSVLoggerWaitForShutdown() { + + ticker := c.collectMetrics() // --- Set up Signal Handling --- stopChan := make(chan os.Signal, 1) signal.Notify(stopChan, syscall.SIGINT, syscall.SIGTERM) @@ -31,14 +138,12 @@ func RunCSVLoggerWaitForShutdown() { // Stop the metrics collector if metricsCollector != nil { + ticker.Stop() metricsCollector.Stop() - - // Get final averaged metrics - currentMetrics = metricsCollector.GetMetrics() } // --- Log Data to CSV --- - if err := LogResultsToCSV(metricsCollector.Config.Metadata); err != nil { + if err := c.LogResultsToCSV(); err != nil { log.Fatalf("FATAL: Failed to log results to CSV: %v", err) } @@ -48,7 +153,7 @@ func RunCSVLoggerWaitForShutdown() { os.Exit(0) } -func LogResultsToCSV(metadata map[string]any) error { +func (c *CsvLogger) LogResultsToCSV() error { // 1. Check if the file exists to determine if we need a header row. 
file, err := os.OpenFile(CSVFileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { @@ -74,6 +179,7 @@ func LogResultsToCSV(metadata map[string]any) error { } } + metadata := c.metricsCollector.Config.Metadata timestamp := time.Now().In(time.FixedZone("IST", 5*60*60+30*60)).Format("2006-01-02 15:04:05") dataRow := []string{ @@ -85,15 +191,16 @@ func LogResultsToCSV(metadata map[string]any) error { metadata["plan"].(string), // averaged observation parameters - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.RThroughput), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.RP99), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.RP50), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.RP25), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WThroughput), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WP99), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WP50), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.WP25), - fmt.Sprintf("%v", currentMetrics.AveragedMetrics.HitRate), + //sum sample and divide by total samples + fmt.Sprintf("%v", averageFloat64(c.samplesRthroguhput)), + fmt.Sprintf("%v", averageDuration(c.samplesRP99)), + fmt.Sprintf("%v", averageDuration(c.samplesRP50)), + fmt.Sprintf("%v", averageDuration(c.samplesRP25)), + fmt.Sprintf("%v", averageFloat64(c.samplesWthroguhput)), + fmt.Sprintf("%v", averageDuration(c.samplesWP99)), + fmt.Sprintf("%v", averageDuration(c.samplesWP50)), + fmt.Sprintf("%v", averageDuration(c.samplesWP25)), + fmt.Sprintf("%v", averageFloat64(c.samplesHitRate)), fmt.Sprintf("%v", getCPUUsagePercent()), fmt.Sprintf("%v", getMemoryUsageMB()), timestamp, @@ -106,6 +213,22 @@ func LogResultsToCSV(metadata map[string]any) error { return nil } +func averageFloat64(samples []float64) float64 { + sum := 0.0 + for _, sample := range samples { + sum += sample + } + return sum / float64(len(samples)) +} + +func averageDuration(samples []time.Duration) time.Duration { + sum := time.Duration(0) + for _, sample := 
range samples { + sum += sample + } + return sum / time.Duration(len(samples)) +} + // getMemoryUsageMB returns the current memory usage of this process in MB func getMemoryUsageMB() float64 { var m runtime.MemStats diff --git a/flashring/pkg/metrics/runmetrics.go b/flashring/pkg/metrics/runmetrics.go index f3dee4c6..03891c7e 100644 --- a/flashring/pkg/metrics/runmetrics.go +++ b/flashring/pkg/metrics/runmetrics.go @@ -12,6 +12,12 @@ var metricsCollector *MetricsCollector // MetricsRecorder is an interface for recording metrics from the cache // Implement this interface to receive per-shard metrics from the cache layer type MetricsRecorder interface { + RecordGets(shardIdx int, value int64) + RecordPuts(shardIdx int, value int64) + RecordHits(shardIdx int, value int64) + RecordActiveEntries(shardIdx int, value int64) + RecordExpiredEntries(shardIdx int, value int64) + RecordRewrites(shardIdx int, value int64) // Per-shard observation metrics RecordRP99(shardIdx int, value time.Duration) @@ -20,9 +26,6 @@ type MetricsRecorder interface { RecordWP99(shardIdx int, value time.Duration) RecordWP50(shardIdx int, value time.Duration) RecordWP25(shardIdx int, value time.Duration) - RecordRThroughput(shardIdx int, value float64) - RecordWThroughput(shardIdx int, value float64) - RecordHitRate(shardIdx int, value float64) } type MetricsCollectorConfig struct { @@ -42,62 +45,56 @@ type MetricsCollectorConfig struct { // ShardMetrics holds observation metrics for a single shard type ShardMetrics struct { - RP99 time.Duration - RP50 time.Duration - RP25 time.Duration - WP99 time.Duration - WP50 time.Duration - WP25 time.Duration - RThroughput float64 - WThroughput float64 - HitRate float64 - ActiveEntries float64 + Gets int64 + Puts int64 + Hits int64 + ActiveEntries int64 + ExpiredEntries int64 + Rewrites int64 + RP99 time.Duration + RP50 time.Duration + RP25 time.Duration + WP99 time.Duration + WP50 time.Duration + WP25 time.Duration } // Define your parameter structure type 
RunMetrics struct { // Per-shard observation parameters ShardMetrics []ShardMetrics - - // Averaged metrics over all shards - AveragedMetrics ShardMetrics } // ShardMetricValue represents a metric value for a specific shard type ShardMetricValue struct { ShardIdx int - Value float64 -} - -// ShardDurationValue represents a duration metric value for a specific shard -type ShardDurationValue struct { - ShardIdx int - Value time.Duration + value int64 } // MetricChannels holds separate channels for each metric type (per-shard) type MetricChannels struct { - RP99 chan ShardDurationValue - RP50 chan ShardDurationValue - RP25 chan ShardDurationValue - WP99 chan ShardDurationValue - WP50 chan ShardDurationValue - WP25 chan ShardDurationValue - RThroughput chan ShardMetricValue - WThroughput chan ShardMetricValue - HitRate chan ShardMetricValue - ActiveEntries chan ShardMetricValue + Gets chan ShardMetricValue + Puts chan ShardMetricValue + Hits chan ShardMetricValue + ActiveEntries chan ShardMetricValue + ExpiredEntries chan ShardMetricValue + Rewrites chan ShardMetricValue + RP99 chan ShardMetricValue + RP50 chan ShardMetricValue + RP25 chan ShardMetricValue + WP99 chan ShardMetricValue + WP50 chan ShardMetricValue + WP25 chan ShardMetricValue } // MetricsCollector collects and averages all metrics (per-shard) type MetricsCollector struct { - Config MetricsCollectorConfig - channels MetricChannels //channels for each metric type (per-shard) - averagedMetrics map[string]*MetricAverager // metricName -> averager - instantMetrics map[int]map[string]*MetricAverager // shardIdx -> metricName -> averager - stopCh chan struct{} //channel to stop the collector when running from console - wg sync.WaitGroup - mu sync.RWMutex + Config MetricsCollectorConfig + channels MetricChannels //channels for each metric type (per-shard) + instantMetrics map[int]map[string]int64 // shardIdx -> metricName -> value + stopCh chan struct{} //channel to stop the collector when running from console 
+ wg sync.WaitGroup + mu sync.RWMutex } // InitMetricsCollector creates and starts the metrics collector, returning it @@ -113,7 +110,8 @@ func InitMetricsCollector(config MetricsCollectorConfig) *MetricsCollector { } if config.CsvLogging { - go RunCSVLoggerWaitForShutdown() + csvLogger := CsvLogger{metricsCollector: metricsCollector} + go csvLogger.RunCSVLoggerWaitForShutdown() } if config.StatsdLogging { @@ -132,34 +130,33 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics mc := &MetricsCollector{ Config: config, channels: MetricChannels{ - RP99: make(chan ShardDurationValue, bufferSize), - RP50: make(chan ShardDurationValue, bufferSize), - RP25: make(chan ShardDurationValue, bufferSize), - WP99: make(chan ShardDurationValue, bufferSize), - WP50: make(chan ShardDurationValue, bufferSize), - WP25: make(chan ShardDurationValue, bufferSize), - RThroughput: make(chan ShardMetricValue, bufferSize), - WThroughput: make(chan ShardMetricValue, bufferSize), - HitRate: make(chan ShardMetricValue, bufferSize), - ActiveEntries: make(chan ShardMetricValue, bufferSize), + Gets: make(chan ShardMetricValue, bufferSize), + Puts: make(chan ShardMetricValue, bufferSize), + Hits: make(chan ShardMetricValue, bufferSize), + ActiveEntries: make(chan ShardMetricValue, bufferSize), + ExpiredEntries: make(chan ShardMetricValue, bufferSize), + Rewrites: make(chan ShardMetricValue, bufferSize), + RP99: make(chan ShardMetricValue, bufferSize), + RP50: make(chan ShardMetricValue, bufferSize), + RP25: make(chan ShardMetricValue, bufferSize), + WP99: make(chan ShardMetricValue, bufferSize), + WP50: make(chan ShardMetricValue, bufferSize), + WP25: make(chan ShardMetricValue, bufferSize), }, - averagedMetrics: make(map[string]*MetricAverager), - instantMetrics: make(map[int]map[string]*MetricAverager), - stopCh: make(chan struct{}), + + instantMetrics: make(map[int]map[string]int64), + stopCh: make(chan struct{}), } // Initialize averagedMetrics with 
MetricAverager instances - metricNames := []string{"RP99", "RP50", "RP25", "WP99", "WP50", "WP25", "RThroughput", "WThroughput", "HitRate", "ActiveEntries"} - for _, name := range metricNames { - mc.averagedMetrics[name] = &MetricAverager{} - } + metricNames := []string{"RP99", "RP50", "RP25", "WP99", "WP50", "WP25", "Gets", "Puts", "Hits", "ActiveEntries", "ExpiredEntries", "Rewrites"} // Initialize instantMetrics for each shard with MetricAverager instances shards := config.Metadata["shards"].(int) for shardIdx := 0; shardIdx < shards; shardIdx++ { - mc.instantMetrics[shardIdx] = make(map[string]*MetricAverager) + mc.instantMetrics[shardIdx] = make(map[string]int64) for _, name := range metricNames { - mc.instantMetrics[shardIdx][name] = &MetricAverager{} + mc.instantMetrics[shardIdx][name] = 0 } } @@ -169,18 +166,21 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics // Start begins collecting metrics from all channels func (mc *MetricsCollector) Start() { // Start a goroutine for each metric channel - mc.wg.Add(10) - - go mc.collectShardDuration(mc.channels.RP99, "RP99") - go mc.collectShardDuration(mc.channels.RP50, "RP50") - go mc.collectShardDuration(mc.channels.RP25, "RP25") - go mc.collectShardDuration(mc.channels.WP99, "WP99") - go mc.collectShardDuration(mc.channels.WP50, "WP50") - go mc.collectShardDuration(mc.channels.WP25, "WP25") - go mc.collectShardMetric(mc.channels.RThroughput, "RThroughput") - go mc.collectShardMetric(mc.channels.WThroughput, "WThroughput") - go mc.collectShardMetric(mc.channels.HitRate, "HitRate") + mc.wg.Add(12) + + go mc.collectShardMetric(mc.channels.RP99, "RP99") + go mc.collectShardMetric(mc.channels.RP50, "RP50") + go mc.collectShardMetric(mc.channels.RP25, "RP25") + go mc.collectShardMetric(mc.channels.WP99, "WP99") + go mc.collectShardMetric(mc.channels.WP50, "WP50") + go mc.collectShardMetric(mc.channels.WP25, "WP25") + go mc.collectShardMetric(mc.channels.ActiveEntries, "ActiveEntries") 
+ go mc.collectShardMetric(mc.channels.ExpiredEntries, "ExpiredEntries") + go mc.collectShardMetric(mc.channels.Rewrites, "Rewrites") + go mc.collectShardMetric(mc.channels.Gets, "Gets") + go mc.collectShardMetric(mc.channels.Puts, "Puts") + go mc.collectShardMetric(mc.channels.Hits, "Hits") } func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name string) { @@ -193,27 +193,9 @@ func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name st if !ok { return } - instants := mc.instantMetrics[sv.ShardIdx] - instants[name].Add(sv.Value) - mc.averagedMetrics[name].Add(sv.Value) - } - } -} + mc.instantMetrics[sv.ShardIdx][name] = sv.value -func (mc *MetricsCollector) collectShardDuration(ch chan ShardDurationValue, name string) { - defer mc.wg.Done() - for { - select { - case <-mc.stopCh: - return - case sv, ok := <-ch: - if !ok { - return - } - instants := mc.instantMetrics[sv.ShardIdx] - instants[name].AddDuration(sv.Value) - mc.averagedMetrics[name].AddDuration(sv.Value) } } } @@ -221,7 +203,7 @@ func (mc *MetricsCollector) collectShardDuration(ch chan ShardDurationValue, nam // RecordRP99 sends a value to the RP99 channel for a specific shard func (mc *MetricsCollector) RecordRP99(shardIdx int, value time.Duration) { select { - case mc.channels.RP99 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.RP99 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: default: // Don't block if channel is full } } @@ -229,7 +211,7 @@ func (mc *MetricsCollector) RecordRP99(shardIdx int, value time.Duration) { // RecordRP50 sends a value to the RP50 channel for a specific shard func (mc *MetricsCollector) RecordRP50(shardIdx int, value time.Duration) { select { - case mc.channels.RP50 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.RP50 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: default: } } @@ -237,7 +219,7 @@ func (mc *MetricsCollector) RecordRP50(shardIdx 
int, value time.Duration) { // RecordRP25 sends a value to the RP25 channel for a specific shard func (mc *MetricsCollector) RecordRP25(shardIdx int, value time.Duration) { select { - case mc.channels.RP25 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.RP25 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: default: } } @@ -245,7 +227,7 @@ func (mc *MetricsCollector) RecordRP25(shardIdx int, value time.Duration) { // RecordWP99 sends a value to the WP99 channel for a specific shard func (mc *MetricsCollector) RecordWP99(shardIdx int, value time.Duration) { select { - case mc.channels.WP99 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.WP99 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: default: } } @@ -253,7 +235,7 @@ func (mc *MetricsCollector) RecordWP99(shardIdx int, value time.Duration) { // RecordWP50 sends a value to the WP50 channel for a specific shard func (mc *MetricsCollector) RecordWP50(shardIdx int, value time.Duration) { select { - case mc.channels.WP50 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.WP50 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: default: } } @@ -261,39 +243,55 @@ func (mc *MetricsCollector) RecordWP50(shardIdx int, value time.Duration) { // RecordWP25 sends a value to the WP25 channel for a specific shard func (mc *MetricsCollector) RecordWP25(shardIdx int, value time.Duration) { select { - case mc.channels.WP25 <- ShardDurationValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.WP25 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: default: } } -// RecordRThroughput sends a value to the RThroughput channel for a specific shard -func (mc *MetricsCollector) RecordRThroughput(shardIdx int, value float64) { +// RecordGets sends a value to the Gets channel for a specific shard +func (mc *MetricsCollector) RecordGets(shardIdx int, value int64) { select { - case mc.channels.RThroughput <- 
ShardMetricValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.Gets <- ShardMetricValue{ShardIdx: shardIdx, value: value}: default: } } -// RecordWThroughput sends a value to the WThroughput channel for a specific shard -func (mc *MetricsCollector) RecordWThroughput(shardIdx int, value float64) { +// RecordPuts sends a value to the Puts channel for a specific shard +func (mc *MetricsCollector) RecordPuts(shardIdx int, value int64) { select { - case mc.channels.WThroughput <- ShardMetricValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.Puts <- ShardMetricValue{ShardIdx: shardIdx, value: value}: default: } } -// RecordHitRate sends a value to the HitRate channel for a specific shard -func (mc *MetricsCollector) RecordHitRate(shardIdx int, value float64) { +// RecordHits sends a value to the Hits channel for a specific shard +func (mc *MetricsCollector) RecordHits(shardIdx int, value int64) { select { - case mc.channels.HitRate <- ShardMetricValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.Hits <- ShardMetricValue{ShardIdx: shardIdx, value: value}: default: } } // RecordActiveEntries sends a value to the ActiveEntries channel for a specific shard -func (mc *MetricsCollector) RecordActiveEntries(shardIdx int, value float64) { +func (mc *MetricsCollector) RecordActiveEntries(shardIdx int, value int64) { + select { + case mc.channels.ActiveEntries <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +// RecordExpiredEntries sends a value to the ExpiredEntries channel for a specific shard +func (mc *MetricsCollector) RecordExpiredEntries(shardIdx int, value int64) { select { - case mc.channels.ActiveEntries <- ShardMetricValue{ShardIdx: shardIdx, Value: value}: + case mc.channels.ExpiredEntries <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +// RecordRewrites sends a value to the Rewrites channel for a specific shard +func (mc *MetricsCollector) RecordRewrites(shardIdx int, value int64) { + 
select { + case mc.channels.Rewrites <- ShardMetricValue{ShardIdx: shardIdx, value: value}: default: } } @@ -309,51 +307,24 @@ func (mc *MetricsCollector) GetMetrics() RunMetrics { for shardIdx := 0; shardIdx < shards; shardIdx++ { if instants, exists := mc.instantMetrics[shardIdx]; exists { shardMetrics[shardIdx] = ShardMetrics{ - RP99: time.Duration(instants["RP99"].Latest()), - RP50: time.Duration(instants["RP50"].Latest()), - RP25: time.Duration(instants["RP25"].Latest()), - WP99: time.Duration(instants["WP99"].Latest()), - WP50: time.Duration(instants["WP50"].Latest()), - WP25: time.Duration(instants["WP25"].Latest()), - RThroughput: instants["RThroughput"].Latest(), - WThroughput: instants["WThroughput"].Latest(), - HitRate: instants["HitRate"].Latest(), - ActiveEntries: instants["ActiveEntries"].Latest(), + RP99: time.Duration(instants["RP99"]), + RP50: time.Duration(instants["RP50"]), + RP25: time.Duration(instants["RP25"]), + WP99: time.Duration(instants["WP99"]), + WP50: time.Duration(instants["WP50"]), + WP25: time.Duration(instants["WP25"]), + Gets: instants["Gets"], + Puts: instants["Puts"], + Hits: instants["Hits"], + ActiveEntries: instants["ActiveEntries"], + ExpiredEntries: instants["ExpiredEntries"], + Rewrites: instants["Rewrites"], } } } - averagedMetrics := ShardMetrics{ - RP99: time.Duration(mc.averagedMetrics["RP99"].Average()), - RP50: time.Duration(mc.averagedMetrics["RP50"].Average()), - RP25: time.Duration(mc.averagedMetrics["RP25"].Average()), - WP99: time.Duration(mc.averagedMetrics["WP99"].Average()), - WP50: time.Duration(mc.averagedMetrics["WP50"].Average()), - WP25: time.Duration(mc.averagedMetrics["WP25"].Average()), - RThroughput: mc.averagedMetrics["RThroughput"].Average(), - WThroughput: mc.averagedMetrics["WThroughput"].Average(), - HitRate: mc.averagedMetrics["HitRate"].Average(), - ActiveEntries: mc.averagedMetrics["ActiveEntries"].Average(), - } - return RunMetrics{ - ShardMetrics: shardMetrics, - AveragedMetrics: 
averagedMetrics, - } -} - -// ResetAverages resets all averagers to start fresh -func (mc *MetricsCollector) ResetAverages() { - mc.mu.Lock() - defer mc.mu.Unlock() - - for _, shardInstant := range mc.instantMetrics { - for _, instantMetric := range shardInstant { - instantMetric.Reset() // Reset the instant metric - } - } - for _, averagedMetric := range mc.averagedMetrics { - averagedMetric.Reset() // Reset the averaged metric + ShardMetrics: shardMetrics, } } diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index 19d6f7bd..73382333 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -6,19 +6,22 @@ import ( ) const ( - KEY_READ_LATENCY = "flashringread_latency" - KEY_WRITE_LATENCY = "flashringwrite_latency" - KEY_RTHROUGHPUT = "flashring_rthroughput" - KEY_WTHROUGHPUT = "flashring_wthroughput" - KEY_HITRATE = "flashring_hitrate" - KEY_ACTIVE_ENTRIES = "flashring_active_entries" - + KEY_READ_LATENCY = "flashringread_latency" + KEY_WRITE_LATENCY = "flashringwrite_latency" + KEY_RTHROUGHPUT = "flashring_rthroughput" + KEY_WTHROUGHPUT = "flashring_wthroughput" + KEY_HITRATE = "flashring_hitrate" + KEY_ACTIVE_ENTRIES = "flashring_active_entries" + KEY_EXPIRED_ENTRIES = "flashring_expired_entries" + KEY_REWRITES = "flashring_rewrites" + KEY_GETS = "flashring_gets" + KEY_PUTS = "flashring_puts" + KEY_HITS = "flashring_hits" TAG_LATENCY_PERCENTILE = "latency_percentile" TAG_VALUE_P25 = "p25" TAG_VALUE_P50 = "p50" TAG_VALUE_P99 = "p99" - - TAG_SHARD_IDX = "shard_idx" + TAG_SHARD_IDX = "shard_idx" ) func RunStatsdLogger(metricsCollector *MetricsCollector) { @@ -38,17 +41,22 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { for idx, shard := range currentMetrics.ShardMetrics { shardIdx := strconv.Itoa(idx) + shardBuildTag := NewTag(TAG_SHARD_IDX, shardIdx) + + Count(KEY_ACTIVE_ENTRIES, shard.ActiveEntries, BuildTag(shardBuildTag)) + Count(KEY_EXPIRED_ENTRIES, 
shard.ExpiredEntries, BuildTag(shardBuildTag)) + Count(KEY_REWRITES, shard.Rewrites, BuildTag(shardBuildTag)) + Count(KEY_GETS, shard.Gets, BuildTag(shardBuildTag)) + Count(KEY_PUTS, shard.Puts, BuildTag(shardBuildTag)) + Count(KEY_HITS, shard.Hits, BuildTag(shardBuildTag)) + + Timing(KEY_READ_LATENCY, shard.RP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), shardBuildTag)) + Timing(KEY_READ_LATENCY, shard.RP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), shardBuildTag)) + Timing(KEY_READ_LATENCY, shard.RP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), shardBuildTag)) + Timing(KEY_WRITE_LATENCY, shard.WP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), shardBuildTag)) + Timing(KEY_WRITE_LATENCY, shard.WP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), shardBuildTag)) + Timing(KEY_WRITE_LATENCY, shard.WP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), shardBuildTag)) - Timing(KEY_READ_LATENCY, shard.RP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), NewTag(TAG_SHARD_IDX, shardIdx))) - Timing(KEY_READ_LATENCY, shard.RP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), NewTag(TAG_SHARD_IDX, shardIdx))) - Timing(KEY_READ_LATENCY, shard.RP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), NewTag(TAG_SHARD_IDX, shardIdx))) - Timing(KEY_WRITE_LATENCY, shard.WP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), NewTag(TAG_SHARD_IDX, shardIdx))) - Timing(KEY_WRITE_LATENCY, shard.WP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), NewTag(TAG_SHARD_IDX, shardIdx))) - Timing(KEY_WRITE_LATENCY, shard.WP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), NewTag(TAG_SHARD_IDX, shardIdx))) - Gauge(KEY_RTHROUGHPUT, shard.RThroughput, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) - Gauge(KEY_WTHROUGHPUT, shard.WThroughput, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) - Gauge(KEY_HITRATE, shard.HitRate, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) - 
Gauge(KEY_ACTIVE_ENTRIES, shard.ActiveEntries, BuildTag(NewTag(TAG_SHARD_IDX, shardIdx))) } } From f76e9e8c7ca48d5bd0718b3274530f25531de856 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Tue, 13 Jan 2026 21:54:36 +0000 Subject: [PATCH 09/53] fix metrics --- flashring/pkg/metrics/runmetrics.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flashring/pkg/metrics/runmetrics.go b/flashring/pkg/metrics/runmetrics.go index 03891c7e..1ca28f52 100644 --- a/flashring/pkg/metrics/runmetrics.go +++ b/flashring/pkg/metrics/runmetrics.go @@ -193,9 +193,9 @@ func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name st if !ok { return } - + mc.mu.Lock() mc.instantMetrics[sv.ShardIdx][name] = sv.value - + mc.mu.Unlock() } } } From 626ded40bb35994b1501ec6ea844330821e09c76 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Tue, 13 Jan 2026 22:55:17 +0000 Subject: [PATCH 10/53] fix metrics --- flashring/pkg/metrics/statsd_logger.go | 27 ++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index 73382333..3038ee6d 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -31,6 +31,14 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { ticker := time.NewTicker(30 * time.Second) defer ticker.Stop() + //prev values per shard + prevActiveEntries := make(map[int]int64) + prevExpiredEntries := make(map[int]int64) + prevRewrites := make(map[int]int64) + prevGets := make(map[int]int64) + prevPuts := make(map[int]int64) + prevHits := make(map[int]int64) + for { select { case <-metricsCollector.stopCh: @@ -43,12 +51,12 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { shardIdx := strconv.Itoa(idx) shardBuildTag := NewTag(TAG_SHARD_IDX, shardIdx) - Count(KEY_ACTIVE_ENTRIES, shard.ActiveEntries, BuildTag(shardBuildTag)) - Count(KEY_EXPIRED_ENTRIES, 
shard.ExpiredEntries, BuildTag(shardBuildTag)) - Count(KEY_REWRITES, shard.Rewrites, BuildTag(shardBuildTag)) - Count(KEY_GETS, shard.Gets, BuildTag(shardBuildTag)) - Count(KEY_PUTS, shard.Puts, BuildTag(shardBuildTag)) - Count(KEY_HITS, shard.Hits, BuildTag(shardBuildTag)) + Count(KEY_ACTIVE_ENTRIES, shard.ActiveEntries-prevActiveEntries[idx], BuildTag(shardBuildTag)) + Count(KEY_EXPIRED_ENTRIES, shard.ExpiredEntries-prevExpiredEntries[idx], BuildTag(shardBuildTag)) + Count(KEY_REWRITES, shard.Rewrites-prevRewrites[idx], BuildTag(shardBuildTag)) + Count(KEY_GETS, shard.Gets-prevGets[idx], BuildTag(shardBuildTag)) + Count(KEY_PUTS, shard.Puts-prevPuts[idx], BuildTag(shardBuildTag)) + Count(KEY_HITS, shard.Hits-prevHits[idx], BuildTag(shardBuildTag)) Timing(KEY_READ_LATENCY, shard.RP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), shardBuildTag)) Timing(KEY_READ_LATENCY, shard.RP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), shardBuildTag)) @@ -57,6 +65,13 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { Timing(KEY_WRITE_LATENCY, shard.WP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), shardBuildTag)) Timing(KEY_WRITE_LATENCY, shard.WP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), shardBuildTag)) + prevActiveEntries[idx] = shard.ActiveEntries + prevExpiredEntries[idx] = shard.ExpiredEntries + prevRewrites[idx] = shard.Rewrites + prevGets[idx] = shard.Gets + prevPuts[idx] = shard.Puts + prevHits[idx] = shard.Hits + } } From 4eb459d4bd05d0bb3c582cff394ff5caf5737865 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Thu, 15 Jan 2026 10:04:06 +0000 Subject: [PATCH 11/53] include fine grained stats --- flashring/pkg/cache/cache.go | 9 ++ flashring/pkg/metrics/runmetrics.go | 118 ++++++++++++++++++++++++- flashring/pkg/metrics/statsd_logger.go | 63 ++++++++++--- 3 files changed, 176 insertions(+), 14 deletions(-) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 
729001f5..87e9c0fe 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -227,6 +227,15 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m wc.metricsCollector.RecordPuts(i, int64(shardPuts)) wc.metricsCollector.RecordHits(i, int64(shardHits)) + //shard level index and rb data + wc.shards[i].Stats.KeyNotFoundCount.Load() + wc.shards[i].Stats.KeyExpiredCount.Load() + wc.shards[i].Stats.BadDataCount.Load() + wc.shards[i].Stats.BadLengthCount.Load() + wc.shards[i].Stats.BadCR32Count.Load() + wc.shards[i].Stats.BadKeyCount.Load() + wc.shards[i].Stats.DeletedKeyCount.Load() + } log.Info().Msgf("GridSearchActive: %v", wc.predictor.GridSearchEstimator.IsGridSearchActive()) diff --git a/flashring/pkg/metrics/runmetrics.go b/flashring/pkg/metrics/runmetrics.go index 1ca28f52..f5ffd798 100644 --- a/flashring/pkg/metrics/runmetrics.go +++ b/flashring/pkg/metrics/runmetrics.go @@ -26,6 +26,15 @@ type MetricsRecorder interface { RecordWP99(shardIdx int, value time.Duration) RecordWP50(shardIdx int, value time.Duration) RecordWP25(shardIdx int, value time.Duration) + + //shard level index and rb data + RecordKeyNotFoundCount(shardIdx int, value int64) + RecordKeyExpiredCount(shardIdx int, value int64) + RecordBadDataCount(shardIdx int, value int64) + RecordBadLengthCount(shardIdx int, value int64) + RecordBadCR32Count(shardIdx int, value int64) + RecordBadKeyCount(shardIdx int, value int64) + RecordDeletedKeyCount(shardIdx int, value int64) } type MetricsCollectorConfig struct { @@ -59,10 +68,21 @@ type ShardMetrics struct { WP25 time.Duration } +type ShardIndexMetrics struct { + KeyNotFoundCount int64 + KeyExpiredCount int64 + BadDataCount int64 + BadLengthCount int64 + BadCR32Count int64 + BadKeyCount int64 + DeletedKeyCount int64 +} + // Define your parameter structure type RunMetrics struct { // Per-shard observation parameters - ShardMetrics []ShardMetrics + ShardMetrics []ShardMetrics + ShardIndexMetrics 
[]ShardIndexMetrics } // ShardMetricValue represents a metric value for a specific shard @@ -85,6 +105,15 @@ type MetricChannels struct { WP99 chan ShardMetricValue WP50 chan ShardMetricValue WP25 chan ShardMetricValue + + KeyNotFoundCount chan ShardMetricValue + KeyExpiredCount chan ShardMetricValue + BadDataCount chan ShardMetricValue + BadLengthCount chan ShardMetricValue + BadCR32Count chan ShardMetricValue + BadKeyCount chan ShardMetricValue + DeletedKeyCount chan ShardMetricValue + BadCRCMemIds chan ShardMetricValue } // MetricsCollector collects and averages all metrics (per-shard) @@ -142,6 +171,14 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics WP99: make(chan ShardMetricValue, bufferSize), WP50: make(chan ShardMetricValue, bufferSize), WP25: make(chan ShardMetricValue, bufferSize), + + KeyNotFoundCount: make(chan ShardMetricValue, bufferSize), + KeyExpiredCount: make(chan ShardMetricValue, bufferSize), + BadDataCount: make(chan ShardMetricValue, bufferSize), + BadLengthCount: make(chan ShardMetricValue, bufferSize), + BadCR32Count: make(chan ShardMetricValue, bufferSize), + BadKeyCount: make(chan ShardMetricValue, bufferSize), + DeletedKeyCount: make(chan ShardMetricValue, bufferSize), }, instantMetrics: make(map[int]map[string]int64), @@ -158,6 +195,14 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics for _, name := range metricNames { mc.instantMetrics[shardIdx][name] = 0 } + + mc.instantMetrics[shardIdx]["KeyNotFoundCount"] = 0 + mc.instantMetrics[shardIdx]["KeyExpiredCount"] = 0 + mc.instantMetrics[shardIdx]["BadDataCount"] = 0 + mc.instantMetrics[shardIdx]["BadLengthCount"] = 0 + mc.instantMetrics[shardIdx]["BadCR32Count"] = 0 + mc.instantMetrics[shardIdx]["BadKeyCount"] = 0 + mc.instantMetrics[shardIdx]["DeletedKeyCount"] = 0 } return mc @@ -181,6 +226,14 @@ func (mc *MetricsCollector) Start() { go mc.collectShardMetric(mc.channels.Gets, "Gets") go 
mc.collectShardMetric(mc.channels.Puts, "Puts") go mc.collectShardMetric(mc.channels.Hits, "Hits") + + go mc.collectShardMetric(mc.channels.KeyNotFoundCount, "KeyNotFoundCount") + go mc.collectShardMetric(mc.channels.KeyExpiredCount, "KeyExpiredCount") + go mc.collectShardMetric(mc.channels.BadDataCount, "BadDataCount") + go mc.collectShardMetric(mc.channels.BadLengthCount, "BadLengthCount") + go mc.collectShardMetric(mc.channels.BadCR32Count, "BadCR32Count") + go mc.collectShardMetric(mc.channels.BadKeyCount, "BadKeyCount") + go mc.collectShardMetric(mc.channels.DeletedKeyCount, "DeletedKeyCount") } func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name string) { @@ -296,6 +349,55 @@ func (mc *MetricsCollector) RecordRewrites(shardIdx int, value int64) { } } +func (mc *MetricsCollector) RecordKeyNotFoundCount(shardIdx int, value int64) { + select { + case mc.channels.KeyNotFoundCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +func (mc *MetricsCollector) RecordKeyExpiredCount(shardIdx int, value int64) { + select { + case mc.channels.KeyExpiredCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +func (mc *MetricsCollector) RecordBadDataCount(shardIdx int, value int64) { + select { + case mc.channels.BadDataCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +func (mc *MetricsCollector) RecordBadLengthCount(shardIdx int, value int64) { + select { + case mc.channels.BadLengthCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +func (mc *MetricsCollector) RecordBadCR32Count(shardIdx int, value int64) { + select { + case mc.channels.BadCR32Count <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +func (mc *MetricsCollector) RecordBadKeyCount(shardIdx int, value int64) { + select { + case mc.channels.BadKeyCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +func (mc 
*MetricsCollector) RecordDeletedKeyCount(shardIdx int, value int64) { + select { + case mc.channels.DeletedKeyCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + func (mc *MetricsCollector) GetMetrics() RunMetrics { mc.mu.RLock() defer mc.mu.RUnlock() @@ -304,6 +406,7 @@ func (mc *MetricsCollector) GetMetrics() RunMetrics { // Build per-shard metrics shardMetrics := make([]ShardMetrics, shards) + shardIndexMetrics := make([]ShardIndexMetrics, shards) for shardIdx := 0; shardIdx < shards; shardIdx++ { if instants, exists := mc.instantMetrics[shardIdx]; exists { shardMetrics[shardIdx] = ShardMetrics{ @@ -320,11 +423,22 @@ func (mc *MetricsCollector) GetMetrics() RunMetrics { ExpiredEntries: instants["ExpiredEntries"], Rewrites: instants["Rewrites"], } + + shardIndexMetrics[shardIdx] = ShardIndexMetrics{ + KeyNotFoundCount: instants["KeyNotFoundCount"], + KeyExpiredCount: instants["KeyExpiredCount"], + BadDataCount: instants["BadDataCount"], + BadLengthCount: instants["BadLengthCount"], + BadCR32Count: instants["BadCR32Count"], + BadKeyCount: instants["BadKeyCount"], + DeletedKeyCount: instants["DeletedKeyCount"], + } } } return RunMetrics{ - ShardMetrics: shardMetrics, + ShardMetrics: shardMetrics, + ShardIndexMetrics: shardIndexMetrics, } } diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index 3038ee6d..b51c840a 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -6,17 +6,27 @@ import ( ) const ( - KEY_READ_LATENCY = "flashringread_latency" - KEY_WRITE_LATENCY = "flashringwrite_latency" - KEY_RTHROUGHPUT = "flashring_rthroughput" - KEY_WTHROUGHPUT = "flashring_wthroughput" - KEY_HITRATE = "flashring_hitrate" - KEY_ACTIVE_ENTRIES = "flashring_active_entries" - KEY_EXPIRED_ENTRIES = "flashring_expired_entries" - KEY_REWRITES = "flashring_rewrites" - KEY_GETS = "flashring_gets" - KEY_PUTS = "flashring_puts" - KEY_HITS = "flashring_hits" + 
KEY_READ_LATENCY = "flashringread_latency" + KEY_WRITE_LATENCY = "flashringwrite_latency" + KEY_RTHROUGHPUT = "flashring_rthroughput" + KEY_WTHROUGHPUT = "flashring_wthroughput" + KEY_HITRATE = "flashring_hitrate" + KEY_ACTIVE_ENTRIES = "flashring_active_entries" + KEY_EXPIRED_ENTRIES = "flashring_expired_entries" + KEY_REWRITES = "flashring_rewrites" + KEY_GETS = "flashring_gets" + KEY_PUTS = "flashring_puts" + KEY_HITS = "flashring_hits" + + KEY_KEY_NOT_FOUND_COUNT = "flashring_key_not_found_count" + KEY_KEY_EXPIRED_COUNT = "flashring_key_expired_count" + KEY_BAD_DATA_COUNT = "flashring_bad_data_count" + KEY_BAD_LENGTH_COUNT = "flashring_bad_length_count" + KEY_BAD_CR32_COUNT = "flashring_bad_cr32_count" + KEY_BAD_KEY_COUNT = "flashring_bad_key_count" + KEY_DELETED_KEY_COUNT = "flashring_deleted_key_count" + KEY_BAD_CRC_MEM_IDS = "flashring_bad_crc_mem_ids" + TAG_LATENCY_PERCENTILE = "latency_percentile" TAG_VALUE_P25 = "p25" TAG_VALUE_P50 = "p50" @@ -39,6 +49,14 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { prevPuts := make(map[int]int64) prevHits := make(map[int]int64) + prevKeyNotFoundCount := make(map[int]int64) + prevKeyExpiredCount := make(map[int]int64) + prevBadDataCount := make(map[int]int64) + prevBadLengthCount := make(map[int]int64) + prevBadCR32Count := make(map[int]int64) + prevBadKeyCount := make(map[int]int64) + prevDeletedKeyCount := make(map[int]int64) + for { select { case <-metricsCollector.stopCh: @@ -74,7 +92,28 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { } - } + for idx, shard := range currentMetrics.ShardIndexMetrics { + shardIdx := strconv.Itoa(idx) + shardBuildTag := NewTag(TAG_SHARD_IDX, shardIdx) + Count(KEY_KEY_NOT_FOUND_COUNT, shard.KeyNotFoundCount-prevKeyNotFoundCount[idx], BuildTag(shardBuildTag)) + Count(KEY_KEY_EXPIRED_COUNT, shard.KeyExpiredCount-prevKeyExpiredCount[idx], BuildTag(shardBuildTag)) + Count(KEY_BAD_DATA_COUNT, shard.BadDataCount-prevBadDataCount[idx], BuildTag(shardBuildTag)) + 
Count(KEY_BAD_LENGTH_COUNT, shard.BadLengthCount-prevBadLengthCount[idx], BuildTag(shardBuildTag)) + Count(KEY_BAD_CR32_COUNT, shard.BadCR32Count-prevBadCR32Count[idx], BuildTag(shardBuildTag)) + Count(KEY_BAD_KEY_COUNT, shard.BadKeyCount-prevBadKeyCount[idx], BuildTag(shardBuildTag)) + Count(KEY_DELETED_KEY_COUNT, shard.DeletedKeyCount-prevDeletedKeyCount[idx], BuildTag(shardBuildTag)) + + prevKeyNotFoundCount[idx] = shard.KeyNotFoundCount + prevKeyExpiredCount[idx] = shard.KeyExpiredCount + prevBadDataCount[idx] = shard.BadDataCount + prevBadLengthCount[idx] = shard.BadLengthCount + prevBadCR32Count[idx] = shard.BadCR32Count + prevBadKeyCount[idx] = shard.BadKeyCount + prevDeletedKeyCount[idx] = shard.DeletedKeyCount + + } + + } } } From 54ddd211b4b8a4329097aeff5ecd2e1cf5d258d2 Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Mon, 19 Jan 2026 08:06:54 +0000 Subject: [PATCH 12/53] correct stats change filesize multiplier in plans --- flashring/cmd/flashringtest/plan_lockless.go | 4 +- .../cmd/flashringtest/plan_random_gausian.go | 2 +- .../flashringtest/plan_readthrough_gausian.go | 10 ++-- .../plan_readthrough_gausian_batched.go | 2 +- flashring/pkg/cache/cache.go | 16 +++--- flashring/pkg/metrics/console_logger.go | 53 +++++++++++++++++-- flashring/pkg/metrics/statsd_logger.go | 1 - 7 files changed, 67 insertions(+), 21 deletions(-) diff --git a/flashring/cmd/flashringtest/plan_lockless.go b/flashring/cmd/flashringtest/plan_lockless.go index 3203db23..1164602a 100644 --- a/flashring/cmd/flashringtest/plan_lockless.go +++ b/flashring/cmd/flashringtest/plan_lockless.go @@ -38,7 +38,7 @@ func planLockless() { flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme", "data directory for shard files") flag.IntVar(&numShards, "shards", 100, "number of shards") - flag.IntVar(&keysPerShard, "keys-per-shard", 10_00_00, "keys per shard") + flag.IntVar(&keysPerShard, "keys-per-shard", 50_00_00, "keys per shard") flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable 
size in MiB") flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 2, "file size in GiB per shard") flag.IntVar(&readWorkers, "readers", 8, "number of read workers") @@ -85,7 +85,7 @@ func planLockless() { } memtableSizeInBytes := int32(memtableMB) * 1024 * 1024 - fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes) + fileSizeInBytes := int64(fileSizeMultiplier) * 1024 * 1024 * 1024 // fileSizeMultiplier in GiB cfg := cachepkg.WrapCacheConfig{ NumShards: numShards, diff --git a/flashring/cmd/flashringtest/plan_random_gausian.go b/flashring/cmd/flashringtest/plan_random_gausian.go index ffa493a6..719a8106 100644 --- a/flashring/cmd/flashringtest/plan_random_gausian.go +++ b/flashring/cmd/flashringtest/plan_random_gausian.go @@ -84,7 +84,7 @@ func planRandomGaussian() { } memtableSizeInBytes := int32(memtableMB) * 1024 * 1024 - fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes) + fileSizeInBytes := int64(fileSizeMultiplier) * 1024 * 1024 * 1024 // fileSizeMultiplier in GiB cfg := cachepkg.WrapCacheConfig{ NumShards: numShards, diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index 29885572..a0d6f9c7 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -37,11 +37,11 @@ func planReadthroughGaussian() { cpuProfile string ) - flag.StringVar(&mountPoint, "mount", "/media/a0d00kc/trishul/", "data directory for shard files") - flag.IntVar(&numShards, "shards", 500, "number of shards") - flag.IntVar(&keysPerShard, "keys-per-shard", 4_00_00, "keys per shard") + flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme/", "data directory for shard files") + flag.IntVar(&numShards, "shards", 100, "number of shards") + flag.IntVar(&keysPerShard, "keys-per-shard", 5_00_000, "keys per shard") flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB") - 
flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 2, "file size in GiB per shard") + flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 1, "file size in GiB per shard") flag.IntVar(&readWorkers, "readers", 8, "number of read workers") flag.IntVar(&writeWorkers, "writers", 8, "number of write workers") flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds") @@ -86,7 +86,7 @@ func planReadthroughGaussian() { } memtableSizeInBytes := int32(memtableMB) * 1024 * 1024 - fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes) + fileSizeInBytes := int64(fileSizeMultiplier) * 1024 * 1024 * 1024 // fileSizeMultiplier in GiB metricsConfig := metrics.MetricsCollectorConfig{ StatsEnabled: true, diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go index 21e6c0d2..d0b5e9c2 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go @@ -94,7 +94,7 @@ func planReadthroughGaussianBatched() { } memtableSizeInBytes := int32(memtableMB) * 1024 * 1024 - fileSizeInBytes := int64(fileSizeMultiplier) * int64(memtableSizeInBytes) + fileSizeInBytes := int64(fileSizeMultiplier) * 1024 * 1024 * 1024 // fileSizeMultiplier in GiB cfg := cachepkg.WrapCacheConfig{ NumShards: numShards, diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 87e9c0fe..fa2db5dc 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -227,14 +227,14 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m wc.metricsCollector.RecordPuts(i, int64(shardPuts)) wc.metricsCollector.RecordHits(i, int64(shardHits)) - //shard level index and rb data - wc.shards[i].Stats.KeyNotFoundCount.Load() - wc.shards[i].Stats.KeyExpiredCount.Load() - wc.shards[i].Stats.BadDataCount.Load() - wc.shards[i].Stats.BadLengthCount.Load() - 
wc.shards[i].Stats.BadCR32Count.Load() - wc.shards[i].Stats.BadKeyCount.Load() - wc.shards[i].Stats.DeletedKeyCount.Load() + //shard level index and rb data - actually send to metrics collector! + wc.metricsCollector.RecordKeyNotFoundCount(i, wc.shards[i].Stats.KeyNotFoundCount.Load()) + wc.metricsCollector.RecordKeyExpiredCount(i, wc.shards[i].Stats.KeyExpiredCount.Load()) + wc.metricsCollector.RecordBadDataCount(i, wc.shards[i].Stats.BadDataCount.Load()) + wc.metricsCollector.RecordBadLengthCount(i, wc.shards[i].Stats.BadLengthCount.Load()) + wc.metricsCollector.RecordBadCR32Count(i, wc.shards[i].Stats.BadCR32Count.Load()) + wc.metricsCollector.RecordBadKeyCount(i, wc.shards[i].Stats.BadKeyCount.Load()) + wc.metricsCollector.RecordDeletedKeyCount(i, wc.shards[i].Stats.DeletedKeyCount.Load()) } diff --git a/flashring/pkg/metrics/console_logger.go b/flashring/pkg/metrics/console_logger.go index 6635f247..5affea62 100644 --- a/flashring/pkg/metrics/console_logger.go +++ b/flashring/pkg/metrics/console_logger.go @@ -66,10 +66,10 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { wp50 = wp50 / time.Duration(shards) wp25 = wp25 / time.Duration(shards) - rThroughput := float64(getsTotal-prevGetsTotal) / float64(30) - wThroughput := float64(putsTotal-prevPutsTotal) / float64(30) + rThroughput := int(float64(getsTotal-prevGetsTotal) / float64(30)) + wThroughput := int(float64(putsTotal-prevPutsTotal) / float64(30)) hitRate := float64(hitsTotal-prevHitsTotal) / float64(getsTotal-prevGetsTotal) - activeEntries := float64(activeEntriesTotal - prevActiveEntriesTotal) + activeEntries := float64(activeEntriesTotal-prevActiveEntriesTotal) / float64(30) expiredEntries := float64(expiredTotal - prevExpiredTotal) reWrites := float64(reWritesTotal - prevReWritesTotal) @@ -85,6 +85,53 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { log.Info().Msgf("ActiveEntries: %v", activeEntries) log.Info().Msgf("ExpiredEntries: %v", expiredEntries) 
log.Info().Msgf("ReWrites: %v", reWrites) + + keyNotFoundTotal := int64(0) + keyExpiredTotal := int64(0) + badDataTotal := int64(0) + badLengthTotal := int64(0) + badCR32Total := int64(0) + badKeyTotal := int64(0) + deletedKeyTotal := int64(0) + + for _, shard := range currentMetrics.ShardIndexMetrics { + keyNotFoundTotal += shard.KeyNotFoundCount + keyExpiredTotal += shard.KeyExpiredCount + badDataTotal += shard.BadDataCount + badLengthTotal += shard.BadLengthCount + badCR32Total += shard.BadCR32Count + badKeyTotal += shard.BadKeyCount + deletedKeyTotal += shard.DeletedKeyCount + } + + log.Info().Msgf("KeyNotFoundTotal: %v", keyNotFoundTotal) + log.Info().Msgf("KeyExpiredTotal: %v", keyExpiredTotal) + log.Info().Msgf("BadDataTotal: %v", badDataTotal) + log.Info().Msgf("BadLengthTotal: %v", badLengthTotal) + log.Info().Msgf("BadCR32Total: %v", badCR32Total) + log.Info().Msgf("BadKeyTotal: %v", badKeyTotal) + log.Info().Msgf("DeletedKeyTotal: %v", deletedKeyTotal) + + // Debug: Log cumulative totals to understand the issue + log.Info().Msgf("DEBUG - GetsTotal: %v, HitsTotal: %v, PutsTotal: %v, ActiveEntriesTotal: %v", getsTotal, hitsTotal, putsTotal, activeEntriesTotal) + + // Debug: Log per-shard ActiveEntries to check distribution (first 5 shards) + if len(currentMetrics.ShardMetrics) >= 5 { + log.Info().Msgf("DEBUG PER-SHARD ActiveEntries - shard0: %d, shard1: %d, shard2: %d, shard3: %d, shard4: %d", + currentMetrics.ShardMetrics[0].ActiveEntries, + currentMetrics.ShardMetrics[1].ActiveEntries, + currentMetrics.ShardMetrics[2].ActiveEntries, + currentMetrics.ShardMetrics[3].ActiveEntries, + currentMetrics.ShardMetrics[4].ActiveEntries) + } + + // Update prev values for next iteration + prevGetsTotal = getsTotal + prevPutsTotal = putsTotal + prevHitsTotal = hitsTotal + prevExpiredTotal = expiredTotal + prevReWritesTotal = reWritesTotal + prevActiveEntriesTotal = activeEntriesTotal } } } diff --git a/flashring/pkg/metrics/statsd_logger.go 
b/flashring/pkg/metrics/statsd_logger.go index b51c840a..d04da596 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -25,7 +25,6 @@ const ( KEY_BAD_CR32_COUNT = "flashring_bad_cr32_count" KEY_BAD_KEY_COUNT = "flashring_bad_key_count" KEY_DELETED_KEY_COUNT = "flashring_deleted_key_count" - KEY_BAD_CRC_MEM_IDS = "flashring_bad_crc_mem_ids" TAG_LATENCY_PERCENTILE = "latency_percentile" TAG_VALUE_P25 = "p25" From f5a1d6c60317b333e339ef4d3fc77367fd42952f Mon Sep 17 00:00:00 2001 From: nileshsolankimeesho Date: Mon, 19 Jan 2026 10:45:08 +0000 Subject: [PATCH 13/53] remove syncpool changes --- flashring/internal/shard/shard_cache.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 9e28b17d..927136bb 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -216,9 +216,8 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { memtableExists = false } if !memtableExists { - bufPtr := BufPool.Get().(*[]byte) - buf = *bufPtr - defer BufPool.Put(bufPtr) + // Allocate buffer of exact size needed - no pool since readFromDisk already copies once + buf = make([]byte, length) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDisk(int64(fileOffset), length, buf) if n != int(length) { @@ -232,7 +231,7 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { } } gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) - computedCR32 := crc32.ChecksumIEEE(buf[4:]) + computedCR32 := crc32.ChecksumIEEE(buf[4:length]) gotKey := string(buf[4 : 4+len(key)]) if gotCR32 != computedCR32 { fc.Stats.BadCR32Count.Add(1) @@ -327,10 +326,8 @@ func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) return fc.validateAndReturnBuffer(key, buf, length, memId, remainingTTL, shouldReWrite) } - // Read 
from disk - bufPtr := BufPool.Get().(*[]byte) - buf := *bufPtr - defer BufPool.Put(bufPtr) + // Read from disk - allocate buffer of exact size needed (no pool since readFromDisk already copies once) + buf := make([]byte, length) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDisk(int64(fileOffset), length, buf) if n != int(length) { @@ -344,7 +341,7 @@ func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) // validateAndReturnBuffer validates CRC and key, then returns the value func (fc *ShardCache) validateAndReturnBuffer(key string, buf []byte, length uint16, memId uint32, remainingTTL uint16, shouldReWrite bool) (bool, []byte, uint16, bool, bool) { gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) - computedCR32 := crc32.ChecksumIEEE(buf[4:]) + computedCR32 := crc32.ChecksumIEEE(buf[4:length]) if gotCR32 != computedCR32 { fc.Stats.BadCR32Count.Add(1) fc.Stats.IncBadCRCMemIds(memId) From 1788d64c04d5678e70218ba1d5e3c25c76431685 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 21 Jan 2026 12:47:54 +0000 Subject: [PATCH 14/53] grid search fixes --- flashring/internal/maths/estimator.go | 7 +++++++ flashring/pkg/cache/cache.go | 23 ++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/flashring/internal/maths/estimator.go b/flashring/internal/maths/estimator.go index f477d96e..154298e1 100644 --- a/flashring/internal/maths/estimator.go +++ b/flashring/internal/maths/estimator.go @@ -5,6 +5,8 @@ package maths import ( "math" "time" + + "github.com/rs/zerolog/log" ) const ( @@ -75,6 +77,7 @@ func (g *GridSearchEstimator) RecordHitRate(hitRate float64) { stat.HitRate = (stat.HitRate*float64(stat.Trials) + hitRate) / float64(stat.Trials+1) stat.Trials++ if stat.HitRate < g.bestHitRate*0.9 { + log.Error().Msgf("GridSearchRestarted: hitRate %v bestHitRate %v", stat.HitRate, g.bestHitRate) g.RestartGridSearch() } return @@ -130,6 +133,10 @@ func (g *GridSearchEstimator) 
GenerateRefinedGrid(base WeightTuple, steps int, d refined := make([]WeightTuple, 0, (2*steps+1)*(2*steps+1)) for i := -steps; i <= steps; i++ { for j := -steps; j <= steps; j++ { + + if i == 0 && j == 0 { + continue + } wf := base.WFreq + float64(i)*delta la := base.WLA + float64(j)*delta if math.Abs(wf-base.WFreq) < g.epsilon && math.Abs(la-base.WLA) < g.epsilon { diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index fa2db5dc..981e382f 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -55,6 +55,10 @@ type CacheStats struct { ShardWiseActiveEntries atomic.Uint64 LatencyTracker *filecache.LatencyTracker BatchTracker *filecache.BatchTracker + + PrevHits atomic.Uint64 + PrevTotalGets atomic.Uint64 + timeStarted time.Time } type WrapCacheConfig struct { @@ -238,7 +242,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m } - log.Info().Msgf("GridSearchActive: %v", wc.predictor.GridSearchEstimator.IsGridSearchActive()) + log.Error().Msgf("GridSearchActive: %v", wc.predictor.GridSearchEstimator.IsGridSearchActive()) } }() } @@ -363,8 +367,8 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { keyFound, val, remainingTTL, expired, shouldReWrite = result.Found, result.Data, result.TTL, result.Expired, result.ShouldRewrite } else { - wc.shardLocks[shardIdx].RLock() - defer wc.shardLocks[shardIdx].RUnlock() + wc.shardLocks[shardIdx].Lock() + defer wc.shardLocks[shardIdx].Unlock() keyFound, val, remainingTTL, expired, shouldReWrite = wc.shards[shardIdx].Get(key) } @@ -375,11 +379,20 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { wc.stats[shardIdx].Expired.Add(1) } wc.stats[shardIdx].TotalGets.Add(1) - if false && shouldReWrite { + if shouldReWrite { wc.stats[shardIdx].ReWrites.Add(1) wc.putLocked(shardIdx, h32, key, val, remainingTTL) } - wc.predictor.Observe(float64(wc.stats[shardIdx].Hits.Load()) / float64(wc.stats[shardIdx].TotalGets.Load())) + + if 
time.Since(wc.stats[shardIdx].timeStarted) > 10*time.Second { + //observing hit rate every call can be avoided because average remains the same + hitRate := float64(wc.stats[shardIdx].Hits.Load()-wc.stats[shardIdx].PrevHits.Load()) / float64(wc.stats[shardIdx].TotalGets.Load()-wc.stats[shardIdx].PrevTotalGets.Load()) + wc.predictor.Observe(hitRate) + + wc.stats[shardIdx].timeStarted = time.Now() + wc.stats[shardIdx].PrevHits.Store(wc.stats[shardIdx].Hits.Load()) + wc.stats[shardIdx].PrevTotalGets.Store(wc.stats[shardIdx].TotalGets.Load()) + } return val, keyFound, expired } From 97da61eb776e054b75fc90dcedd39cd7c55724dc Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 22 Jan 2026 08:10:24 +0000 Subject: [PATCH 15/53] clear files at mountpoint on start --- flashring/pkg/cache/cache.go | 13 ++++++ flashring/pkg/metrics/metrics_averager.go | 52 ----------------------- 2 files changed, 13 insertions(+), 52 deletions(-) delete mode 100644 flashring/pkg/metrics/metrics_averager.go diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 981e382f..d675975b 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -2,6 +2,8 @@ package internal import ( "fmt" + "os" + "path/filepath" "strconv" "sync" "sync/atomic" @@ -110,6 +112,17 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m if config.FileSize%BLOCK_SIZE != 0 { return nil, ErrFileSizeNotMultipleOf4KB } + + //clear existing data + files, err := os.ReadDir(mountPoint) + if err != nil { + log.Error().Err(err).Msg("Failed to read directory") + panic(err) + } + for _, file := range files { + os.Remove(filepath.Join(mountPoint, file.Name())) + } + weights := []maths.WeightTuple{ { WFreq: 0.1, diff --git a/flashring/pkg/metrics/metrics_averager.go b/flashring/pkg/metrics/metrics_averager.go deleted file mode 100644 index 955cf1cb..00000000 --- a/flashring/pkg/metrics/metrics_averager.go +++ /dev/null @@ -1,52 +0,0 @@ -package metrics - 
-import ( - "sync" - "time" -) - -// MetricAverager maintains running averages for a metric -type MetricAverager struct { - mu sync.RWMutex - sum float64 - count int64 - lastValue float64 -} - -func (ma *MetricAverager) Add(value float64) { - ma.mu.Lock() - defer ma.mu.Unlock() - ma.sum += value - ma.count++ - ma.lastValue = value -} - -func (ma *MetricAverager) AddDuration(value time.Duration) { - ma.mu.Lock() - defer ma.mu.Unlock() - ma.sum += float64(value) - ma.count++ - ma.lastValue = float64(value) -} - -func (ma *MetricAverager) Average() float64 { - ma.mu.RLock() - defer ma.mu.RUnlock() - if ma.count == 0 { - return 0 - } - return ma.sum / float64(ma.count) -} - -func (ma *MetricAverager) Latest() float64 { - ma.mu.RLock() - defer ma.mu.RUnlock() - return ma.lastValue -} - -func (ma *MetricAverager) Reset() { - ma.mu.Lock() - defer ma.mu.Unlock() - ma.sum = 0 - ma.count = 0 -} From 56a508083ace74b599b7b8e657ced0c6994e533c Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 22 Jan 2026 19:39:48 +0000 Subject: [PATCH 16/53] fixed delete manager and added file stats --- .../internal/indicesV3/delete_manager.go | 5 ++- flashring/internal/shard/shard_cache.go | 4 +++ flashring/pkg/cache/cache.go | 36 ++++++++++++++----- flashring/pkg/metrics/runmetrics.go | 33 +++++++++++++++++ flashring/pkg/metrics/statsd_logger.go | 11 +++++- 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/flashring/internal/indicesV3/delete_manager.go b/flashring/internal/indicesV3/delete_manager.go index 6b218915..977bf3ee 100644 --- a/flashring/internal/indicesV3/delete_manager.go +++ b/flashring/internal/indicesV3/delete_manager.go @@ -69,7 +69,10 @@ func (dm *DeleteManager) ExecuteDeleteIfNeeded() error { if memIdAtHead != dm.toBeDeletedMemId { return fmt.Errorf("memIdAtHead: %d, toBeDeletedMemId: %d", memIdAtHead, dm.toBeDeletedMemId) } - dm.wrapFile.TrimHead() + + if trimNeeded { + dm.wrapFile.TrimHead() + } return nil } return nil diff --git 
a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 927136bb..3c6da3ff 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -400,3 +400,7 @@ func (fc *ShardCache) processBuffer(key string, buf []byte, length uint16) ReadR Data: value, } } + +func (fc *ShardCache) GetFileStat() *fs.Stat { + return fc.file.Stat +} diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index d675975b..b8c5f444 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -253,6 +253,10 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m wc.metricsCollector.RecordBadKeyCount(i, wc.shards[i].Stats.BadKeyCount.Load()) wc.metricsCollector.RecordDeletedKeyCount(i, wc.shards[i].Stats.DeletedKeyCount.Load()) + //wrapAppendFilt stats + wc.metricsCollector.RecordWriteCount(i, wc.shards[i].GetFileStat().WriteCount) + wc.metricsCollector.RecordPunchHoleCount(i, wc.shards[i].GetFileStat().PunchHoleCount) + } log.Error().Msgf("GridSearchActive: %v", wc.predictor.GridSearchEstimator.IsGridSearchActive()) @@ -344,16 +348,14 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro wc.shardLocks[shardIdx].Lock() defer wc.shardLocks[shardIdx].Unlock() - wc.putLocked(shardIdx, h32, key, value, exptimeInMinutes) - return nil -} -func (wc *WrapCache) putLocked(shardIdx uint32, h32 uint32, key string, value []byte, exptimeInMinutes uint16) { wc.shards[shardIdx].Put(key, value, exptimeInMinutes) wc.stats[shardIdx].TotalPuts.Add(1) if h32%100 < 10 { wc.stats[shardIdx].ShardWiseActiveEntries.Store(uint64(wc.shards[shardIdx].GetRingBufferActiveEntries())) } + + return nil } func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { @@ -367,6 +369,7 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { var keyFound bool var val []byte + var valCopy []byte var remainingTTL uint16 var expired bool var shouldReWrite bool @@ 
-377,12 +380,27 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { Result: reqChan, } result := <-reqChan - keyFound, val, remainingTTL, expired, shouldReWrite = result.Found, result.Data, result.TTL, result.Expired, result.ShouldRewrite + if shouldReWrite { + valCopy = make([]byte, len(val)) + copy(valCopy, val) + } } else { - wc.shardLocks[shardIdx].Lock() - defer wc.shardLocks[shardIdx].Unlock() - keyFound, val, remainingTTL, expired, shouldReWrite = wc.shards[shardIdx].Get(key) + + func(key string, shardIdx uint32) { + wc.shardLocks[shardIdx].RLock() + defer wc.shardLocks[shardIdx].RUnlock() + keyFound, val, remainingTTL, expired, shouldReWrite = wc.shards[shardIdx].Get(key) + + if shouldReWrite { + //copy val into a safe variable because we are unlocking the shard + // at the end of anon function execution + valCopy = make([]byte, len(val)) + copy(valCopy, val) + val = valCopy + } + }(key, shardIdx) + } if keyFound && !expired { @@ -394,7 +412,7 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { wc.stats[shardIdx].TotalGets.Add(1) if shouldReWrite { wc.stats[shardIdx].ReWrites.Add(1) - wc.putLocked(shardIdx, h32, key, val, remainingTTL) + wc.Put(key, valCopy, remainingTTL) } if time.Since(wc.stats[shardIdx].timeStarted) > 10*time.Second { diff --git a/flashring/pkg/metrics/runmetrics.go b/flashring/pkg/metrics/runmetrics.go index f5ffd798..1587ce12 100644 --- a/flashring/pkg/metrics/runmetrics.go +++ b/flashring/pkg/metrics/runmetrics.go @@ -76,6 +76,9 @@ type ShardIndexMetrics struct { BadCR32Count int64 BadKeyCount int64 DeletedKeyCount int64 + + WriteCount int64 + PunchHoleCount int64 } // Define your parameter structure @@ -114,6 +117,9 @@ type MetricChannels struct { BadKeyCount chan ShardMetricValue DeletedKeyCount chan ShardMetricValue BadCRCMemIds chan ShardMetricValue + + WriteCount chan ShardMetricValue + PunchHoleCount chan ShardMetricValue } // MetricsCollector collects and averages all metrics (per-shard) @@ -179,6 +185,9 
@@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics BadCR32Count: make(chan ShardMetricValue, bufferSize), BadKeyCount: make(chan ShardMetricValue, bufferSize), DeletedKeyCount: make(chan ShardMetricValue, bufferSize), + + WriteCount: make(chan ShardMetricValue, bufferSize), + PunchHoleCount: make(chan ShardMetricValue, bufferSize), }, instantMetrics: make(map[int]map[string]int64), @@ -203,6 +212,9 @@ func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *Metrics mc.instantMetrics[shardIdx]["BadCR32Count"] = 0 mc.instantMetrics[shardIdx]["BadKeyCount"] = 0 mc.instantMetrics[shardIdx]["DeletedKeyCount"] = 0 + + mc.instantMetrics[shardIdx]["WriteCount"] = 0 + mc.instantMetrics[shardIdx]["PunchHoleCount"] = 0 } return mc @@ -234,6 +246,9 @@ func (mc *MetricsCollector) Start() { go mc.collectShardMetric(mc.channels.BadCR32Count, "BadCR32Count") go mc.collectShardMetric(mc.channels.BadKeyCount, "BadKeyCount") go mc.collectShardMetric(mc.channels.DeletedKeyCount, "DeletedKeyCount") + + go mc.collectShardMetric(mc.channels.WriteCount, "WriteCount") + go mc.collectShardMetric(mc.channels.PunchHoleCount, "PunchHoleCount") } func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name string) { @@ -398,6 +413,21 @@ func (mc *MetricsCollector) RecordDeletedKeyCount(shardIdx int, value int64) { } } +func (mc *MetricsCollector) RecordWriteCount(shardIdx int, value int64) { + select { + case mc.channels.WriteCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + +func (mc *MetricsCollector) RecordPunchHoleCount(shardIdx int, value int64) { + + select { + case mc.channels.PunchHoleCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: + default: + } +} + func (mc *MetricsCollector) GetMetrics() RunMetrics { mc.mu.RLock() defer mc.mu.RUnlock() @@ -432,6 +462,9 @@ func (mc *MetricsCollector) GetMetrics() RunMetrics { BadCR32Count: instants["BadCR32Count"], BadKeyCount: 
instants["BadKeyCount"], DeletedKeyCount: instants["DeletedKeyCount"], + + WriteCount: instants["WriteCount"], + PunchHoleCount: instants["PunchHoleCount"], } } } diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index d04da596..47b9c524 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -31,6 +31,9 @@ const ( TAG_VALUE_P50 = "p50" TAG_VALUE_P99 = "p99" TAG_SHARD_IDX = "shard_idx" + + KEY_WRITE_COUNT = "flashring_write_count" + KEY_PUNCH_HOLE_COUNT = "flashring_punch_hole_count" ) func RunStatsdLogger(metricsCollector *MetricsCollector) { @@ -56,6 +59,9 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { prevBadKeyCount := make(map[int]int64) prevDeletedKeyCount := make(map[int]int64) + prevWriteCount := make(map[int]int64) + prevPunchHoleCount := make(map[int]int64) + for { select { case <-metricsCollector.stopCh: @@ -102,6 +108,8 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { Count(KEY_BAD_CR32_COUNT, shard.BadCR32Count-prevBadCR32Count[idx], BuildTag(shardBuildTag)) Count(KEY_BAD_KEY_COUNT, shard.BadKeyCount-prevBadKeyCount[idx], BuildTag(shardBuildTag)) Count(KEY_DELETED_KEY_COUNT, shard.DeletedKeyCount-prevDeletedKeyCount[idx], BuildTag(shardBuildTag)) + Count(KEY_WRITE_COUNT, shard.WriteCount-prevWriteCount[idx], BuildTag(shardBuildTag)) + Count(KEY_PUNCH_HOLE_COUNT, shard.PunchHoleCount-prevPunchHoleCount[idx], BuildTag(shardBuildTag)) prevKeyNotFoundCount[idx] = shard.KeyNotFoundCount prevKeyExpiredCount[idx] = shard.KeyExpiredCount @@ -110,7 +118,8 @@ func RunStatsdLogger(metricsCollector *MetricsCollector) { prevBadCR32Count[idx] = shard.BadCR32Count prevBadKeyCount[idx] = shard.BadKeyCount prevDeletedKeyCount[idx] = shard.DeletedKeyCount - + prevWriteCount[idx] = shard.WriteCount + prevPunchHoleCount[idx] = shard.PunchHoleCount } } From 659d8e20748fd4f05b1c2715e771b4b72c1c4ca8 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 22 Jan 
2026 22:00:19 +0000 Subject: [PATCH 17/53] fix filewrite error after first punch hole --- .../cmd/flashringtest/plan_readthrough_gausian.go | 10 +++++----- flashring/internal/indicesV3/delete_manager.go | 3 +++ flashring/internal/indicesV3/index.go | 3 +++ flashring/pkg/cache/cache.go | 6 +++++- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index a0d6f9c7..901dda12 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -26,7 +26,7 @@ func planReadthroughGaussian() { numShards int keysPerShard int memtableMB int - fileSizeMultiplier int + fileSizeMultiplier float64 readWorkers int writeWorkers int sampleSecs int @@ -39,9 +39,9 @@ func planReadthroughGaussian() { flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme/", "data directory for shard files") flag.IntVar(&numShards, "shards", 100, "number of shards") - flag.IntVar(&keysPerShard, "keys-per-shard", 5_00_000, "keys per shard") - flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB") - flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 1, "file size in GiB per shard") + flag.IntVar(&keysPerShard, "keys-per-shard", 2_00_000, "keys per shard") + flag.IntVar(&memtableMB, "memtable-mb", 2, "memtable size in MiB") + flag.Float64Var(&fileSizeMultiplier, "file-size-multiplier", 0.25, "file size in GiB per shard") flag.IntVar(&readWorkers, "readers", 8, "number of read workers") flag.IntVar(&writeWorkers, "writers", 8, "number of write workers") flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds") @@ -86,7 +86,7 @@ func planReadthroughGaussian() { } memtableSizeInBytes := int32(memtableMB) * 1024 * 1024 - fileSizeInBytes := int64(fileSizeMultiplier) * 1024 * 1024 * 1024 // fileSizeMultiplier in GiB + fileSizeInBytes := int64(float64(fileSizeMultiplier) * 1024 * 
1024 * 1024) // fileSizeMultiplier in GiB metricsConfig := metrics.MetricsCollectorConfig{ StatsEnabled: true, diff --git a/flashring/internal/indicesV3/delete_manager.go b/flashring/internal/indicesV3/delete_manager.go index 977bf3ee..839a0274 100644 --- a/flashring/internal/indicesV3/delete_manager.go +++ b/flashring/internal/indicesV3/delete_manager.go @@ -62,6 +62,9 @@ func (dm *DeleteManager) ExecuteDeleteIfNeeded() error { if trimNeeded || nextAddNeedsDelete { dm.deleteInProgress = true dm.deleteCount = int(dm.memtableData[dm.toBeDeletedMemId] / dm.deleteAmortizedStep) + if dm.deleteCount == 0 { + dm.deleteCount = int(dm.memtableData[dm.toBeDeletedMemId] % dm.deleteAmortizedStep) + } memIdAtHead, err := dm.keyIndex.PeekMemIdAtHead() if err != nil { return err diff --git a/flashring/internal/indicesV3/index.go b/flashring/internal/indicesV3/index.go index 29261585..95bc9c3e 100644 --- a/flashring/internal/indicesV3/index.go +++ b/flashring/internal/indicesV3/index.go @@ -96,6 +96,9 @@ func (i *Index) Get(key string) (length, lastAccess, remainingTTL uint16, freq u } func (ix *Index) Delete(count int) (uint32, int) { + if count == 0 { + return 0, 0 + } for i := 0; i < count; i++ { deleted, deletedHashNextPrev, deletedIdx, next := ix.rb.Delete() if deleted == nil { diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index b8c5f444..943d2b62 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -349,7 +349,11 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro wc.shardLocks[shardIdx].Lock() defer wc.shardLocks[shardIdx].Unlock() - wc.shards[shardIdx].Put(key, value, exptimeInMinutes) + err := wc.shards[shardIdx].Put(key, value, exptimeInMinutes) + if err != nil { + log.Error().Err(err).Msgf("Put failed for key: %s", key) + return fmt.Errorf("put failed for key: %s", key) + } wc.stats[shardIdx].TotalPuts.Add(1) if h32%100 < 10 { 
wc.stats[shardIdx].ShardWiseActiveEntries.Store(uint64(wc.shards[shardIdx].GetRingBufferActiveEntries())) From 900845bb2715b74cb2cd21f7a566dd461fffac5f Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 10 Feb 2026 07:21:59 +0000 Subject: [PATCH 18/53] console logger fixes --- .vscode/launch.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 2decad3c..e9505d8c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -26,7 +26,7 @@ "mode": "debug", "program": "${workspaceFolder}/flashring/cmd/flashringtest", "env": { - "PLAN": "readthrough-batched" + "PLAN": "readthrough" } } From 7d279f5ed69962ed0eb9cfcbd1c3b3504a89d2de Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 10 Feb 2026 07:22:11 +0000 Subject: [PATCH 19/53] console logger fixes --- .../flashringtest/plan_readthrough_gausian.go | 15 +++++----- .../internal/indicesV3/delete_manager.go | 4 +-- flashring/internal/indicesV3/index.go | 28 +++++++++---------- flashring/pkg/metrics/console_logger.go | 6 ++++ 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index 901dda12..02dcbcb1 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -39,11 +39,11 @@ func planReadthroughGaussian() { flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme/", "data directory for shard files") flag.IntVar(&numShards, "shards", 100, "number of shards") - flag.IntVar(&keysPerShard, "keys-per-shard", 2_00_000, "keys per shard") + flag.IntVar(&keysPerShard, "keys-per-shard", 3_00_000, "keys per shard") flag.IntVar(&memtableMB, "memtable-mb", 2, "memtable size in MiB") flag.Float64Var(&fileSizeMultiplier, "file-size-multiplier", 0.25, "file size in GiB per shard") - flag.IntVar(&readWorkers, "readers", 8, "number of read workers") - 
flag.IntVar(&writeWorkers, "writers", 8, "number of write workers") + flag.IntVar(&readWorkers, "readers", 16, "number of read workers") + flag.IntVar(&writeWorkers, "writers", 16, "number of write workers") flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds") flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations") flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor") @@ -127,7 +127,7 @@ func planReadthroughGaussian() { missedKeyChanList[i] = make(chan int) } - totalKeys := keysPerShard * numShards + totalKeys := 30_000_000 str1kb := strings.Repeat("a", 1024) str1kb = "%d" + str1kb @@ -145,7 +145,7 @@ func planReadthroughGaussian() { key := fmt.Sprintf("key%d", k) val := []byte(fmt.Sprintf(str1kb, k)) if err := pc.Put(key, val, 60); err != nil { - panic(err) + log.Error().Err(err).Msgf("error putting key %s", key) } if k%5000000 == 0 { fmt.Printf("----------------------------------------------prepopulated %d keys\n", k) @@ -164,7 +164,7 @@ func planReadthroughGaussian() { key := fmt.Sprintf("key%d", mk) val := []byte(fmt.Sprintf(str1kb, mk)) if err := pc.Put(key, val, 60); err != nil { - panic(err) + log.Error().Err(err).Msgf("error putting key %s", key) } } }(w) @@ -189,7 +189,8 @@ func planReadthroughGaussian() { } if expired { - panic("key expired") + log.Error().Msgf("key %s expired", key) + // panic("key expired") } if found && string(val) != fmt.Sprintf(str1kb, randomval) { diff --git a/flashring/internal/indicesV3/delete_manager.go b/flashring/internal/indicesV3/delete_manager.go index 839a0274..887a16b7 100644 --- a/flashring/internal/indicesV3/delete_manager.go +++ b/flashring/internal/indicesV3/delete_manager.go @@ -73,9 +73,7 @@ func (dm *DeleteManager) ExecuteDeleteIfNeeded() error { return fmt.Errorf("memIdAtHead: %d, toBeDeletedMemId: %d", memIdAtHead, dm.toBeDeletedMemId) } - if trimNeeded { - dm.wrapFile.TrimHead() - } + dm.wrapFile.TrimHead() return nil } return nil diff --git 
a/flashring/internal/indicesV3/index.go b/flashring/internal/indicesV3/index.go index 95bc9c3e..abdd74b2 100644 --- a/flashring/internal/indicesV3/index.go +++ b/flashring/internal/indicesV3/index.go @@ -2,12 +2,10 @@ package indicesv2 import ( "errors" - "sync" "time" "github.com/Meesho/BharatMLStack/flashring/internal/maths" "github.com/cespare/xxhash/v2" - "github.com/rs/zerolog/log" "github.com/zeebo/xxh3" ) @@ -22,7 +20,7 @@ const ( ) type Index struct { - rm sync.Map + rm map[uint64]int rb *RingBuffer mc *maths.MorrisLogCounter startAt int64 @@ -35,7 +33,7 @@ func NewIndex(hashBits int, rbInitial, rbMax, deleteAmortizedStep int) *Index { } // rm := make(map[uint64]int) return &Index{ - rm: sync.Map{}, + rm: make(map[uint64]int), rb: NewRingBuffer(rbInitial, rbMax), mc: maths.New(12), startAt: time.Now().Unix(), @@ -52,15 +50,15 @@ func (i *Index) Put(key string, length, ttlInMinutes uint16, memId, offset uint3 delta := uint16(expiryAt - (i.startAt / 60)) encode(key, length, delta, lastAccess, freq, memId, offset, entry) - if headIdx, ok := i.rm.Load(hlo); !ok { + if headIdx, ok := i.rm[hlo]; !ok { encodeHashNextPrev(hhi, hlo, -1, -1, hashNextPrev) - i.rm.Store(hlo, idx) + i.rm[hlo] = idx return } else { - _, headHashNextPrev, _ := i.rb.Get(int(headIdx.(int))) + _, headHashNextPrev, _ := i.rb.Get(int(headIdx)) encodeUpdatePrev(int32(idx), headHashNextPrev) - encodeHashNextPrev(hhi, hlo, -1, int32(headIdx.(int)), hashNextPrev) - i.rm.Store(hlo, idx) + encodeHashNextPrev(hhi, hlo, -1, int32(headIdx), hashNextPrev) + i.rm[hlo] = idx return } @@ -68,8 +66,8 @@ func (i *Index) Put(key string, length, ttlInMinutes uint16, memId, offset uint3 func (i *Index) Get(key string) (length, lastAccess, remainingTTL uint16, freq uint64, memId, offset uint32, status Status) { hhi, hlo := hash128(key) - if idx, ok := i.rm.Load(hlo); ok { - entry, hashNextPrev, _ := i.rb.Get(int(idx.(int))) + if idx, ok := i.rm[hlo]; ok { + entry, hashNextPrev, _ := i.rb.Get(int(idx)) for { if 
isHashMatch(hhi, hlo, hashNextPrev) { length, deltaExptime, lastAccess, freq, memId, offset := decode(entry) @@ -106,15 +104,15 @@ func (ix *Index) Delete(count int) (uint32, int) { } delMemId, _ := decodeMemIdOffset(deleted) deletedHlo := decodeHashLo(deletedHashNextPrev) - mapIdx, ok := ix.rm.Load(deletedHlo) - if ok && mapIdx.(int) == deletedIdx { - ix.rm.Delete(deletedHlo) + mapIdx, ok := ix.rm[deletedHlo] + if ok && mapIdx == deletedIdx { + delete(ix.rm, deletedHlo) } else if ok && hasPrev(deletedHashNextPrev) { prevIdx := decodePrev(deletedHashNextPrev) _, hashNextPrev, _ := ix.rb.Get(int(prevIdx)) encodeUpdateNext(-1, hashNextPrev) } else { - log.Warn().Msgf("broken link. Entry in RB but cannot be linked to map. deletedIdx: %d", deletedIdx) + //log.Warn().Msgf("broken link. Entry in RB but cannot be linked to map. deletedIdx: %d", deletedIdx) } nextMemId, _ := decodeMemIdOffset(next) diff --git a/flashring/pkg/metrics/console_logger.go b/flashring/pkg/metrics/console_logger.go index 5affea62..c3a90311 100644 --- a/flashring/pkg/metrics/console_logger.go +++ b/flashring/pkg/metrics/console_logger.go @@ -93,6 +93,8 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { badCR32Total := int64(0) badKeyTotal := int64(0) deletedKeyTotal := int64(0) + writeTotal := int64(0) + punchHoleTotal := int64(0) for _, shard := range currentMetrics.ShardIndexMetrics { keyNotFoundTotal += shard.KeyNotFoundCount @@ -102,6 +104,8 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { badCR32Total += shard.BadCR32Count badKeyTotal += shard.BadKeyCount deletedKeyTotal += shard.DeletedKeyCount + writeTotal += shard.WriteCount + punchHoleTotal += shard.PunchHoleCount } log.Info().Msgf("KeyNotFoundTotal: %v", keyNotFoundTotal) @@ -111,6 +115,8 @@ func RunConsoleLogger(metricsCollector *MetricsCollector) { log.Info().Msgf("BadCR32Total: %v", badCR32Total) log.Info().Msgf("BadKeyTotal: %v", badKeyTotal) log.Info().Msgf("DeletedKeyTotal: %v", deletedKeyTotal) + 
log.Info().Msgf("WriteTotal: %v", writeTotal) + log.Info().Msgf("PunchHoleTotal: %v", punchHoleTotal) // Debug: Log cumulative totals to understand the issue log.Info().Msgf("DEBUG - GetsTotal: %v, HitsTotal: %v, PutsTotal: %v, ActiveEntriesTotal: %v", getsTotal, hitsTotal, putsTotal, activeEntriesTotal) From e92786dbb02a6b970704eafa38738a71bb32bc3f Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 10 Feb 2026 07:30:33 +0000 Subject: [PATCH 20/53] add direct statsd metrics for latencies --- flashring/pkg/cache/cache.go | 2 ++ flashring/pkg/metrics/statsd_logger.go | 24 +++++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 943d2b62..dec3706c 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -344,6 +344,7 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro start := time.Now() defer func() { wc.stats[shardIdx].LatencyTracker.RecordPut(time.Since(start)) + metrics.Timing(metrics.KEY_WRITE_LATENCY, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) }() wc.shardLocks[shardIdx].Lock() @@ -369,6 +370,7 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { start := time.Now() defer func() { wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start)) + metrics.Timing(metrics.KEY_READ_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) }() var keyFound bool diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index 47b9c524..c9dad5ac 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -6,17 +6,19 @@ import ( ) const ( - KEY_READ_LATENCY = "flashringread_latency" - KEY_WRITE_LATENCY = "flashringwrite_latency" - KEY_RTHROUGHPUT = "flashring_rthroughput" - KEY_WTHROUGHPUT = 
"flashring_wthroughput" - KEY_HITRATE = "flashring_hitrate" - KEY_ACTIVE_ENTRIES = "flashring_active_entries" - KEY_EXPIRED_ENTRIES = "flashring_expired_entries" - KEY_REWRITES = "flashring_rewrites" - KEY_GETS = "flashring_gets" - KEY_PUTS = "flashring_puts" - KEY_HITS = "flashring_hits" + KEY_READ_LATENCY = "flashringread_latency" + KEY_READ_LATENCY_STATSD = "flashringread_latency_statsd" + KEY_WRITE_LATENCY = "flashringwrite_latency" + KEY_WRITE_LATENCY_STATSD = "flashringwrite_latency_statsd" + KEY_RTHROUGHPUT = "flashring_rthroughput" + KEY_WTHROUGHPUT = "flashring_wthroughput" + KEY_HITRATE = "flashring_hitrate" + KEY_ACTIVE_ENTRIES = "flashring_active_entries" + KEY_EXPIRED_ENTRIES = "flashring_expired_entries" + KEY_REWRITES = "flashring_rewrites" + KEY_GETS = "flashring_gets" + KEY_PUTS = "flashring_puts" + KEY_HITS = "flashring_hits" KEY_KEY_NOT_FOUND_COUNT = "flashring_key_not_found_count" KEY_KEY_EXPIRED_COUNT = "flashring_key_expired_count" From 1ce67d13d2f9e8a08649ae6934a02980b914a250 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 10 Feb 2026 08:39:11 +0000 Subject: [PATCH 21/53] direct statsD metric for write latency --- flashring/pkg/cache/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index dec3706c..a0c2def9 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -344,7 +344,7 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro start := time.Now() defer func() { wc.stats[shardIdx].LatencyTracker.RecordPut(time.Since(start)) - metrics.Timing(metrics.KEY_WRITE_LATENCY, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) + metrics.Timing(metrics.KEY_WRITE_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) }() wc.shardLocks[shardIdx].Lock() From 
903165dbec6563ccd691be3e0bfb23974c366f16 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 10 Feb 2026 11:01:55 +0000 Subject: [PATCH 22/53] try lockless --- flashring/cmd/flashringtest/plan_lockless.go | 10 ++++---- flashring/pkg/cache/cache.go | 24 +++++++++++--------- flashring/pkg/metrics/metric.go | 2 +- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/flashring/cmd/flashringtest/plan_lockless.go b/flashring/cmd/flashringtest/plan_lockless.go index 1164602a..e52482f7 100644 --- a/flashring/cmd/flashringtest/plan_lockless.go +++ b/flashring/cmd/flashringtest/plan_lockless.go @@ -38,11 +38,11 @@ func planLockless() { flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme", "data directory for shard files") flag.IntVar(&numShards, "shards", 100, "number of shards") - flag.IntVar(&keysPerShard, "keys-per-shard", 50_00_00, "keys per shard") - flag.IntVar(&memtableMB, "memtable-mb", 16, "memtable size in MiB") + flag.IntVar(&keysPerShard, "keys-per-shard", 3_00_000, "keys per shard") + flag.IntVar(&memtableMB, "memtable-mb", 2, "memtable size in MiB") flag.IntVar(&fileSizeMultiplier, "file-size-multiplier", 2, "file size in GiB per shard") - flag.IntVar(&readWorkers, "readers", 8, "number of read workers") - flag.IntVar(&writeWorkers, "writers", 8, "number of write workers") + flag.IntVar(&readWorkers, "readers", 16, "number of read workers") + flag.IntVar(&writeWorkers, "writers", 16, "number of write workers") flag.IntVar(&sampleSecs, "sample-secs", 30, "predictor sampling window in seconds") flag.Int64Var(&iterations, "iterations", 100_000_000, "number of iterations") flag.Float64Var(&aVal, "a", 0.4, "a value for the predictor") @@ -127,7 +127,7 @@ func planLockless() { missedKeyChanList[i] = make(chan int) } - totalKeys := keysPerShard * numShards + totalKeys := 30_000_000 str1kb := strings.Repeat("a", 1024) str1kb = "%d" + str1kb diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index a0c2def9..2be29e71 100644 --- 
a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -289,6 +289,7 @@ func (wc *WrapCache) PutLL(key string, value []byte, exptimeInMinutes uint16) er filecache.ErrorPool.Put(result) wc.stats[shardIdx].TotalPuts.Add(1) wc.stats[shardIdx].LatencyTracker.RecordPut(time.Since(start)) + metrics.Timing(metrics.KEY_WRITE_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) return op } @@ -298,19 +299,19 @@ func (wc *WrapCache) GetLL(key string) ([]byte, bool, bool) { start := time.Now() - found, value, _, expired, needsSlowPath := wc.shards[shardIdx].GetFastPath(key) + // found, value, _, expired, needsSlowPath := wc.shards[shardIdx].GetFastPath(key) - if !needsSlowPath { - if found && !expired { - wc.stats[shardIdx].Hits.Add(1) - } else if expired { - wc.stats[shardIdx].Expired.Add(1) - } + // if !needsSlowPath { + // if found && !expired { + // wc.stats[shardIdx].Hits.Add(1) + // } else if expired { + // wc.stats[shardIdx].Expired.Add(1) + // } - wc.stats[shardIdx].TotalGets.Add(1) - wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start)) - return value, found, expired - } + // wc.stats[shardIdx].TotalGets.Add(1) + // wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start)) + // return value, found, expired + // } result := filecache.ReadResultPool.Get().(chan filecache.ReadResultV2) @@ -331,6 +332,7 @@ func (wc *WrapCache) GetLL(key string) ([]byte, bool, bool) { wc.stats[shardIdx].Expired.Add(1) } wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start)) + metrics.Timing(metrics.KEY_READ_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) wc.stats[shardIdx].TotalGets.Add(1) return op.Data, op.Found, op.Expired diff --git a/flashring/pkg/metrics/metric.go b/flashring/pkg/metrics/metric.go index 495514e2..db127882 100644 --- a/flashring/pkg/metrics/metric.go +++ b/flashring/pkg/metrics/metric.go @@ 
-24,7 +24,7 @@ var ( // it is safe to use one client from multiple goroutines simultaneously statsDClient = getDefaultClient() // by default full sampling (1.0 = 100%) - samplingRate = 1.0 + samplingRate = 0.1 telegrafAddress = "localhost:8125" appName = "" initialized = false From b0a8e47df572cca5a4112e14df7be82ec30b3a4b Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 10 Feb 2026 14:55:10 +0000 Subject: [PATCH 23/53] return error on trim needed --- flashring/internal/indicesV3/delete_manager.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flashring/internal/indicesV3/delete_manager.go b/flashring/internal/indicesV3/delete_manager.go index 887a16b7..c6e632db 100644 --- a/flashring/internal/indicesV3/delete_manager.go +++ b/flashring/internal/indicesV3/delete_manager.go @@ -1,6 +1,7 @@ package indicesv2 import ( + "errors" "fmt" "github.com/Meesho/BharatMLStack/flashring/internal/fs" @@ -74,7 +75,7 @@ func (dm *DeleteManager) ExecuteDeleteIfNeeded() error { } dm.wrapFile.TrimHead() - return nil + return errors.New("trim needed retry this write") } return nil } From f16d4a667c99db249cc356449f8324c736002913 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 06:37:20 +0000 Subject: [PATCH 24/53] add pread and pwrite latencies --- .../cmd/flashringtest/plan_readthrough_gausian.go | 4 ++-- flashring/internal/fs/wrap_file.go | 10 ++++++++++ flashring/pkg/metrics/statsd_logger.go | 4 ++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index 02dcbcb1..0eac284b 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -38,8 +38,8 @@ func planReadthroughGaussian() { ) flag.StringVar(&mountPoint, "mount", "/mnt/disks/nvme/", "data directory for shard files") - flag.IntVar(&numShards, "shards", 100, "number of shards") - 
flag.IntVar(&keysPerShard, "keys-per-shard", 3_00_000, "keys per shard") + flag.IntVar(&numShards, "shards", 50, "number of shards") + flag.IntVar(&keysPerShard, "keys-per-shard", 6_00_000, "keys per shard") flag.IntVar(&memtableMB, "memtable-mb", 2, "memtable size in MiB") flag.Float64Var(&fileSizeMultiplier, "file-size-multiplier", 0.25, "file size in GiB per shard") flag.IntVar(&readWorkers, "readers", 16, "number of read workers") diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index fc91e006..24e68670 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -6,7 +6,9 @@ package fs import ( "os" "syscall" + "time" + "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "golang.org/x/sys/unix" ) @@ -72,10 +74,13 @@ func (r *WrapAppendFile) Pwrite(buf []byte) (currentPhysicalOffset int64, err er return 0, ErrBufNoAlign } } + startTime := time.Now() n, err := syscall.Pwrite(r.WriteFd, buf, r.PhysicalWriteOffset) + metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) if err != nil { return 0, err } + r.PhysicalWriteOffset += int64(n) if r.PhysicalWriteOffset >= r.MaxFileSize { r.wrapped = true @@ -126,7 +131,9 @@ func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) { return 0, ErrFileOffsetOutOfRange } + startTime := time.Now() n, err := syscall.Pread(r.ReadFd, buf, fileOffset) + metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) // flags := unix.RWF_HIPRI // optionally: | unix.RWF_NOWAIT // n, err := preadv2(r.ReadFd, buf, fileOffset, flags) if err != nil { @@ -137,6 +144,8 @@ func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) { } func (r *WrapAppendFile) TrimHead() (err error) { + + startTime := time.Now() if r.WriteDirectIO { if !isAlignedOffset(r.PhysicalStartOffset, r.blockSize) { return ErrOffsetNotAligned @@ -151,6 +160,7 @@ func (r *WrapAppendFile) TrimHead() (err error) { 
r.PhysicalStartOffset = 0 } r.Stat.PunchHoleCount++ + metrics.Timing(metrics.KEY_TRIM_HEAD_LATENCY, time.Since(startTime), []string{}) return nil } diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index c9dad5ac..22936ee1 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -36,6 +36,10 @@ const ( KEY_WRITE_COUNT = "flashring_write_count" KEY_PUNCH_HOLE_COUNT = "flashring_punch_hole_count" + + KEY_TRIM_HEAD_LATENCY = "flashring_wrap_file_trim_head_latency" + KEY_PREAD_LATENCY = "flashring_pread_latency" + KEY_PWRITE_LATENCY = "flashring_pwrite_latency" ) func RunStatsdLogger(metricsCollector *MetricsCollector) { From dcac7f9d7f16032592d8def9b1529cdb68df86a1 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 07:42:25 +0000 Subject: [PATCH 25/53] remove dsync from pwrite --- flashring/internal/fs/fs.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flashring/internal/fs/fs.go b/flashring/internal/fs/fs.go index 186e524e..822d9f14 100644 --- a/flashring/internal/fs/fs.go +++ b/flashring/internal/fs/fs.go @@ -82,11 +82,13 @@ func createAppendOnlyWriteFileDescriptor(filename string) (int, *os.File, bool, } func createPreAllocatedWriteFileDescriptor(filename string, maxFileSize int64) (int, *os.File, bool, error) { - flags := O_DIRECT | O_WRONLY | O_CREAT | O_DSYNC + // flags := O_DIRECT | O_WRONLY | O_CREAT | O_DSYNC + flags := O_DIRECT | O_WRONLY | O_CREAT fd, err := syscall.Open(filename, flags, FILE_MODE) if err != nil { log.Warn().Msgf("DIRECT_IO not supported, falling back to regular flags: %v", err) - flags = O_WRONLY | O_CREAT | O_DSYNC + // flags = O_WRONLY | O_CREAT | O_DSYNC + flags = O_WRONLY | O_CREAT fd, err = syscall.Open(filename, flags, FILE_MODE) if err != nil { return 0, nil, false, err From 0b27e2424468c1a39385fab9a1f3dbaf48a48c3c Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 09:04:30 +0000 
Subject: [PATCH 26/53] remove o-direct from write path --- flashring/internal/fs/fs.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/internal/fs/fs.go b/flashring/internal/fs/fs.go index 822d9f14..fb074bc3 100644 --- a/flashring/internal/fs/fs.go +++ b/flashring/internal/fs/fs.go @@ -83,7 +83,7 @@ func createAppendOnlyWriteFileDescriptor(filename string) (int, *os.File, bool, func createPreAllocatedWriteFileDescriptor(filename string, maxFileSize int64) (int, *os.File, bool, error) { // flags := O_DIRECT | O_WRONLY | O_CREAT | O_DSYNC - flags := O_DIRECT | O_WRONLY | O_CREAT + flags := O_WRONLY | O_CREAT fd, err := syscall.Open(filename, flags, FILE_MODE) if err != nil { log.Warn().Msgf("DIRECT_IO not supported, falling back to regular flags: %v", err) From aeeff968ad6fd09a2ae1cc8104e8b96e3076b9e6 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 09:36:46 +0000 Subject: [PATCH 27/53] error read if more than 1ms --- flashring/internal/fs/fs.go | 7 +++---- flashring/internal/fs/wrap_file.go | 9 +++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/flashring/internal/fs/fs.go b/flashring/internal/fs/fs.go index fb074bc3..b69be0a4 100644 --- a/flashring/internal/fs/fs.go +++ b/flashring/internal/fs/fs.go @@ -32,6 +32,7 @@ var ( ErrFileSizeExceeded = errors.New("file size exceeded. 
Please punch hole") ErrFileOffsetOutOfRange = errors.New("file offset is out of range") ErrOffsetNotAligned = errors.New("offset is not aligned to block size") + ErrReadTimeout = errors.New("read timeout") ) type Stat struct { @@ -82,13 +83,11 @@ func createAppendOnlyWriteFileDescriptor(filename string) (int, *os.File, bool, } func createPreAllocatedWriteFileDescriptor(filename string, maxFileSize int64) (int, *os.File, bool, error) { - // flags := O_DIRECT | O_WRONLY | O_CREAT | O_DSYNC - flags := O_WRONLY | O_CREAT + flags := O_DIRECT | O_WRONLY | O_CREAT | O_DSYNC fd, err := syscall.Open(filename, flags, FILE_MODE) if err != nil { log.Warn().Msgf("DIRECT_IO not supported, falling back to regular flags: %v", err) - // flags = O_WRONLY | O_CREAT | O_DSYNC - flags = O_WRONLY | O_CREAT + flags = O_WRONLY | O_CREAT | O_DSYNC fd, err = syscall.Open(filename, flags, FILE_MODE) if err != nil { return 0, nil, false, err diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index 24e68670..8d626b39 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -108,6 +108,15 @@ func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) { } } + //return if the pread command doesnt complete in 1ms + timeoutTicker := time.NewTicker(1 * time.Millisecond) + defer timeoutTicker.Stop() + timeoutChan := timeoutTicker.C + select { + case <-timeoutChan: + return 0, ErrReadTimeout + default: + } // Validate read window depending on wrap state readEnd := fileOffset + int64(len(buf)) valid := false From aeb8bd33dec29f1516c8d90c7988f5169369578c Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 13:21:59 +0000 Subject: [PATCH 28/53] revert ticker --- flashring/internal/fs/wrap_file.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index 8d626b39..24e68670 100644 --- a/flashring/internal/fs/wrap_file.go +++ 
b/flashring/internal/fs/wrap_file.go @@ -108,15 +108,6 @@ func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) { } } - //return if the pread command doesnt complete in 1ms - timeoutTicker := time.NewTicker(1 * time.Millisecond) - defer timeoutTicker.Stop() - timeoutChan := timeoutTicker.C - select { - case <-timeoutChan: - return 0, ErrReadTimeout - default: - } // Validate read window depending on wrap state readEnd := fileOffset + int64(len(buf)) valid := false From cbc6d3d3f6e9e59b2fc7fb8448d955e8699076b4 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 13:24:00 +0000 Subject: [PATCH 29/53] add memtable chunking on flush --- flashring/internal/memtables/memtable.go | 29 ++++++++++++++++++------ flashring/pkg/metrics/statsd_logger.go | 2 ++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/flashring/internal/memtables/memtable.go b/flashring/internal/memtables/memtable.go index bc92f0ff..19f4a9b9 100644 --- a/flashring/internal/memtables/memtable.go +++ b/flashring/internal/memtables/memtable.go @@ -2,9 +2,11 @@ package memtables import ( "errors" + "runtime" + "strconv" "github.com/Meesho/BharatMLStack/flashring/internal/fs" - "github.com/rs/zerolog/log" + "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" ) var ( @@ -98,15 +100,28 @@ func (m *Memtable) Flush() (n int, fileOffset int64, err error) { if !m.readyForFlush { return 0, 0, ErrMemtableNotReadyForFlush } - fileOffset, err = m.file.Pwrite(m.page.Buf) - if err != nil { - return 0, 0, err - } else { - log.Debug().Msgf("Flushed memtable %d to file %d", m.Id, fileOffset) + + chunkSize := 32 * fs.BLOCK_SIZE + totalWritten := 0 + + for totalWritten < len(m.page.Buf) { + metrics.Count(metrics.KEY_MEMTABLE_FLUSH_COUNT, 1, []string{"memtable_id", strconv.Itoa(int(m.Id))}) + chunk := m.page.Buf[totalWritten : totalWritten+chunkSize] + + if err != nil { + return 0, 0, err + } + totalWritten += chunkSize + fileOffset, err = m.file.Pwrite(chunk) + if err != 
nil { + return 0, 0, err + } + + runtime.Gosched() } m.currentOffset = 0 m.readyForFlush = false - return len(m.page.Buf), fileOffset, nil + return totalWritten, fileOffset, nil } func (m *Memtable) Discard() { diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index 22936ee1..61623f28 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -40,6 +40,8 @@ const ( KEY_TRIM_HEAD_LATENCY = "flashring_wrap_file_trim_head_latency" KEY_PREAD_LATENCY = "flashring_pread_latency" KEY_PWRITE_LATENCY = "flashring_pwrite_latency" + + KEY_MEMTABLE_FLUSH_COUNT = "flashring_memtable_flush_count" ) func RunStatsdLogger(metricsCollector *MetricsCollector) { From e3abf42f63b181a19a06875d1eb8c83d2a4ea08f Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 14:37:23 +0000 Subject: [PATCH 30/53] chunk size smaller --- flashring/internal/memtables/memtable.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/internal/memtables/memtable.go b/flashring/internal/memtables/memtable.go index 19f4a9b9..d7d99a32 100644 --- a/flashring/internal/memtables/memtable.go +++ b/flashring/internal/memtables/memtable.go @@ -101,7 +101,7 @@ func (m *Memtable) Flush() (n int, fileOffset int64, err error) { return 0, 0, ErrMemtableNotReadyForFlush } - chunkSize := 32 * fs.BLOCK_SIZE + chunkSize := 8 * fs.BLOCK_SIZE totalWritten := 0 for totalWritten < len(m.page.Buf) { From 710c80e5556808402c32f0b0eb7328ecd4dccf01 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Wed, 11 Feb 2026 15:09:06 +0000 Subject: [PATCH 31/53] stats time wasted in lock --- flashring/pkg/cache/cache.go | 4 ++++ flashring/pkg/metrics/statsd_logger.go | 3 +++ 2 files changed, 7 insertions(+) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 2be29e71..bb96e5a4 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -349,7 +349,9 @@ func (wc *WrapCache) 
Put(key string, value []byte, exptimeInMinutes uint16) erro metrics.Timing(metrics.KEY_WRITE_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) }() + start = time.Now() wc.shardLocks[shardIdx].Lock() + metrics.Timing(metrics.LATENCY_WLOCK, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) defer wc.shardLocks[shardIdx].Unlock() err := wc.shards[shardIdx].Put(key, value, exptimeInMinutes) @@ -396,7 +398,9 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { } else { func(key string, shardIdx uint32) { + start := time.Now() wc.shardLocks[shardIdx].RLock() + metrics.Timing(metrics.LATENCY_RLOCK, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) defer wc.shardLocks[shardIdx].RUnlock() keyFound, val, remainingTTL, expired, shouldReWrite = wc.shards[shardIdx].Get(key) diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index 61623f28..2bf0d1fe 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -42,6 +42,9 @@ const ( KEY_PWRITE_LATENCY = "flashring_pwrite_latency" KEY_MEMTABLE_FLUSH_COUNT = "flashring_memtable_flush_count" + + LATENCY_RLOCK = "flashring_rlock_latency" + LATENCY_WLOCK = "flashring_wlock_latency" ) func RunStatsdLogger(metricsCollector *MetricsCollector) { From 8fb761fa54f1c737f3e1ea1050063e73c4c99962 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 12 Feb 2026 05:17:14 +0000 Subject: [PATCH 32/53] simplify metrics --- flashring/internal/memtables/memtable.go | 3 +-- flashring/pkg/cache/cache.go | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/flashring/internal/memtables/memtable.go b/flashring/internal/memtables/memtable.go index d7d99a32..06345053 100644 --- a/flashring/internal/memtables/memtable.go +++ b/flashring/internal/memtables/memtable.go @@ 
-3,7 +3,6 @@ package memtables import ( "errors" "runtime" - "strconv" "github.com/Meesho/BharatMLStack/flashring/internal/fs" "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" @@ -105,7 +104,7 @@ func (m *Memtable) Flush() (n int, fileOffset int64, err error) { totalWritten := 0 for totalWritten < len(m.page.Buf) { - metrics.Count(metrics.KEY_MEMTABLE_FLUSH_COUNT, 1, []string{"memtable_id", strconv.Itoa(int(m.Id))}) + metrics.Count(metrics.KEY_MEMTABLE_FLUSH_COUNT, 1, []string{}) chunk := m.page.Buf[totalWritten : totalWritten+chunkSize] if err != nil { diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index bb96e5a4..9566ebcd 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -351,7 +351,7 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro start = time.Now() wc.shardLocks[shardIdx].Lock() - metrics.Timing(metrics.LATENCY_WLOCK, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) + metrics.Timing(metrics.LATENCY_WLOCK, time.Since(start), []string{}) defer wc.shardLocks[shardIdx].Unlock() err := wc.shards[shardIdx].Put(key, value, exptimeInMinutes) @@ -400,7 +400,7 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { func(key string, shardIdx uint32) { start := time.Now() wc.shardLocks[shardIdx].RLock() - metrics.Timing(metrics.LATENCY_RLOCK, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) + metrics.Timing(metrics.LATENCY_RLOCK, time.Since(start), []string{}) defer wc.shardLocks[shardIdx].RUnlock() keyFound, val, remainingTTL, expired, shouldReWrite = wc.shards[shardIdx].Get(key) From f1d3b26d0d1bd2f336dfb4dd8230f80c0b43f7ba Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 12 Feb 2026 06:52:25 +0000 Subject: [PATCH 33/53] implement iouring --- flashring/go.mod | 2 +- flashring/go.sum | 26 +- flashring/internal/fs/iouring.go | 499 
+++++++++++++++++++++++ flashring/internal/fs/iouring_test.go | 103 +++++ flashring/internal/fs/iouring_wrapper.go | 40 ++ flashring/internal/fs/wrap_file.go | 47 +++ flashring/internal/shard/shard_cache.go | 46 ++- 7 files changed, 755 insertions(+), 8 deletions(-) create mode 100644 flashring/internal/fs/iouring.go create mode 100644 flashring/internal/fs/iouring_test.go create mode 100644 flashring/internal/fs/iouring_wrapper.go diff --git a/flashring/go.mod b/flashring/go.mod index 288dd765..206adab3 100644 --- a/flashring/go.mod +++ b/flashring/go.mod @@ -29,7 +29,7 @@ require ( require ( github.com/DataDog/datadog-go/v5 v5.8.2 - github.com/dgraph-io/badger/v4 v4.9.0 // indirect + github.com/dgraph-io/badger/v4 v4.9.0 github.com/dgraph-io/ristretto/v2 v2.2.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/go-logr/logr v1.4.3 // indirect diff --git a/flashring/go.sum b/flashring/go.sum index 701011b5..5d69f8d2 100644 --- a/flashring/go.sum +++ b/flashring/go.sum @@ -9,13 +9,18 @@ github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/ github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger/v4 v4.9.0 h1:tpqWb0NewSrCYqTvywbcXOhQdWcqephkVkbBmaaqHzc= github.com/dgraph-io/badger/v4 v4.9.0/go.mod h1:5/MEx97uzdPUHR4KtkNt8asfI2T4JiEiQlV7kWUo8c0= github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da 
h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= +github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -29,25 +34,30 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= -github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/kr/pretty v0.3.1 
h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= @@ -66,11 +76,14 @@ github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= github.com/spf13/viper 
v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= @@ -108,8 +121,6 @@ golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= -golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -126,5 +137,8 
@@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/flashring/internal/fs/iouring.go b/flashring/internal/fs/iouring.go new file mode 100644 index 00000000..4a72439b --- /dev/null +++ b/flashring/internal/fs/iouring.go @@ -0,0 +1,499 @@ +//go:build linux +// +build linux + +// Package fs provides a minimal io_uring implementation using raw syscalls. +// No external dependencies beyond golang.org/x/sys/unix are needed. +// Compatible with Go 1.24+ (no go:linkname usage). 
+package fs
+
+import (
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+// -----------------------------------------------------------------------
+// io_uring syscall numbers (amd64)
+// -----------------------------------------------------------------------
+
+// Raw syscall numbers handed to syscall.Syscall / syscall.Syscall6.
+// NOTE(review): the heading says amd64, but the build constraint on this
+// file is only "linux" -- confirm the numbers for every target architecture.
+const (
+	sysIOUringSetup    = 425
+	sysIOUringEnter    = 426
+	sysIOUringRegister = 427
+)
+
+// -----------------------------------------------------------------------
+// io_uring constants
+// -----------------------------------------------------------------------
+
+const (
+	// Setup flags
+	// submit tests this bit in IoUring.flags to choose between "always call
+	// io_uring_enter" and the wakeup-only path.
+	iouringSetupSQPoll = 1 << 1
+
+	// Enter flags
+	iouringEnterGetEvents = 1 << 0
+	iouringEnterSQWakeup  = 1 << 1
+
+	// SQ flags (read from kernel-shared memory)
+	iouringSQNeedWakeup = 1 << 0
+
+	// Opcodes
+	iouringOpNop   = 0
+	iouringOpRead  = 22
+	iouringOpWrite = 23
+
+	// offsets for mmap
+	// mapRings passes these as the mmap offset on the ring fd to select the
+	// SQ ring, the CQ ring and the SQE array respectively.
+	iouringOffSQRing = 0
+	iouringOffCQRing = 0x8000000
+	iouringOffSQEs   = 0x10000000
+)
+
+// -----------------------------------------------------------------------
+// io_uring kernel structures (must match kernel ABI exactly)
+// -----------------------------------------------------------------------
+
+// ioUringSqe is the 64-byte submission queue entry.
+// The blank (_) fields are never written by this package; they only pad the
+// struct so the field sizes add up to 64 bytes
+// (1+1+2+4 + 8 + 8 + 4+4 + 8 + 2+2+4 + 8 + 8).
+type ioUringSqe struct {
+	Opcode   uint8
+	Flags    uint8
+	IoPrio   uint16
+	Fd       int32
+	Off      uint64 // union: off / addr2
+	Addr     uint64 // union: addr / splice_off_in
+	Len      uint32
+	OpFlags  uint32 // union: rw_flags, etc.
+	UserData uint64
+	BufIndex uint16 // union: buf_index / buf_group
+	_        uint16 // personality
+	_        int32  // splice_fd_in / file_index
+	_        uint64 // addr3
+	_        uint64 // __pad2[0]
+}
+
+// ioUringCqe is the 16-byte completion queue entry.
+// SubmitRead and SubmitWrite treat a negative Res as a negated errno and a
+// non-negative Res as the byte count.
+type ioUringCqe struct {
+	UserData uint64
+	Res      int32
+	Flags    uint32
+}
+
+// ioUringParams is passed to io_uring_setup.
+// NewIoUring fills in only Flags before the call; afterwards mapRings reads
+// SqEntries, CqEntries, Features, SqOff and CqOff from it.
+type ioUringParams struct {
+	SqEntries    uint32
+	CqEntries    uint32
+	Flags        uint32
+	SqThreadCPU  uint32
+	SqThreadIdle uint32
+	Features     uint32
+	WqFd         uint32
+	Resv         [3]uint32
+	SqOff        ioUringSqringOffsets
+	CqOff        ioUringCqringOffsets
+}
+
+// ioUringSqringOffsets holds byte offsets into the SQ ring mapping; mapRings
+// adds them to the mapping's base address to locate each field.
+type ioUringSqringOffsets struct {
+	Head        uint32
+	Tail        uint32
+	RingMask    uint32
+	RingEntries uint32
+	Flags       uint32
+	Dropped     uint32
+	Array       uint32
+	Resv1       uint32
+	Resv2       uint64
+}
+
+// ioUringCqringOffsets holds byte offsets into the CQ ring mapping; mapRings
+// adds them to the mapping's base address to locate each field.
+type ioUringCqringOffsets struct {
+	Head        uint32
+	Tail        uint32
+	RingMask    uint32
+	RingEntries uint32
+	Overflow    uint32
+	Cqes        uint32
+	Flags       uint32
+	Resv1       uint32
+	Resv2       uint64
+}
+
+// -----------------------------------------------------------------------
+// IoUring is the main ring handle
+// -----------------------------------------------------------------------
+
+// IoUring wraps a single io_uring instance with SQ/CQ ring mappings.
+// All pointer fields point into the mmap'ed regions created by mapRings and
+// become invalid once Close has unmapped them.
+type IoUring struct {
+	fd int
+
+	// SQ ring mapped memory
+	sqRingPtr  []byte
+	sqMask     uint32
+	sqEntries  uint32
+	sqHead     *uint32 // kernel-updated
+	sqTail     *uint32 // user-updated
+	sqFlags    *uint32 // kernel-updated (NEED_WAKEUP etc.)
+	sqArray    unsafe.Pointer
+	sqeTail    uint32 // local tracking of next SQE slot
+	sqeHead    uint32 // local tracking of submitted SQEs
+	sqesMmap   []byte
+	sqesBase   unsafe.Pointer // base pointer to SQE array
+	sqRingSz   int            // raised to cqRingSz by mapRings when one mapping serves both rings
+	cqRingSz   int
+	sqesSz     int
+	singleMmap bool // Features bit 0 was set; cqRingPtr then aliases sqRingPtr
+
+	// CQ ring mapped memory
+	cqRingPtr []byte
+	cqMask    uint32
+	cqEntries uint32
+	cqHead    *uint32 // user-updated
+	cqTail    *uint32 // kernel-updated
+	cqesBase  unsafe.Pointer
+
+	// Setup flags
+	flags uint32
+
+	// Mutex for concurrent SQE submission from multiple goroutines
+	mu sync.Mutex
+
+	// Diagnostic counter -- limits debug output to first N failures
+	// (SubmitRead stops printing once it reaches 20).
+	debugCount int
+}
+
+// NewIoUring creates a new io_uring instance with the given queue depth.
+// flags can be 0 for normal mode.
+func NewIoUring(entries uint32, flags uint32) (*IoUring, error) {
+	var params ioUringParams
+	params.Flags = flags
+
+	// The kernel fills params in place (entry counts, features, ring offsets),
+	// so it must be passed by address.
+	fd, _, errno := syscall.Syscall(sysIOUringSetup, uintptr(entries), uintptr(unsafe.Pointer(&params)), 0)
+	if errno != 0 {
+		return nil, fmt.Errorf("io_uring_setup failed: %w", errno)
+	}
+
+	ring := &IoUring{
+		fd:    int(fd),
+		flags: params.Flags,
+	}
+
+	if err := ring.mapRings(&params); err != nil {
+		// mapRings has already unmapped whatever it managed to map.
+		syscall.Close(ring.fd)
+		return nil, err
+	}
+
+	return ring, nil
+}
+
+// mapRings mmaps the SQ ring, the CQ ring and the SQE array of r.fd and
+// resolves the pointers/masks stored on r from the offsets the kernel wrote
+// into p. On failure every mapping created so far is released and a wrapped
+// mmap error is returned; the ring fd itself is left for the caller to close.
+func (r *IoUring) mapRings(p *ioUringParams) error {
+	sqOff := &p.SqOff
+	cqOff := &p.CqOff
+
+	// Calculate SQ ring size
+	r.sqRingSz = int(sqOff.Array + p.SqEntries*4) // Array + entries*sizeof(uint32)
+
+	// Calculate CQ ring size
+	r.cqRingSz = int(cqOff.Cqes + p.CqEntries*uint32(unsafe.Sizeof(ioUringCqe{})))
+
+	// Check if kernel supports single mmap for both rings
+	r.singleMmap = (p.Features & 1) != 0 // IORING_FEAT_SINGLE_MMAP = 1
+	if r.singleMmap {
+		// One mapping must be large enough for whichever ring ends last.
+		if r.cqRingSz > r.sqRingSz {
+			r.sqRingSz = r.cqRingSz
+		}
+	}
+
+	// Map SQ ring
+	var err error
+	r.sqRingPtr, err = unix.Mmap(r.fd, iouringOffSQRing, r.sqRingSz,
+		unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED|unix.MAP_POPULATE)
+	if err != nil {
+		return fmt.Errorf("mmap SQ ring: %w", err)
+	}
+
+	// Map CQ ring (same or separate mapping)
+	if r.singleMmap {
+		r.cqRingPtr = r.sqRingPtr
+	} else {
+		r.cqRingPtr, err = unix.Mmap(r.fd, iouringOffCQRing, r.cqRingSz,
+			unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED|unix.MAP_POPULATE)
+		if err != nil {
+			unix.Munmap(r.sqRingPtr)
+			return fmt.Errorf("mmap CQ ring: %w", err)
+		}
+	}
+
+	// Map SQE array
+	r.sqesSz = int(p.SqEntries) * int(unsafe.Sizeof(ioUringSqe{}))
+	r.sqesMmap, err = unix.Mmap(r.fd, iouringOffSQEs, r.sqesSz,
+		unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED|unix.MAP_POPULATE)
+	if err != nil {
+		unix.Munmap(r.sqRingPtr)
+		// With a single mapping cqRingPtr aliases sqRingPtr and is already gone.
+		if !r.singleMmap {
+			unix.Munmap(r.cqRingPtr)
+		}
+		return fmt.Errorf("mmap SQEs: %w", err)
+	}
+	r.sqesBase = unsafe.Pointer(&r.sqesMmap[0])
+
+	// Set up SQ ring pointers
+	sqBase := unsafe.Pointer(&r.sqRingPtr[0])
+	r.sqHead = (*uint32)(unsafe.Add(sqBase, sqOff.Head))
+	r.sqTail = (*uint32)(unsafe.Add(sqBase, sqOff.Tail))
+	r.sqFlags = (*uint32)(unsafe.Add(sqBase, sqOff.Flags))
+	r.sqMask = *(*uint32)(unsafe.Add(sqBase, sqOff.RingMask))
+	r.sqEntries = *(*uint32)(unsafe.Add(sqBase, sqOff.RingEntries))
+	r.sqArray = unsafe.Add(sqBase, sqOff.Array)
+
+	// Set up CQ ring pointers
+	cqBase := unsafe.Pointer(&r.cqRingPtr[0])
+	r.cqHead = (*uint32)(unsafe.Add(cqBase, cqOff.Head))
+	r.cqTail = (*uint32)(unsafe.Add(cqBase, cqOff.Tail))
+	r.cqMask = *(*uint32)(unsafe.Add(cqBase, cqOff.RingMask))
+	r.cqEntries = *(*uint32)(unsafe.Add(cqBase, cqOff.RingEntries))
+	r.cqesBase = unsafe.Add(cqBase, cqOff.Cqes)
+
+	return nil
+}
+
+// Close releases all resources associated with the ring.
+// Unmap and close errors are ignored (best effort). The ring must not be
+// used, or closed again, afterwards.
+func (r *IoUring) Close() {
+	unix.Munmap(r.sqesMmap)
+	unix.Munmap(r.sqRingPtr)
+	if !r.singleMmap {
+		unix.Munmap(r.cqRingPtr)
+	}
+	syscall.Close(r.fd)
+}
+
+// -----------------------------------------------------------------------
+// SQE helpers
+// -----------------------------------------------------------------------
+
+// getSqeAt returns the SQE stored in slot idx of the mapped SQE array.
+// idx is not range-checked; callers mask it with sqMask first.
+func (r *IoUring) getSqeAt(idx uint32) *ioUringSqe {
+	return (*ioUringSqe)(unsafe.Add(r.sqesBase, uintptr(idx)*unsafe.Sizeof(ioUringSqe{})))
+}
+
+// getCqeAt returns the CQE stored in slot idx of the mapped CQ ring.
+// idx is not range-checked; callers mask it with cqMask first.
+func (r *IoUring) getCqeAt(idx uint32) *ioUringCqe {
+	return (*ioUringCqe)(unsafe.Add(r.cqesBase, uintptr(idx)*unsafe.Sizeof(ioUringCqe{})))
+}
+
+// sqArrayAt returns the address of entry idx of the SQ index array
+// (4 bytes per entry).
+func (r *IoUring) sqArrayAt(idx uint32) *uint32 {
+	return (*uint32)(unsafe.Add(r.sqArray, uintptr(idx)*4))
+}
+
+// getSqe returns the next available SQE, or nil if the SQ is full.
+func (r *IoUring) getSqe() *ioUringSqe {
+	// sqeTail is a plain field: SubmitRead/SubmitWrite call this with r.mu held.
+	head := atomic.LoadUint32(r.sqHead)
+	next := r.sqeTail + 1
+	// Unsigned subtraction stays correct when the counters wrap around.
+	if next-head > r.sqEntries {
+		return nil // SQ full
+	}
+	sqe := r.getSqeAt(r.sqeTail & r.sqMask)
+	r.sqeTail++
+	// Zero out the SQE so fields left over from the slot's previous use
+	// cannot leak into this request.
+	*sqe = ioUringSqe{}
+	return sqe
+}
+
+// flushSq flushes locally queued SQEs into the kernel-visible SQ ring.
+// It returns the distance between the kernel's SQ head and the (possibly
+// advanced) tail, which submit passes to io_uring_enter as the submit count.
+func (r *IoUring) flushSq() uint32 {
+	tail := *r.sqTail
+	toSubmit := r.sqeTail - r.sqeHead
+	if toSubmit == 0 {
+		return tail - atomic.LoadUint32(r.sqHead)
+	}
+	for ; toSubmit > 0; toSubmit-- {
+		*r.sqArrayAt(tail & r.sqMask) = r.sqeHead & r.sqMask
+		tail++
+		r.sqeHead++
+	}
+	// Publish the new tail only after every index-array slot has been written.
+	atomic.StoreUint32(r.sqTail, tail)
+	return tail - atomic.LoadUint32(r.sqHead)
+}
+
+// -----------------------------------------------------------------------
+// Submission and completion
+// -----------------------------------------------------------------------
+
+// ioUringEnter invokes io_uring_enter(2) on fd and returns the raw result
+// together with the errno, if any.
+//
+// EINTR is retried here instead of being surfaced: a signal arriving while
+// the call blocks would otherwise make SubmitRead/SubmitWrite give up on a
+// request whose completion is still pending, and the next caller would then
+// consume that stale completion.
+// NOTE(review): retrying with the same toSubmit assumes the kernel reports
+// EINTR only when it consumed no SQEs -- confirm against io_uring_enter(2).
+func ioUringEnter(fd int, toSubmit, minComplete, flags uint32) (int, error) {
+	for {
+		ret, _, errno := syscall.Syscall6(sysIOUringEnter,
+			uintptr(fd), uintptr(toSubmit), uintptr(minComplete), uintptr(flags), 0, 0)
+		switch errno {
+		case 0:
+			return int(ret), nil
+		case syscall.EINTR:
+			continue
+		default:
+			return int(ret), errno
+		}
+	}
+}
+
+// submit flushes SQEs and calls io_uring_enter if needed.
+// waitNr > 0 additionally asks the kernel to wait for that many completions.
+// It returns io_uring_enter's result, or the flushed count when no syscall
+// was necessary (SQPOLL with an awake poller and waitNr == 0).
+func (r *IoUring) submit(waitNr uint32) (int, error) {
+	submitted := r.flushSq()
+	var flags uint32 = 0
+
+	// If not using SQPOLL, we always need to enter
+	if r.flags&iouringSetupSQPoll == 0 {
+		if waitNr > 0 {
+			flags |= iouringEnterGetEvents
+		}
+		return ioUringEnter(r.fd, submitted, waitNr, flags)
+	}
+
+	// SQPOLL: only enter if kernel thread needs wakeup
+	if atomic.LoadUint32(r.sqFlags)&iouringSQNeedWakeup != 0 {
+		flags |= iouringEnterSQWakeup
+	}
+	if waitNr > 0 {
+		flags |= iouringEnterGetEvents
+	}
+	if flags != 0 {
+		return ioUringEnter(r.fd, submitted, waitNr, flags)
+	}
+	return int(submitted), nil
+}
+
+// waitCqe waits for at least one CQE to be available and returns it.
+// The caller MUST call seenCqe after processing.
+func (r *IoUring) waitCqe() (*ioUringCqe, error) {
+	for {
+		head := atomic.LoadUint32(r.cqHead)
+		tail := atomic.LoadUint32(r.cqTail)
+		if head != tail {
+			// The returned pointer aliases ring memory: copy the fields out
+			// before calling seenCqe.
+			return r.getCqeAt(head & r.cqMask), nil
+		}
+		// No CQE available, ask the kernel
+		_, err := ioUringEnter(r.fd, 0, 1, iouringEnterGetEvents)
+		if err == syscall.EINTR {
+			// A signal interrupted the wait. The request is still in flight,
+			// so bailing out here would leave its CQE behind for the next
+			// caller to consume by mistake -- keep waiting instead.
+			continue
+		}
+		if err != nil {
+			return nil, err
+		}
+	}
+}
+
+// seenCqe advances the CQ head by 1, releasing the CQE slot.
+func (r *IoUring) seenCqe() {
+	atomic.StoreUint32(r.cqHead, atomic.LoadUint32(r.cqHead)+1)
+}
+
+// -----------------------------------------------------------------------
+// PrepRead / PrepWrite helpers
+// -----------------------------------------------------------------------
+
+// prepRead fills sqe with a read of len(buf) bytes from fd at offset into buf.
+// buf must be non-empty (&buf[0] panics otherwise) and must stay reachable
+// until the completion has been consumed, because only its address is stored.
+func prepRead(sqe *ioUringSqe, fd int, buf []byte, offset uint64) {
+	sqe.Opcode = iouringOpRead
+	sqe.Fd = int32(fd)
+	sqe.Addr = uint64(uintptr(unsafe.Pointer(&buf[0])))
+	sqe.Len = uint32(len(buf))
+	sqe.Off = offset
+}
+
+// prepWrite fills sqe with a write of buf to fd at offset.
+// The same non-empty / keep-alive requirements as for prepRead apply.
+func prepWrite(sqe *ioUringSqe, fd int, buf []byte, offset uint64) {
+	sqe.Opcode = iouringOpWrite
+	sqe.Fd = int32(fd)
+	sqe.Addr = uint64(uintptr(unsafe.Pointer(&buf[0])))
+	sqe.Len = uint32(len(buf))
+	sqe.Off = offset
+}
+
+// -----------------------------------------------------------------------
+// High-level thread-safe API
+// -----------------------------------------------------------------------
+
+// SubmitRead submits a pread and waits for completion. Thread-safe.
+// Returns bytes read or an error.
+func (r *IoUring) SubmitRead(fd int, buf []byte, offset uint64) (int, error) {
+	if len(buf) == 0 {
+		return 0, nil
+	}
+
+	r.mu.Lock()
+
+	sqe := r.getSqe()
+	if sqe == nil {
+		r.mu.Unlock()
+		return 0, fmt.Errorf("io_uring: SQ full, no SQE available")
+	}
+	prepRead(sqe, fd, buf, offset)
+	// Tag the SQE so we can verify the CQE belongs to this request
+	sqe.UserData = offset
+
+	submitted, err := r.submit(1)
+	if err != nil {
+		r.mu.Unlock()
+		return 0, fmt.Errorf("io_uring_enter failed: %w", err)
+	}
+
+	cqe, err := r.waitCqe()
+	if err != nil {
+		r.mu.Unlock()
+		return 0, fmt.Errorf("io_uring wait cqe: %w", err)
+	}
+
+	// Copy the CQE out of ring memory before releasing its slot.
+	res := cqe.Res
+	userData := cqe.UserData
+	cqeFlags := cqe.Flags
+	r.seenCqe()
+
+	// Everything that touches ring state -- including the diagnostic budget
+	// and the SQE cursors printed below -- is read while mu is still held, so
+	// concurrent callers do not race on these fields.
+	matched := userData == offset
+	diag := matched && res >= 0 && int(res) != len(buf) && r.debugCount < 20
+	if diag {
+		r.debugCount++
+	}
+	sqeHead, sqeTail := r.sqeHead, r.sqeTail
+	r.mu.Unlock()
+
+	if !matched {
+		// The completion belongs to some other request (for example one left
+		// behind by an earlier failed call), so res says nothing about buf.
+		return 0, fmt.Errorf("io_uring pread: completion user_data %d does not match request %d, fd=%d len=%d res=%d",
+			userData, offset, fd, len(buf), res)
+	}
+
+	if res < 0 {
+		return 0, fmt.Errorf("io_uring pread errno %d (%s), fd=%d off=%d len=%d submitted=%d ud=%d",
+			-res, syscall.Errno(-res), fd, offset, len(buf), submitted, userData)
+	}
+
+	// Diagnostic: if io_uring returned 0 (EOF) or short read, compare with syscall.Pread
+	// NOTE(review): the comparison pread writes into the caller's buf, so buf
+	// may hold more valid bytes than the returned count says.
+	if diag {
+		pn, perr := syscall.Pread(fd, buf, int64(offset))
+		// Also stat the fd to check file size
+		var stat syscall.Stat_t
+		fstatErr := syscall.Fstat(fd, &stat)
+		var fsize int64
+		if fstatErr == nil {
+			fsize = stat.Size
+		}
+		fmt.Printf("[io_uring diag] fd=%d off=%d len=%d uring_res=%d uring_ud=%d uring_flags=%d "+
+			"submitted=%d pread_n=%d pread_err=%v filesize=%d fstat_err=%v sqeHead=%d sqeTail=%d\n",
+			fd, offset, len(buf), res, userData, cqeFlags,
+			submitted, pn, perr, fsize, fstatErr, sqeHead, sqeTail)
+	}
+
+	return int(res), nil
+}
+
+// SubmitWrite submits a pwrite and waits for completion. Thread-safe.
+// Returns bytes written or an error.
+func (r *IoUring) SubmitWrite(fd int, buf []byte, offset uint64) (int, error) { + if len(buf) == 0 { + return 0, nil + } + + r.mu.Lock() + + sqe := r.getSqe() + if sqe == nil { + r.mu.Unlock() + return 0, fmt.Errorf("io_uring: SQ full, no SQE available") + } + prepWrite(sqe, fd, buf, offset) + + _, err := r.submit(1) + if err != nil { + r.mu.Unlock() + return 0, fmt.Errorf("io_uring_enter failed: %w", err) + } + + cqe, err := r.waitCqe() + if err != nil { + r.mu.Unlock() + return 0, fmt.Errorf("io_uring wait cqe: %w", err) + } + + res := cqe.Res + r.seenCqe() + r.mu.Unlock() + + if res < 0 { + return 0, fmt.Errorf("io_uring pwrite failed: errno %d (%s)", -res, syscall.Errno(-res)) + } + return int(res), nil +} diff --git a/flashring/internal/fs/iouring_test.go b/flashring/internal/fs/iouring_test.go new file mode 100644 index 00000000..37f1cfa7 --- /dev/null +++ b/flashring/internal/fs/iouring_test.go @@ -0,0 +1,103 @@ +//go:build linux +// +build linux + +package fs + +import ( + "os" + "syscall" + "testing" + "unsafe" +) + +func TestIoUringBasicRead(t *testing.T) { + // 1. Create a temp file with known data + f, err := os.CreateTemp("", "iouring_test_*") + if err != nil { + t.Fatal(err) + } + defer os.Remove(f.Name()) + + data := make([]byte, 4096) + for i := range data { + data[i] = byte(i % 251) // non-zero pattern + } + if _, err := f.Write(data); err != nil { + t.Fatal(err) + } + if err := f.Sync(); err != nil { + t.Fatal(err) + } + f.Close() + + // 2. Open with O_DIRECT | O_RDONLY + fd, err := syscall.Open(f.Name(), syscall.O_RDONLY|syscall.O_DIRECT, 0) + if err != nil { + t.Fatalf("open O_DIRECT: %v", err) + } + defer syscall.Close(fd) + + // 3. Create io_uring ring + ring, err := NewIoUring(32, 0) + if err != nil { + t.Fatalf("NewIoUring: %v", err) + } + defer ring.Close() + + // 4. Allocate aligned buffer + buf := AlignedBlock(4096, 4096) + + // 5. 
Submit read via io_uring + n, err := ring.SubmitRead(fd, buf, 0) + if err != nil { + t.Fatalf("SubmitRead: %v", err) + } + if n != 4096 { + t.Fatalf("SubmitRead returned %d bytes, expected 4096", n) + } + + // 6. Verify data + for i := 0; i < 4096; i++ { + if buf[i] != data[i] { + t.Fatalf("data mismatch at byte %d: got %d, want %d", i, buf[i], data[i]) + } + } + t.Logf("io_uring read of 4096 bytes succeeded and data matches") + + // 7. Test a second read (to verify ring reuse works) + buf2 := AlignedBlock(4096, 4096) + n2, err := ring.SubmitRead(fd, buf2, 0) + if err != nil { + t.Fatalf("SubmitRead #2: %v", err) + } + if n2 != 4096 { + t.Fatalf("SubmitRead #2 returned %d bytes, expected 4096", n2) + } + for i := 0; i < 4096; i++ { + if buf2[i] != data[i] { + t.Fatalf("data mismatch #2 at byte %d: got %d, want %d", i, buf2[i], data[i]) + } + } + t.Logf("io_uring second read also succeeded") + + // 8. Test multiple sequential reads to exercise ring cycling + for iter := 0; iter < 100; iter++ { + buf3 := AlignedBlock(4096, 4096) + n3, err := ring.SubmitRead(fd, buf3, 0) + if err != nil { + t.Fatalf("SubmitRead iter %d: %v", iter, err) + } + if n3 != 4096 { + t.Fatalf("SubmitRead iter %d returned %d bytes, expected 4096", iter, n3) + } + } + t.Logf("100 sequential io_uring reads succeeded") +} + +// AlignedBlock returns a 4096-byte-aligned buffer. +func AlignedBlock(size, alignment int) []byte { + raw := make([]byte, size+alignment) + addr := uintptr(unsafe.Pointer(&raw[0])) + off := (alignment - int(addr%uintptr(alignment))) % alignment + return raw[off : off+size] +} diff --git a/flashring/internal/fs/iouring_wrapper.go b/flashring/internal/fs/iouring_wrapper.go new file mode 100644 index 00000000..b059e4ed --- /dev/null +++ b/flashring/internal/fs/iouring_wrapper.go @@ -0,0 +1,40 @@ +//go:build linux +// +build linux + +package fs + +import ( + "fmt" +) + +// IOUringFile wraps an existing WrapAppendFile with an io_uring ring for async I/O. 
+// It does NOT own the WrapAppendFile -- the caller manages its lifecycle. +type IOUringFile struct { + *WrapAppendFile // embed existing file (shared, not owned) + ring *IoUring // our raw io_uring instance + depth uint32 // submission queue depth +} + +// NewIOUringFile attaches an io_uring ring to an existing WrapAppendFile. +// The WrapAppendFile is shared (not duplicated) -- writes and reads use +// the same file descriptors, so offset tracking stays in sync. +// ringDepth controls the SQ/CQ size (64-256 is a good starting point). +// flags can be 0 for normal mode. +func NewIOUringFile(waf *WrapAppendFile, ringDepth uint32, flags uint32) (*IOUringFile, error) { + ring, err := NewIoUring(ringDepth, flags) + if err != nil { + return nil, fmt.Errorf("io_uring init failed: %w", err) + } + + return &IOUringFile{ + WrapAppendFile: waf, + ring: ring, + depth: ringDepth, + }, nil +} + +// Close releases only the io_uring ring. The underlying WrapAppendFile +// is NOT closed here since it is shared with the shard. +func (f *IOUringFile) Close() { + f.ring.Close() +} diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index 24e68670..4b0e5640 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -143,6 +143,53 @@ func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) { return int32(n), nil } +// PreadAsync submits a pread via io_uring and waits for completion. +// Thread-safe: multiple goroutines can call this concurrently on the same IOUringFile. +// Applies the same read-window validation and offset wrapping as Pread so that +// stale index entries (pointing past MaxFileSize) are rejected cheaply without +// hitting the kernel. 
+func (f *IOUringFile) PreadAsync(fileOffset int64, buf []byte) (int, error) { + if f.ReadDirectIO { + if !isAlignedOffset(fileOffset, f.blockSize) { + return 0, ErrOffsetNotAligned + } + if !isAlignedBuffer(buf, f.blockSize) { + return 0, ErrBufNoAlign + } + } + + // Validate read window and wrap offset (mirrors Pread logic exactly) + readEnd := fileOffset + int64(len(buf)) + valid := false + + if !f.wrapped { + // Single valid region: [PhysicalStartOffset, PhysicalWriteOffset) + valid = fileOffset >= f.PhysicalStartOffset && readEnd <= f.PhysicalWriteOffset + } else { + // Ring buffer has wrapped -- map the logical offset back into [0, MaxFileSize) + fileOffset = fileOffset % f.MaxFileSize + readEnd = readEnd % f.MaxFileSize + if fileOffset >= f.PhysicalStartOffset { + valid = readEnd <= f.MaxFileSize + } else { + valid = readEnd <= f.PhysicalWriteOffset + } + } + if !valid { + return 0, ErrFileOffsetOutOfRange + } + + startTime := time.Now() + n, err := f.ring.SubmitRead(f.ReadFd, buf, uint64(fileOffset)) + metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) + if err != nil { + return 0, err + } + + f.Stat.ReadCount++ + return n, nil +} + func (r *WrapAppendFile) TrimHead() (err error) { startTime := time.Now() diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 3c6da3ff..ace790de 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -18,6 +18,7 @@ import ( type ShardCache struct { keyIndex *indices.Index file *fs.WrapAppendFile + ioFile *fs.IOUringFile mm *memtables.MemtableManager readPageAllocator *allocators.SlabAlignedPageAllocator dm *indices.DeleteManager @@ -121,10 +122,18 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { log.Panic().Err(err).Msg("Failed to create read page allocator") } dm := indices.NewDeleteManager(ki, file, config.DeleteAmortizedStep) + + // Attach io_uring to the existing file -- do 
NOT create a second WrapAppendFile, + // otherwise the ioFile would have stale offset tracking (PhysicalWriteOffset etc.). + ioFile, err := fs.NewIOUringFile(file, 256, 0) + if err != nil { + log.Panic().Err(err).Msg("Failed to create io_uring file") + } sc := &ShardCache{ keyIndex: ki, mm: memtableManager, file: file, + ioFile: ioFile, readPageAllocator: readPageAllocator, dm: dm, predictor: config.Predictor, @@ -219,7 +228,7 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { // Allocate buffer of exact size needed - no pool since readFromDisk already copies once buf = make([]byte, length) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) - n := fc.readFromDisk(int64(fileOffset), length, buf) + n := fc.readFromDiskAsync(int64(fileOffset), length, buf) if n != int(length) { fc.Stats.BadLengthCount.Add(1) return false, nil, 0, false, shouldReWrite @@ -360,18 +369,53 @@ func (fc *ShardCache) validateAndReturnBuffer(key string, buf []byte, length uin } func (fc *ShardCache) readFromDisk(fileOffset int64, length uint16, buf []byte) int { + alignedStartOffset := (fileOffset / fs.BLOCK_SIZE) * fs.BLOCK_SIZE endndOffset := fileOffset + int64(length) endAlignedOffset := ((endndOffset + fs.BLOCK_SIZE - 1) / fs.BLOCK_SIZE) * fs.BLOCK_SIZE alignedReadSize := endAlignedOffset - alignedStartOffset + page := fc.readPageAllocator.Get(int(alignedReadSize)) + fc.file.Pread(alignedStartOffset, page.Buf) + start := int(fileOffset - alignedStartOffset) n := copy(buf, page.Buf[start:start+int(length)]) fc.readPageAllocator.Put(page) return n } +func (fc *ShardCache) readFromDiskAsync(fileOffset int64, length uint16, buf []byte) int { + alignedStartOffset := (fileOffset / fs.BLOCK_SIZE) * fs.BLOCK_SIZE + endndOffset := fileOffset + int64(length) + endAlignedOffset := ((endndOffset + fs.BLOCK_SIZE - 1) / fs.BLOCK_SIZE) * fs.BLOCK_SIZE + alignedReadSize := int(endAlignedOffset - alignedStartOffset) + page := 
fc.readPageAllocator.Get(alignedReadSize) + + // Use exactly alignedReadSize bytes, not the full page.Buf which may be + // larger due to slab allocator rounding to the next size class. + readBuf := page.Buf[:alignedReadSize] + n, err := fc.ioFile.PreadAsync(alignedStartOffset, readBuf) + if err != nil || n != alignedReadSize { + // ErrFileOffsetOutOfRange is expected for stale index entries -- don't log. + // Only log genuine io_uring / I/O errors. + if err != nil && err != fs.ErrFileOffsetOutOfRange { + log.Warn().Err(err). + Int64("offset", alignedStartOffset). + Int("alignedReadSize", alignedReadSize). + Int("n", n). + Msg("io_uring pread failed") + } + fc.readPageAllocator.Put(page) + return 0 + } + + start := int(fileOffset - alignedStartOffset) + copied := copy(buf, page.Buf[start:start+int(length)]) + fc.readPageAllocator.Put(page) + return copied +} + func (fc *ShardCache) GetRingBufferActiveEntries() int { return fc.keyIndex.GetRB().ActiveEntries() } From 98362c21fd717c7c5d399dfde1dfc75db9a9f7da Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 12 Feb 2026 07:28:04 +0000 Subject: [PATCH 34/53] change lock position for rlock --- flashring/internal/indicesV3/index.go | 12 +++++++++++- flashring/pkg/cache/cache.go | 5 +---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/flashring/internal/indicesV3/index.go b/flashring/internal/indicesV3/index.go index abdd74b2..3b20f6a0 100644 --- a/flashring/internal/indicesV3/index.go +++ b/flashring/internal/indicesV3/index.go @@ -2,9 +2,11 @@ package indicesv2 import ( "errors" + "sync" "time" "github.com/Meesho/BharatMLStack/flashring/internal/maths" + "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/cespare/xxhash/v2" "github.com/zeebo/xxh3" ) @@ -20,6 +22,7 @@ const ( ) type Index struct { + mu sync.RWMutex rm map[uint64]int rb *RingBuffer mc *maths.MorrisLogCounter @@ -66,7 +69,14 @@ func (i *Index) Put(key string, length, ttlInMinutes uint16, memId, offset uint3 func (i 
*Index) Get(key string) (length, lastAccess, remainingTTL uint16, freq uint64, memId, offset uint32, status Status) { hhi, hlo := hash128(key) - if idx, ok := i.rm[hlo]; ok { + + start := time.Now() + i.mu.RLock() + idx, ok := i.rm[hlo] + i.mu.RUnlock() + metrics.Timing(metrics.LATENCY_RLOCK, time.Since(start), []string{}) + + if ok { entry, hashNextPrev, _ := i.rb.Get(int(idx)) for { if isHashMatch(hhi, hlo, hashNextPrev) { diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 9566ebcd..0a2f3403 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -398,10 +398,7 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { } else { func(key string, shardIdx uint32) { - start := time.Now() - wc.shardLocks[shardIdx].RLock() - metrics.Timing(metrics.LATENCY_RLOCK, time.Since(start), []string{}) - defer wc.shardLocks[shardIdx].RUnlock() + keyFound, val, remainingTTL, expired, shouldReWrite = wc.shards[shardIdx].Get(key) if shouldReWrite { From 0afc603db1158d1c9fa2296899c412f308eea52b Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 12 Feb 2026 07:28:41 +0000 Subject: [PATCH 35/53] metable chunk size 16*4 --- flashring/internal/memtables/memtable.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/internal/memtables/memtable.go b/flashring/internal/memtables/memtable.go index 06345053..1586490d 100644 --- a/flashring/internal/memtables/memtable.go +++ b/flashring/internal/memtables/memtable.go @@ -100,7 +100,7 @@ func (m *Memtable) Flush() (n int, fileOffset int64, err error) { return 0, 0, ErrMemtableNotReadyForFlush } - chunkSize := 8 * fs.BLOCK_SIZE + chunkSize := 16 * fs.BLOCK_SIZE totalWritten := 0 for totalWritten < len(m.page.Buf) { From 02f92f71801779e0bd13a8dd5cd90107e3d282d6 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 12 Feb 2026 09:12:54 +0000 Subject: [PATCH 36/53] fix rb loop --- flashring/internal/indicesV3/index.go | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/flashring/internal/indicesV3/index.go b/flashring/internal/indicesV3/index.go index 3b20f6a0..62dc7f22 100644 --- a/flashring/internal/indicesV3/index.go +++ b/flashring/internal/indicesV3/index.go @@ -77,8 +77,8 @@ func (i *Index) Get(key string) (length, lastAccess, remainingTTL uint16, freq u metrics.Timing(metrics.LATENCY_RLOCK, time.Since(start), []string{}) if ok { - entry, hashNextPrev, _ := i.rb.Get(int(idx)) for { + entry, hashNextPrev, _ := i.rb.Get(int(idx)) if isHashMatch(hhi, hlo, hashNextPrev) { length, deltaExptime, lastAccess, freq, memId, offset := decode(entry) exptime := int(deltaExptime) + int(i.startAt/60) From fbaa622ad339080c9b44d37cbce50d2fe1fc5e49 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 12 Feb 2026 10:08:11 +0000 Subject: [PATCH 37/53] correct the mutex used for rlock --- flashring/internal/indicesV3/index.go | 5 +++-- flashring/internal/shard/shard_cache.go | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/flashring/internal/indicesV3/index.go b/flashring/internal/indicesV3/index.go index 62dc7f22..5f93f7b3 100644 --- a/flashring/internal/indicesV3/index.go +++ b/flashring/internal/indicesV3/index.go @@ -22,7 +22,7 @@ const ( ) type Index struct { - mu sync.RWMutex + mu *sync.RWMutex rm map[uint64]int rb *RingBuffer mc *maths.MorrisLogCounter @@ -30,12 +30,13 @@ type Index struct { hashBits int } -func NewIndex(hashBits int, rbInitial, rbMax, deleteAmortizedStep int) *Index { +func NewIndex(hashBits int, rbInitial, rbMax, deleteAmortizedStep int, mu *sync.RWMutex) *Index { if ByteOrder == nil { loadByteOrder() } // rm := make(map[uint64]int) return &Index{ + mu: mu, rm: make(map[uint64]int), rb: NewRingBuffer(rbInitial, rbMax), mc: maths.New(12), diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index ace790de..d20a6652 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -109,7 +109,7 
@@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { if err != nil { log.Panic().Err(err).Msg("Failed to create memtable manager") } - ki := indices.NewIndex(0, config.RbInitial, config.RbMax, config.DeleteAmortizedStep) + ki := indices.NewIndex(0, config.RbInitial, config.RbMax, config.DeleteAmortizedStep, sl) sizeClasses := make([]allocators.SizeClass, 0) i := fs.BLOCK_SIZE iMax := (1 << 16) From 5036e0b70ffae3b636485bf2f3b87198a2e5c2c9 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Thu, 12 Feb 2026 10:42:02 +0000 Subject: [PATCH 38/53] implement iouring batching --- flashring/internal/fs/batch_iouring.go | 254 ++++++++++++++++++++++++ flashring/internal/fs/wrap_file.go | 32 +++ flashring/internal/shard/shard_cache.go | 43 +++- flashring/pkg/cache/cache.go | 19 ++ 4 files changed, 339 insertions(+), 9 deletions(-) create mode 100644 flashring/internal/fs/batch_iouring.go diff --git a/flashring/internal/fs/batch_iouring.go b/flashring/internal/fs/batch_iouring.go new file mode 100644 index 00000000..85b2ab84 --- /dev/null +++ b/flashring/internal/fs/batch_iouring.go @@ -0,0 +1,254 @@ +//go:build linux +// +build linux + +package fs + +import ( + "fmt" + "sync" + "syscall" + "time" +) + +// batchReadResult holds the outcome of a single batched pread. +type batchReadResult struct { + N int + Err error +} + +// batchReadRequest is a pread submitted to the batch reader. +type batchReadRequest struct { + fd int + buf []byte + offset uint64 + done chan batchReadResult +} + +var batchReqPool = sync.Pool{ + New: func() interface{} { + return &batchReadRequest{ + done: make(chan batchReadResult, 1), + } + }, +} + +// BatchIoUringReader collects pread requests from multiple goroutines into a +// single channel and submits them as one io_uring batch. This amortizes the +// syscall overhead (1 io_uring_enter instead of N) and lets NVMe process +// multiple commands in parallel (queue depth > 1). 
+// +// Typical usage: create one global instance shared across all shards. +type BatchIoUringReader struct { + ring *IoUring + reqCh chan *batchReadRequest + maxBatch int + window time.Duration + closeCh chan struct{} + wg sync.WaitGroup +} + +// BatchIoUringConfig configures the batch reader. +type BatchIoUringConfig struct { + RingDepth uint32 // io_uring SQ/CQ size (default 256) + MaxBatch int // max requests per batch (capped to RingDepth) + Window time.Duration // collection window after first request arrives + QueueSize int // channel buffer size (default 1024) +} + +// NewBatchIoUringReader creates a batch reader with its own io_uring ring +// and starts the background collection goroutine. +func NewBatchIoUringReader(cfg BatchIoUringConfig) (*BatchIoUringReader, error) { + if cfg.RingDepth == 0 { + cfg.RingDepth = 256 + } + if cfg.MaxBatch == 0 || cfg.MaxBatch > int(cfg.RingDepth) { + cfg.MaxBatch = int(cfg.RingDepth) + } + if cfg.Window == 0 { + cfg.Window = time.Millisecond + } + if cfg.QueueSize == 0 { + cfg.QueueSize = 1024 + } + + ring, err := NewIoUring(cfg.RingDepth, 0) + if err != nil { + return nil, fmt.Errorf("batch io_uring init: %w", err) + } + + b := &BatchIoUringReader{ + ring: ring, + reqCh: make(chan *batchReadRequest, cfg.QueueSize), + maxBatch: cfg.MaxBatch, + window: cfg.Window, + closeCh: make(chan struct{}), + } + b.wg.Add(1) + go b.loop() + return b, nil +} + +// Submit sends a pread request into the batch channel and blocks until the +// io_uring completion is received. Thread-safe; called from many goroutines. 
+func (b *BatchIoUringReader) Submit(fd int, buf []byte, offset uint64) (int, error) { + if len(buf) == 0 { + return 0, nil + } + + req := batchReqPool.Get().(*batchReadRequest) + req.fd = fd + req.buf = buf + req.offset = offset + + b.reqCh <- req + + result := <-req.done + n, err := result.N, result.Err + + // Reset and return to pool + req.fd = 0 + req.buf = nil + req.offset = 0 + batchReqPool.Put(req) + + return n, err +} + +// Close shuts down the collection goroutine and releases the io_uring ring. +func (b *BatchIoUringReader) Close() { + close(b.closeCh) + b.wg.Wait() + b.ring.Close() +} + +// loop is the single background goroutine that collects and submits batches. +// +// Phase 1: block on first request (no timer ticking when idle). +// Phase 2: collect up to maxBatch or until the window expires. +// Phase 3: submit the batch in one io_uring_enter call. +func (b *BatchIoUringReader) loop() { + defer b.wg.Done() + + batch := make([]*batchReadRequest, 0, b.maxBatch) + + // Pre-allocate and stop the timer so Reset works correctly. + collectTimer := time.NewTimer(0) + if !collectTimer.Stop() { + <-collectTimer.C + } + + for { + // Phase 1: wait for first request (idle) + select { + case req := <-b.reqCh: + batch = append(batch, req) + case <-b.closeCh: + return + } + + // Phase 2: collect more within the window + collectTimer.Reset(b.window) + collect: + for len(batch) < b.maxBatch { + select { + case req := <-b.reqCh: + batch = append(batch, req) + case <-collectTimer.C: + break collect + } + } + // Drain timer if we exited early (maxBatch reached before timer fired) + if !collectTimer.Stop() { + select { + case <-collectTimer.C: + default: + } + } + + // Phase 3: submit the batch + b.submitBatch(batch) + batch = batch[:0] + } +} + +// submitBatch prepares N SQEs, submits them in one io_uring_enter(N, N), +// drains all CQEs, and dispatches results back to callers via done channels. 
+func (b *BatchIoUringReader) submitBatch(batch []*batchReadRequest) { + // log.Info().Msgf("submitting batch of %d requests", len(batch)) + n := len(batch) + if n == 0 { + return + } + + b.ring.mu.Lock() + + // Prepare SQEs + prepared := 0 + for i, req := range batch { + sqe := b.ring.getSqe() + if sqe == nil { + // SQ full -- error the rest + for j := i; j < n; j++ { + batch[j].done <- batchReadResult{ + Err: fmt.Errorf("io_uring: SQ full, batch=%d depth=%d", n, b.ring.sqEntries), + } + } + break + } + prepRead(sqe, req.fd, req.buf, req.offset) + sqe.UserData = uint64(i) // index for CQE matching + prepared++ + } + + if prepared == 0 { + b.ring.mu.Unlock() + return + } + + // Submit all at once; kernel waits for all completions before returning. + _, err := b.ring.submit(uint32(prepared)) + if err != nil { + b.ring.mu.Unlock() + for i := 0; i < prepared; i++ { + batch[i].done <- batchReadResult{Err: fmt.Errorf("io_uring_enter: %w", err)} + } + return + } + + // Drain CQEs -- order may differ from submission order. + completed := 0 + for completed < prepared { + cqe, err := b.ring.waitCqe() + if err != nil { + // Catastrophic ring error -- unblock all unsatisfied callers. 
+ b.ring.mu.Unlock() + for i := 0; i < n; i++ { + select { + case batch[i].done <- batchReadResult{Err: fmt.Errorf("io_uring waitCqe: %w", err)}: + default: // already sent + } + } + return + } + + idx := int(cqe.UserData) + res := cqe.Res + b.ring.seenCqe() + completed++ + + if idx < 0 || idx >= prepared { + continue // unexpected UserData; skip + } + + if res < 0 { + batch[idx].done <- batchReadResult{ + Err: fmt.Errorf("io_uring pread errno %d (%s), fd=%d off=%d len=%d", + -res, syscall.Errno(-res), batch[idx].fd, batch[idx].offset, len(batch[idx].buf)), + } + } else { + batch[idx].done <- batchReadResult{N: int(res)} + } + } + + b.ring.mu.Unlock() +} diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index 4b0e5640..cd17e2f3 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -143,6 +143,38 @@ func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) { return int32(n), nil } +// ValidateReadOffset checks the read window and wraps the offset for ring-buffer +// files. Returns the physical file offset to use, or an error. +// Mirrors the validation logic in PreadAsync / Pread so callers that bypass +// PreadAsync (e.g. the batched io_uring path) get identical safety checks. 
+func (r *WrapAppendFile) ValidateReadOffset(fileOffset int64, bufLen int) (int64, error) { + if r.ReadDirectIO { + if !isAlignedOffset(fileOffset, r.blockSize) { + return 0, ErrOffsetNotAligned + } + } + + readEnd := fileOffset + int64(bufLen) + valid := false + + if !r.wrapped { + valid = fileOffset >= r.PhysicalStartOffset && readEnd <= r.PhysicalWriteOffset + } else { + fileOffset = fileOffset % r.MaxFileSize + readEnd = readEnd % r.MaxFileSize + if fileOffset >= r.PhysicalStartOffset { + valid = readEnd <= r.MaxFileSize + } else { + valid = readEnd <= r.PhysicalWriteOffset + } + } + if !valid { + return 0, ErrFileOffsetOutOfRange + } + + return fileOffset, nil +} + // PreadAsync submits a pread via io_uring and waits for completion. // Thread-safe: multiple goroutines can call this concurrently on the same IOUringFile. // Applies the same read-window validation and offset wrapping as Pread so that diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index d20a6652..6b6caf37 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -19,6 +19,7 @@ type ShardCache struct { keyIndex *indices.Index file *fs.WrapAppendFile ioFile *fs.IOUringFile + batchReader *fs.BatchIoUringReader // global batched io_uring reader (shared across shards) mm *memtables.MemtableManager readPageAllocator *allocators.SlabAlignedPageAllocator dm *indices.DeleteManager @@ -90,6 +91,10 @@ type ShardCacheConfig struct { //lockless EnableLockless bool + + // Global batched io_uring reader (shared across all shards). + // When set, disk reads go through this instead of the per-shard IOUringFile. 
+ BatchIoUringReader *fs.BatchIoUringReader } func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { @@ -123,17 +128,10 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { } dm := indices.NewDeleteManager(ki, file, config.DeleteAmortizedStep) - // Attach io_uring to the existing file -- do NOT create a second WrapAppendFile, - // otherwise the ioFile would have stale offset tracking (PhysicalWriteOffset etc.). - ioFile, err := fs.NewIOUringFile(file, 256, 0) - if err != nil { - log.Panic().Err(err).Msg("Failed to create io_uring file") - } sc := &ShardCache{ keyIndex: ki, mm: memtableManager, file: file, - ioFile: ioFile, readPageAllocator: readPageAllocator, dm: dm, predictor: config.Predictor, @@ -144,6 +142,18 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { }, } + if config.BatchIoUringReader != nil { + // Use the global batched io_uring reader (shared across all shards). + sc.batchReader = config.BatchIoUringReader + } else { + // Fallback: per-shard io_uring ring for backward compatibility. + ioFile, err := fs.NewIOUringFile(file, 256, 0) + if err != nil { + log.Panic().Err(err).Msg("Failed to create io_uring file") + } + sc.ioFile = ioFile + } + // Initialize batch reader if enabled if config.EnableBatching { sc.BatchReader = NewBatchReaderV2(BatchReaderV2Config{ @@ -395,10 +405,25 @@ func (fc *ShardCache) readFromDiskAsync(fileOffset int64, length uint16, buf []b // Use exactly alignedReadSize bytes, not the full page.Buf which may be // larger due to slab allocator rounding to the next size class. readBuf := page.Buf[:alignedReadSize] - n, err := fc.ioFile.PreadAsync(alignedStartOffset, readBuf) + + var n int + var err error + + if fc.batchReader != nil { + // Batched path: validate offset locally, then submit to the global + // io_uring batch reader which accumulates requests across all shards. 
+ var validOffset int64 + validOffset, err = fc.file.ValidateReadOffset(alignedStartOffset, alignedReadSize) + if err == nil { + n, err = fc.batchReader.Submit(fc.file.ReadFd, readBuf, uint64(validOffset)) + } + } else { + // Per-shard io_uring fallback + n, err = fc.ioFile.PreadAsync(alignedStartOffset, readBuf) + } + if err != nil || n != alignedReadSize { // ErrFileOffsetOutOfRange is expected for stale index entries -- don't log. - // Only log genuine io_uring / I/O errors. if err != nil && err != fs.ErrFileOffsetOutOfRange { log.Warn().Err(err). Int64("offset", alignedStartOffset). diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 0a2f3403..da0de853 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -9,6 +9,7 @@ import ( "sync/atomic" "time" + "github.com/Meesho/BharatMLStack/flashring/internal/fs" "github.com/Meesho/BharatMLStack/flashring/internal/maths" filecache "github.com/Meesho/BharatMLStack/flashring/internal/shard" "github.com/cespare/xxhash/v2" @@ -46,6 +47,7 @@ type WrapCache struct { predictor *maths.Predictor stats []*CacheStats metricsCollector *metrics.MetricsCollector + batchReader *fs.BatchIoUringReader // global batched io_uring reader } type CacheStats struct { @@ -170,6 +172,20 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m GridSearchEpsilon: config.GridSearchEpsilon, }) + // Create a single global batched io_uring reader shared across all shards. + // All disk reads funnel into one channel; the background goroutine collects + // them for up to 1ms and submits them in a single io_uring_enter call. 
+ batchReader, err := fs.NewBatchIoUringReader(fs.BatchIoUringConfig{ + RingDepth: 256, + MaxBatch: 256, + Window: time.Millisecond, + QueueSize: 1024, + }) + if err != nil { + log.Error().Err(err).Msg("Failed to create batched io_uring reader, falling back to per-shard rings") + batchReader = nil + } + batchWindow := time.Duration(0) if config.EnableBatching && config.BatchWindowMicros > 0 { batchWindow = time.Duration(config.BatchWindowMicros) * time.Microsecond @@ -195,6 +211,8 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m //lockless mode for PutLL/GetLL EnableLockless: config.EnableLockless, + + BatchIoUringReader: batchReader, }, &shardLocks[i]) } @@ -208,6 +226,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m predictor: predictor, stats: stats, metricsCollector: metricsCollector, + batchReader: batchReader, } if metricsCollector.Config.StatsEnabled { From 8c510a34b56a30d16b54f543ae3697c6c8b04d92 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Fri, 13 Feb 2026 07:33:42 +0000 Subject: [PATCH 39/53] add pread metric and change iouring to wait 500microsecs --- flashring/internal/fs/batch_iouring.go | 5 +++++ flashring/pkg/cache/cache.go | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/flashring/internal/fs/batch_iouring.go b/flashring/internal/fs/batch_iouring.go index 85b2ab84..3381bbea 100644 --- a/flashring/internal/fs/batch_iouring.go +++ b/flashring/internal/fs/batch_iouring.go @@ -8,6 +8,8 @@ import ( "sync" "syscall" "time" + + "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" ) // batchReadResult holds the outcome of a single batched pread. 
@@ -95,6 +97,8 @@ func (b *BatchIoUringReader) Submit(fd int, buf []byte, offset uint64) (int, err return 0, nil } + startTime := time.Now() + req := batchReqPool.Get().(*batchReadRequest) req.fd = fd req.buf = buf @@ -104,6 +108,7 @@ func (b *BatchIoUringReader) Submit(fd int, buf []byte, offset uint64) (int, err result := <-req.done n, err := result.N, result.Err + metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) // Reset and return to pool req.fd = 0 diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index da0de853..d618ae24 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -178,7 +178,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m batchReader, err := fs.NewBatchIoUringReader(fs.BatchIoUringConfig{ RingDepth: 256, MaxBatch: 256, - Window: time.Millisecond, + Window: time.Microsecond * 500, QueueSize: 1024, }) if err != nil { From f2126ffaf020c8e0089940dafb91decb1ac4ead8 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Fri, 13 Feb 2026 08:48:08 +0000 Subject: [PATCH 40/53] change write iouring batch size --- flashring/internal/fs/iouring.go | 51 ++++++++++++++++ flashring/internal/fs/wrap_file.go | 74 ++++++++++++++++++++++++ flashring/internal/memtables/memtable.go | 29 ++++------ flashring/internal/shard/shard_cache.go | 8 +++ flashring/pkg/cache/cache.go | 10 ++++ 5 files changed, 155 insertions(+), 17 deletions(-) diff --git a/flashring/internal/fs/iouring.go b/flashring/internal/fs/iouring.go index 4a72439b..246e1cbd 100644 --- a/flashring/internal/fs/iouring.go +++ b/flashring/internal/fs/iouring.go @@ -460,6 +460,57 @@ func (r *IoUring) SubmitRead(fd int, buf []byte, offset uint64) (int, error) { return int(res), nil } +// SubmitWriteBatch submits N pwrite operations in a single io_uring_enter call +// and waits for all completions. Thread-safe. +// Returns per-chunk bytes written. On error, partial results may be returned. 
+func (r *IoUring) SubmitWriteBatch(fd int, bufs [][]byte, offsets []uint64) ([]int, error) { + n := len(bufs) + if n == 0 { + return nil, nil + } + + r.mu.Lock() + defer r.mu.Unlock() + + // Prepare all SQEs + for i := 0; i < n; i++ { + sqe := r.getSqe() + if sqe == nil { + return nil, fmt.Errorf("io_uring: SQ full, need %d slots but ring has %d", n, r.sqEntries) + } + prepWrite(sqe, fd, bufs[i], offsets[i]) + sqe.UserData = uint64(i) + } + + // Submit all at once; kernel waits for all completions + _, err := r.submit(uint32(n)) + if err != nil { + return nil, fmt.Errorf("io_uring_enter: %w", err) + } + + // Drain all CQEs (order may differ from submission) + results := make([]int, n) + for i := 0; i < n; i++ { + cqe, err := r.waitCqe() + if err != nil { + return results, fmt.Errorf("io_uring waitCqe: %w", err) + } + idx := int(cqe.UserData) + res := cqe.Res + r.seenCqe() + + if res < 0 { + return results, fmt.Errorf("io_uring pwrite errno %d (%s), fd=%d off=%d len=%d", + -res, syscall.Errno(-res), fd, offsets[idx], len(bufs[idx])) + } + if idx >= 0 && idx < n { + results[idx] = int(res) + } + } + + return results, nil +} + // SubmitWrite submits a pwrite and waits for completion. Thread-safe. // Returns bytes written or an error. 
func (r *IoUring) SubmitWrite(fd int, buf []byte, offset uint64) (int, error) { diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index cd17e2f3..89475c79 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -27,6 +27,7 @@ type WrapAppendFile struct { WriteFile *os.File // write file ReadFile *os.File // read file Stat *Stat // file statistics + WriteRing *IoUring // optional io_uring ring for batched writes } func NewWrapAppendFile(config FileConfig) (*WrapAppendFile, error) { @@ -91,6 +92,79 @@ func (r *WrapAppendFile) Pwrite(buf []byte) (currentPhysicalOffset int64, err er return r.PhysicalWriteOffset, nil } +// PwriteBatch writes a large buffer in chunkSize pieces using a single batched +// io_uring submission. All chunks are submitted in one io_uring_enter call so +// NVMe can process them in parallel rather than sequentially. +// Returns total bytes written and the final PhysicalWriteOffset. +// Requires WriteRing to be set; falls back to sequential Pwrite if nil. 
+func (r *WrapAppendFile) PwriteBatch(buf []byte, chunkSize int) (totalWritten int, fileOffset int64, err error) { + if r.WriteRing == nil { + // Fallback: sequential pwrite + for written := 0; written < len(buf); written += chunkSize { + end := written + chunkSize + if end > len(buf) { + end = len(buf) + } + fileOffset, err = r.Pwrite(buf[written:end]) + if err != nil { + return written, fileOffset, err + } + totalWritten += end - written + } + return totalWritten, fileOffset, nil + } + + if r.WriteDirectIO { + if !isAlignedBuffer(buf, r.blockSize) { + return 0, 0, ErrBufNoAlign + } + } + + numChunks := len(buf) / chunkSize + if len(buf)%chunkSize != 0 { + numChunks++ + } + + // Pre-compute all offsets, handling ring-buffer wrap + bufs := make([][]byte, numChunks) + offsets := make([]uint64, numChunks) + offset := r.PhysicalWriteOffset + wrapped := r.wrapped + + for i := 0; i < numChunks; i++ { + start := i * chunkSize + end := start + chunkSize + if end > len(buf) { + end = len(buf) + } + bufs[i] = buf[start:end] + offsets[i] = uint64(offset) + offset += int64(end - start) + if offset >= r.MaxFileSize { + wrapped = true + offset = r.PhysicalStartOffset + } + } + + startTime := time.Now() + results, err := r.WriteRing.SubmitWriteBatch(r.WriteFd, bufs, offsets) + metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) + if err != nil { + return 0, 0, err + } + + // Update state after successful batch write + for _, n := range results { + totalWritten += n + } + r.PhysicalWriteOffset = offset + r.wrapped = wrapped + r.LogicalCurrentOffset += int64(totalWritten) + r.Stat.WriteCount += int64(numChunks) + + return totalWritten, r.PhysicalWriteOffset, nil +} + func (r *WrapAppendFile) TrimHeadIfNeeded() bool { if r.wrapped && r.PhysicalWriteOffset == r.PhysicalStartOffset { return true diff --git a/flashring/internal/memtables/memtable.go b/flashring/internal/memtables/memtable.go index 1586490d..848f9c92 100644 --- 
a/flashring/internal/memtables/memtable.go +++ b/flashring/internal/memtables/memtable.go @@ -2,7 +2,6 @@ package memtables import ( "errors" - "runtime" "github.com/Meesho/BharatMLStack/flashring/internal/fs" "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" @@ -100,24 +99,20 @@ func (m *Memtable) Flush() (n int, fileOffset int64, err error) { return 0, 0, ErrMemtableNotReadyForFlush } - chunkSize := 16 * fs.BLOCK_SIZE - totalWritten := 0 - - for totalWritten < len(m.page.Buf) { - metrics.Count(metrics.KEY_MEMTABLE_FLUSH_COUNT, 1, []string{}) - chunk := m.page.Buf[totalWritten : totalWritten+chunkSize] - - if err != nil { - return 0, 0, err - } - totalWritten += chunkSize - fileOffset, err = m.file.Pwrite(chunk) - if err != nil { - return 0, 0, err - } + chunkSize := fs.BLOCK_SIZE + numChunks := len(m.page.Buf) / chunkSize + if len(m.page.Buf)%chunkSize != 0 { + numChunks++ + } + metrics.Count(metrics.KEY_MEMTABLE_FLUSH_COUNT, int64(numChunks), []string{}) - runtime.Gosched() + // PwriteBatch submits all chunks in one io_uring_enter when WriteRing is + // set, otherwise falls back to sequential pwrite internally. + totalWritten, fileOffset, err := m.file.PwriteBatch(m.page.Buf, chunkSize) + if err != nil { + return 0, 0, err } + m.currentOffset = 0 m.readyForFlush = false return totalWritten, fileOffset, nil diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 6b6caf37..40d492e4 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -95,6 +95,9 @@ type ShardCacheConfig struct { // Global batched io_uring reader (shared across all shards). // When set, disk reads go through this instead of the per-shard IOUringFile. BatchIoUringReader *fs.BatchIoUringReader + + // Dedicated io_uring ring for batched writes (shared across all shards). 
+ WriteRing *fs.IoUring } func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { @@ -128,6 +131,11 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { } dm := indices.NewDeleteManager(ki, file, config.DeleteAmortizedStep) + // Attach the dedicated write ring so memtable flushes use batched io_uring. + if config.WriteRing != nil { + file.WriteRing = config.WriteRing + } + sc := &ShardCache{ keyIndex: ki, mm: memtableManager, diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index d618ae24..7460d29c 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -186,6 +186,15 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m batchReader = nil } + // Separate io_uring ring dedicated to batched writes (memtable flushes). + // Kept separate from the read ring to avoid mutex contention between the + // read batch loop and concurrent flushes. + writeRing, err := fs.NewIoUring(256, 0) + if err != nil { + log.Error().Err(err).Msg("Failed to create io_uring write ring, falling back to sequential pwrite") + writeRing = nil + } + batchWindow := time.Duration(0) if config.EnableBatching && config.BatchWindowMicros > 0 { batchWindow = time.Duration(config.BatchWindowMicros) * time.Microsecond @@ -213,6 +222,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m EnableLockless: config.EnableLockless, BatchIoUringReader: batchReader, + WriteRing: writeRing, }, &shardLocks[i]) } From a9cefea2282d7cbf64b5aabda139f9a576e16eb3 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Fri, 13 Feb 2026 09:07:00 +0000 Subject: [PATCH 41/53] fix iouring write --- flashring/internal/fs/wrap_file.go | 75 ++++++++++++++---------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index 89475c79..1141f848 100644 --- a/flashring/internal/fs/wrap_file.go +++ 
b/flashring/internal/fs/wrap_file.go @@ -92,9 +92,9 @@ func (r *WrapAppendFile) Pwrite(buf []byte) (currentPhysicalOffset int64, err er return r.PhysicalWriteOffset, nil } -// PwriteBatch writes a large buffer in chunkSize pieces using a single batched -// io_uring submission. All chunks are submitted in one io_uring_enter call so -// NVMe can process them in parallel rather than sequentially. +// PwriteBatch writes a large buffer in chunkSize pieces via io_uring. +// Chunks are submitted in sub-batches that fit within the ring's SQ depth, +// so arbitrarily large buffers work regardless of ring size. // Returns total bytes written and the final PhysicalWriteOffset. // Requires WriteRing to be set; falls back to sequential Pwrite if nil. func (r *WrapAppendFile) PwriteBatch(buf []byte, chunkSize int) (totalWritten int, fileOffset int64, err error) { @@ -120,47 +120,44 @@ func (r *WrapAppendFile) PwriteBatch(buf []byte, chunkSize int) (totalWritten in } } - numChunks := len(buf) / chunkSize - if len(buf)%chunkSize != 0 { - numChunks++ - } + // Maximum SQEs per submission -- capped to ring depth. 
+ maxPerBatch := int(r.WriteRing.sqEntries) - // Pre-compute all offsets, handling ring-buffer wrap - bufs := make([][]byte, numChunks) - offsets := make([]uint64, numChunks) - offset := r.PhysicalWriteOffset - wrapped := r.wrapped - - for i := 0; i < numChunks; i++ { - start := i * chunkSize - end := start + chunkSize - if end > len(buf) { - end = len(buf) - } - bufs[i] = buf[start:end] - offsets[i] = uint64(offset) - offset += int64(end - start) - if offset >= r.MaxFileSize { - wrapped = true - offset = r.PhysicalStartOffset + for written := 0; written < len(buf); { + // Build a sub-batch that fits within the ring + var bufs [][]byte + var offsets []uint64 + + for i := 0; i < maxPerBatch && written < len(buf); i++ { + end := written + chunkSize + if end > len(buf) { + end = len(buf) + } + bufs = append(bufs, buf[written:end]) + offsets = append(offsets, uint64(r.PhysicalWriteOffset)) + + // Advance write offset, handle ring-buffer wrap + r.PhysicalWriteOffset += int64(end - written) + if r.PhysicalWriteOffset >= r.MaxFileSize { + r.wrapped = true + r.PhysicalWriteOffset = r.PhysicalStartOffset + } + written = end } - } - startTime := time.Now() - results, err := r.WriteRing.SubmitWriteBatch(r.WriteFd, bufs, offsets) - metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) - if err != nil { - return 0, 0, err - } + startTime := time.Now() + results, serr := r.WriteRing.SubmitWriteBatch(r.WriteFd, bufs, offsets) + metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) + if serr != nil { + return totalWritten, r.PhysicalWriteOffset, serr + } - // Update state after successful batch write - for _, n := range results { - totalWritten += n + for _, n := range results { + totalWritten += n + r.LogicalCurrentOffset += int64(n) + r.Stat.WriteCount++ + } } - r.PhysicalWriteOffset = offset - r.wrapped = wrapped - r.LogicalCurrentOffset += int64(totalWritten) - r.Stat.WriteCount += int64(numChunks) return totalWritten, 
r.PhysicalWriteOffset, nil } From 737e6519bb7ae09645fdc77d183efdec3e4e332f Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Fri, 13 Feb 2026 12:41:25 +0000 Subject: [PATCH 42/53] track chunked pwrite and pread latency --- flashring/internal/fs/iouring.go | 6 ++++++ flashring/internal/fs/wrap_file.go | 2 -- flashring/pkg/cache/cache.go | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/flashring/internal/fs/iouring.go b/flashring/internal/fs/iouring.go index 246e1cbd..990f6032 100644 --- a/flashring/internal/fs/iouring.go +++ b/flashring/internal/fs/iouring.go @@ -11,8 +11,10 @@ import ( "sync" "sync/atomic" "syscall" + "time" "unsafe" + "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "golang.org/x/sys/unix" ) @@ -488,6 +490,8 @@ func (r *IoUring) SubmitWriteBatch(fd int, bufs [][]byte, offsets []uint64) ([]i return nil, fmt.Errorf("io_uring_enter: %w", err) } + startTime := time.Now() + // Drain all CQEs (order may differ from submission) results := make([]int, n) for i := 0; i < n; i++ { @@ -506,6 +510,8 @@ func (r *IoUring) SubmitWriteBatch(fd int, bufs [][]byte, offsets []uint64) ([]i if idx >= 0 && idx < n { results[idx] = int(res) } + + metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) } return results, nil diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index 1141f848..16eb30bb 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -145,9 +145,7 @@ func (r *WrapAppendFile) PwriteBatch(buf []byte, chunkSize int) (totalWritten in written = end } - startTime := time.Now() results, serr := r.WriteRing.SubmitWriteBatch(r.WriteFd, bufs, offsets) - metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) if serr != nil { return totalWritten, r.PhysicalWriteOffset, serr } diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 7460d29c..061ef697 100644 --- a/flashring/pkg/cache/cache.go +++ 
b/flashring/pkg/cache/cache.go @@ -178,7 +178,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m batchReader, err := fs.NewBatchIoUringReader(fs.BatchIoUringConfig{ RingDepth: 256, MaxBatch: 256, - Window: time.Microsecond * 500, + Window: time.Millisecond * 2, QueueSize: 1024, }) if err != nil { From 399d7977131a36303891e2a197887452b0c036af Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 17 Feb 2026 06:26:40 +0000 Subject: [PATCH 43/53] iouring no wait fixes --- flashring/internal/fs/batch_iouring.go | 58 +++++++++++--------------- flashring/internal/fs/iouring.go | 28 ++++++++++++- 2 files changed, 50 insertions(+), 36 deletions(-) diff --git a/flashring/internal/fs/batch_iouring.go b/flashring/internal/fs/batch_iouring.go index 3381bbea..5c03b83e 100644 --- a/flashring/internal/fs/batch_iouring.go +++ b/flashring/internal/fs/batch_iouring.go @@ -39,12 +39,16 @@ var batchReqPool = sync.Pool{ // syscall overhead (1 io_uring_enter instead of N) and lets NVMe process // multiple commands in parallel (queue depth > 1). // -// Typical usage: create one global instance shared across all shards. +// Collection uses non-blocking channel drain: after receiving the first +// request, it drains whatever else is already queued (no timer). Under load +// this provides natural batching; under low load single requests go out +// with zero added latency. +// +// CQEs are dispatched individually as they complete (no head-of-line blocking). 
type BatchIoUringReader struct { ring *IoUring reqCh chan *batchReadRequest maxBatch int - window time.Duration closeCh chan struct{} wg sync.WaitGroup } @@ -53,7 +57,7 @@ type BatchIoUringReader struct { type BatchIoUringConfig struct { RingDepth uint32 // io_uring SQ/CQ size (default 256) MaxBatch int // max requests per batch (capped to RingDepth) - Window time.Duration // collection window after first request arrives + Window time.Duration // unused, kept for config compatibility QueueSize int // channel buffer size (default 1024) } @@ -66,9 +70,6 @@ func NewBatchIoUringReader(cfg BatchIoUringConfig) (*BatchIoUringReader, error) if cfg.MaxBatch == 0 || cfg.MaxBatch > int(cfg.RingDepth) { cfg.MaxBatch = int(cfg.RingDepth) } - if cfg.Window == 0 { - cfg.Window = time.Millisecond - } if cfg.QueueSize == 0 { cfg.QueueSize = 1024 } @@ -82,7 +83,6 @@ func NewBatchIoUringReader(cfg BatchIoUringConfig) (*BatchIoUringReader, error) ring: ring, reqCh: make(chan *batchReadRequest, cfg.QueueSize), maxBatch: cfg.MaxBatch, - window: cfg.Window, closeCh: make(chan struct{}), } b.wg.Add(1) @@ -129,21 +129,15 @@ func (b *BatchIoUringReader) Close() { // loop is the single background goroutine that collects and submits batches. // // Phase 1: block on first request (no timer ticking when idle). -// Phase 2: collect up to maxBatch or until the window expires. -// Phase 3: submit the batch in one io_uring_enter call. +// Phase 2: non-blocking drain of whatever else is already queued. +// Phase 3: submit the batch and dispatch CQEs as they complete. func (b *BatchIoUringReader) loop() { defer b.wg.Done() batch := make([]*batchReadRequest, 0, b.maxBatch) - // Pre-allocate and stop the timer so Reset works correctly. 
- collectTimer := time.NewTimer(0) - if !collectTimer.Stop() { - <-collectTimer.C - } - for { - // Phase 1: wait for first request (idle) + // Phase 1: block until the first request arrives select { case req := <-b.reqCh: batch = append(batch, req) @@ -151,35 +145,29 @@ func (b *BatchIoUringReader) loop() { return } - // Phase 2: collect more within the window - collectTimer.Reset(b.window) - collect: + // Phase 2: non-blocking drain -- grab everything already queued + // without waiting. Under load this naturally batches many requests; + // under low load the single request goes out immediately. + drain: for len(batch) < b.maxBatch { select { case req := <-b.reqCh: batch = append(batch, req) - case <-collectTimer.C: - break collect - } - } - // Drain timer if we exited early (maxBatch reached before timer fired) - if !collectTimer.Stop() { - select { - case <-collectTimer.C: default: + break drain } } - // Phase 3: submit the batch + // Phase 3: submit and dispatch b.submitBatch(batch) batch = batch[:0] } } -// submitBatch prepares N SQEs, submits them in one io_uring_enter(N, N), -// drains all CQEs, and dispatches results back to callers via done channels. +// submitBatch prepares N SQEs, submits them (fire-and-forget), then dispatches +// each CQE individually as it completes. Fast reads are dispatched immediately +// without waiting for slow reads in the same batch (no head-of-line blocking). func (b *BatchIoUringReader) submitBatch(batch []*batchReadRequest) { - // log.Info().Msgf("submitting batch of %d requests", len(batch)) n := len(batch) if n == 0 { return @@ -210,8 +198,10 @@ func (b *BatchIoUringReader) submitBatch(batch []*batchReadRequest) { return } - // Submit all at once; kernel waits for all completions before returning. - _, err := b.ring.submit(uint32(prepared)) + // Submit SQEs but do NOT wait for completions (waitNr=0). 
+ // The kernel starts processing I/O immediately; we dispatch each CQE + // as it arrives below, so fast reads aren't blocked by slow ones. + _, err := b.ring.submit(0) if err != nil { b.ring.mu.Unlock() for i := 0; i < prepared; i++ { @@ -220,7 +210,7 @@ func (b *BatchIoUringReader) submitBatch(batch []*batchReadRequest) { return } - // Drain CQEs -- order may differ from submission order. + // Dispatch CQEs one-by-one as they complete. completed := 0 for completed < prepared { cqe, err := b.ring.waitCqe() diff --git a/flashring/internal/fs/iouring.go b/flashring/internal/fs/iouring.go index 990f6032..e3a3cd8c 100644 --- a/flashring/internal/fs/iouring.go +++ b/flashring/internal/fs/iouring.go @@ -329,6 +329,7 @@ func ioUringEnter(fd int, toSubmit, minComplete, flags uint32) (int, error) { } // submit flushes SQEs and calls io_uring_enter if needed. +// Retries automatically on EINTR (signal interruption). func (r *IoUring) submit(waitNr uint32) (int, error) { submitted := r.flushSq() var flags uint32 = 0 @@ -338,7 +339,13 @@ func (r *IoUring) submit(waitNr uint32) (int, error) { if waitNr > 0 { flags |= iouringEnterGetEvents } - return ioUringEnter(r.fd, submitted, waitNr, flags) + for { + ret, err := ioUringEnter(r.fd, submitted, waitNr, flags) + if err == syscall.EINTR { + continue + } + return ret, err + } } // SQPOLL: only enter if kernel thread needs wakeup @@ -349,7 +356,13 @@ func (r *IoUring) submit(waitNr uint32) (int, error) { flags |= iouringEnterGetEvents } if flags != 0 { - return ioUringEnter(r.fd, submitted, waitNr, flags) + for { + ret, err := ioUringEnter(r.fd, submitted, waitNr, flags) + if err == syscall.EINTR { + continue + } + return ret, err + } } return int(submitted), nil } @@ -367,6 +380,9 @@ func (r *IoUring) waitCqe() (*ioUringCqe, error) { // No CQE available, ask the kernel _, err := ioUringEnter(r.fd, 0, 1, iouringEnterGetEvents) if err != nil { + if err == syscall.EINTR { + continue // signal interrupted the syscall; retry + } 
return nil, err } } @@ -382,6 +398,10 @@ func (r *IoUring) seenCqe() { // ----------------------------------------------------------------------- func prepRead(sqe *ioUringSqe, fd int, buf []byte, offset uint64) { + if len(buf) == 0 { + sqe.Opcode = iouringOpNop + return + } sqe.Opcode = iouringOpRead sqe.Fd = int32(fd) sqe.Addr = uint64(uintptr(unsafe.Pointer(&buf[0]))) @@ -390,6 +410,10 @@ func prepRead(sqe *ioUringSqe, fd int, buf []byte, offset uint64) { } func prepWrite(sqe *ioUringSqe, fd int, buf []byte, offset uint64) { + if len(buf) == 0 { + sqe.Opcode = iouringOpNop + return + } sqe.Opcode = iouringOpWrite sqe.Fd = int32(fd) sqe.Addr = uint64(uintptr(unsafe.Pointer(&buf[0]))) From 93c716493968b58fb38b9e1bb68dfa78cf22d493 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Fri, 20 Feb 2026 07:28:54 +0000 Subject: [PATCH 44/53] remove metrics and use statsD only --- flashring/cmd/flashringtest/plan_lockless.go | 18 +- .../cmd/flashringtest/plan_random_gausian.go | 19 +- .../flashringtest/plan_readthrough_gausian.go | 23 +- .../plan_readthrough_gausian_batched.go | 19 +- flashring/go.sum | 37 ++ flashring/internal/fs/wrap_file.go | 5 +- flashring/internal/indicesV3/index_test.go | 10 +- flashring/internal/memtables/manager.go | 9 +- .../internal/memtables/manager_bench_test.go | 2 +- flashring/internal/memtables/memtable.go | 5 +- flashring/internal/pools/leaky_pool.go | 2 - flashring/internal/shard/batch_reader.go | 1 - flashring/internal/shard/batch_reader_v2.go | 1 - flashring/internal/shard/batch_tracker.go | 55 -- flashring/internal/shard/latency_tracker.go | 96 ---- flashring/internal/shard/shard_cache.go | 86 +--- flashring/pkg/cache/badger.go | 62 --- flashring/pkg/cache/cache.go | 143 +----- flashring/pkg/cache/freecache.go | 58 +-- flashring/pkg/metrics/console_logger.go | 143 ------ flashring/pkg/metrics/csv_logger.go | 293 ----------- flashring/pkg/metrics/runmetrics.go | 482 ------------------ flashring/pkg/metrics/statsd_logger.go | 125 +---- 
23 files changed, 125 insertions(+), 1569 deletions(-) delete mode 100644 flashring/internal/shard/batch_tracker.go delete mode 100644 flashring/internal/shard/latency_tracker.go delete mode 100644 flashring/pkg/metrics/console_logger.go delete mode 100644 flashring/pkg/metrics/csv_logger.go delete mode 100644 flashring/pkg/metrics/runmetrics.go diff --git a/flashring/cmd/flashringtest/plan_lockless.go b/flashring/cmd/flashringtest/plan_lockless.go index e52482f7..ea7f8ede 100644 --- a/flashring/cmd/flashringtest/plan_lockless.go +++ b/flashring/cmd/flashringtest/plan_lockless.go @@ -14,7 +14,6 @@ import ( "time" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" - metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -100,22 +99,7 @@ func planLockless() { EnableLockless: true, } - metricsConfig := metrics.MetricsCollectorConfig{ - StatsEnabled: true, - CsvLogging: true, - ConsoleLogging: true, - StatsdLogging: true, - InstantMetrics: true, - AveragedMetrics: true, - Metadata: map[string]any{ - "shards": numShards, - "keys-per-shard": keysPerShard, - "read-workers": readWorkers, - "write-workers": writeWorkers, - "plan": "lockless"}, - } - metricsCollector := metrics.InitMetricsCollector(metricsConfig) - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) + pc, err := cachepkg.NewWrapCache(cfg, mountPoint) if err != nil { panic(err) } diff --git a/flashring/cmd/flashringtest/plan_random_gausian.go b/flashring/cmd/flashringtest/plan_random_gausian.go index 719a8106..f906e320 100644 --- a/flashring/cmd/flashringtest/plan_random_gausian.go +++ b/flashring/cmd/flashringtest/plan_random_gausian.go @@ -13,7 +13,6 @@ import ( "time" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" - metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -96,23 +95,7 @@ func planRandomGaussian() { SampleDuration: 
time.Duration(sampleSecs) * time.Second, } - metricsConfig := metrics.MetricsCollectorConfig{ - StatsEnabled: true, - CsvLogging: true, - ConsoleLogging: true, - StatsdLogging: false, - InstantMetrics: false, - AveragedMetrics: true, - Metadata: map[string]any{ - "shards": numShards, - "keys-per-shard": keysPerShard, - "read-workers": readWorkers, - "write-workers": writeWorkers, - "plan": "random-gausian"}, - } - metricsCollector := metrics.InitMetricsCollector(metricsConfig) - - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) + pc, err := cachepkg.NewWrapCache(cfg, mountPoint) if err != nil { panic(err) } diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian.go b/flashring/cmd/flashringtest/plan_readthrough_gausian.go index 0eac284b..a311d8f6 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian.go @@ -16,8 +16,6 @@ import ( cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" "github.com/rs/zerolog" "github.com/rs/zerolog/log" - - metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" ) func planReadthroughGaussian() { @@ -88,21 +86,6 @@ func planReadthroughGaussian() { memtableSizeInBytes := int32(memtableMB) * 1024 * 1024 fileSizeInBytes := int64(float64(fileSizeMultiplier) * 1024 * 1024 * 1024) // fileSizeMultiplier in GiB - metricsConfig := metrics.MetricsCollectorConfig{ - StatsEnabled: true, - CsvLogging: true, - ConsoleLogging: true, - StatsdLogging: true, - InstantMetrics: false, - AveragedMetrics: true, - Metadata: map[string]any{ - "shards": numShards, - "keys-per-shard": keysPerShard, - "read-workers": readWorkers, - "write-workers": writeWorkers, - "plan": "readthrough"}, - } - cfg := cachepkg.WrapCacheConfig{ NumShards: numShards, KeysPerShard: keysPerShard, @@ -113,9 +96,7 @@ func planReadthroughGaussian() { SampleDuration: time.Duration(sampleSecs) * time.Second, } - metricsCollector := 
metrics.InitMetricsCollector(metricsConfig) - - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) + pc, err := cachepkg.NewWrapCache(cfg, mountPoint) if err != nil { panic(err) } @@ -196,7 +177,7 @@ func planReadthroughGaussian() { if found && string(val) != fmt.Sprintf(str1kb, randomval) { panic("value mismatch") } - if k%5000000 == 0 { + if k%50000 == 0 { fmt.Printf("----------------------------------------------read %d keys %d readerid\n", k, workerID) } } diff --git a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go index d0b5e9c2..756e0d9b 100644 --- a/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go +++ b/flashring/cmd/flashringtest/plan_readthrough_gausian_batched.go @@ -14,7 +14,6 @@ import ( "time" cachepkg "github.com/Meesho/BharatMLStack/flashring/pkg/cache" - metrics "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog" "github.com/rs/zerolog/log" ) @@ -111,23 +110,7 @@ func planReadthroughGaussianBatched() { MaxBatchSize: maxBatchSize, } - metricsConfig := metrics.MetricsCollectorConfig{ - StatsEnabled: true, - CsvLogging: true, - ConsoleLogging: true, - StatsdLogging: false, - InstantMetrics: false, - AveragedMetrics: true, - Metadata: map[string]any{ - "shards": numShards, - "keys-per-shard": keysPerShard, - "read-workers": readWorkers, - "write-workers": writeWorkers, - "plan": "readthrough-batched"}, - } - metricsCollector := metrics.InitMetricsCollector(metricsConfig) - - pc, err := cachepkg.NewWrapCache(cfg, mountPoint, metricsCollector) + pc, err := cachepkg.NewWrapCache(cfg, mountPoint) if err != nil { panic(err) } diff --git a/flashring/go.sum b/flashring/go.sum index 5d69f8d2..18bee494 100644 --- a/flashring/go.sum +++ b/flashring/go.sum @@ -1,20 +1,30 @@ github.com/DataDog/datadog-go/v5 v5.8.2 h1:9IEfH1Mw9AjWwhAMqCAkhbxjuJeMxm2ARX2VdgL+ols= github.com/DataDog/datadog-go/v5 v5.8.2/go.mod 
h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= +github.com/DataDog/datadog-go/v5 v5.8.3 h1:s58CUJ9s8lezjhTNJO/SxkPBv2qZjS3ktpRSqGF5n0s= +github.com/DataDog/datadog-go/v5 v5.8.3/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= github.com/Microsoft/go-winio v0.5.0 h1:Elr9Wn+sGKPlkaBvwu4mTrxtmOp3F3yV9qhaHbXGjwU= github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/5M= github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk= +github.com/coocood/freecache v1.2.5 h1:FmhRQ8cLLVq9zWhHVYODUEZ0xu6rTPrVeAnX1AEIf7I= +github.com/coocood/freecache v1.2.5/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger/v4 v4.9.0 h1:tpqWb0NewSrCYqTvywbcXOhQdWcqephkVkbBmaaqHzc= github.com/dgraph-io/badger/v4 v4.9.0/go.mod h1:5/MEx97uzdPUHR4KtkNt8asfI2T4JiEiQlV7kWUo8c0= +github.com/dgraph-io/badger/v4 v4.9.1 h1:DocZXZkg5JJHJPtUErA0ibyHxOVUDVoXLSCV6t8NC8w= +github.com/dgraph-io/badger/v4 v4.9.1/go.mod h1:5/MEx97uzdPUHR4KtkNt8asfI2T4JiEiQlV7kWUo8c0= github.com/dgraph-io/ristretto/v2 v2.2.0 
h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= +github.com/dgraph-io/ristretto/v2 v2.4.0 h1:I/w09yLjhdcVD2QV192UJcq8dPBaAJb9pOuMyNy0XlU= +github.com/dgraph-io/ristretto/v2 v2.4.0/go.mod h1:0KsrXtXvnv0EqnzyowllbVJB8yBonswa2lTCK2gGo9E= github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= @@ -30,14 +40,20 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/go-viper/mapstructure/v2 v2.5.0 h1:vM5IJoUAy3d7zRSVtIwQgBj7BiWtMPfmPEgAXnvj1Ro= +github.com/go-viper/mapstructure/v2 v2.5.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/flatbuffers v25.12.19+incompatible h1:haMV2JRRJCe1998HeW/p0X9UaMTK6SDo0ffLn2+DbLs= +github.com/google/flatbuffers v25.12.19+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/klauspost/compress 
v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= +github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -58,11 +74,14 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc= github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= +github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4= +github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U= @@ -91,14 
+110,24 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= +github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= +go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= +go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= +go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= +go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= +go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= +go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= 
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -109,6 +138,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60= +golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -123,11 +154,15 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/text v0.34.0 
h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= +golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= @@ -136,6 +171,8 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index 16eb30bb..f367e583 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -88,7 +88,7 @@ func (r *WrapAppendFile) Pwrite(buf []byte) (currentPhysicalOffset int64, err er r.PhysicalWriteOffset = r.PhysicalStartOffset } r.LogicalCurrentOffset += int64(n) - r.Stat.WriteCount++ + return r.PhysicalWriteOffset, nil } @@ -282,6 +282,7 @@ func (f *IOUringFile) PreadAsync(fileOffset int64, buf []byte) (int, error) { startTime := time.Now() n, err := f.ring.SubmitRead(f.ReadFd, buf, uint64(fileOffset)) + metrics.Incr(metrics.KEY_PREAD_COUNT, []string{}) 
metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) if err != nil { return 0, err @@ -307,7 +308,7 @@ func (r *WrapAppendFile) TrimHead() (err error) { if r.PhysicalStartOffset >= r.MaxFileSize { r.PhysicalStartOffset = 0 } - r.Stat.PunchHoleCount++ + metrics.Incr(metrics.KEY_PUNCH_HOLE_COUNT, []string{}) metrics.Timing(metrics.KEY_TRIM_HEAD_LATENCY, time.Since(startTime), []string{}) return nil } diff --git a/flashring/internal/indicesV3/index_test.go b/flashring/internal/indicesV3/index_test.go index 3eecea9d..fe4ca081 100644 --- a/flashring/internal/indicesV3/index_test.go +++ b/flashring/internal/indicesV3/index_test.go @@ -2,17 +2,19 @@ package indicesv2 import ( "fmt" + "sync" "testing" ) func TestIndexAddRbMax(t *testing.T) { loadByteOrder() + mu := &sync.RWMutex{} // Use equal initial and max capacity for the fixed-size ring buffer. rbMax := 1000_000 rbInitial := rbMax hashBits := 16 - idx := NewIndex(hashBits, rbInitial, rbMax, 1) + idx := NewIndex(hashBits, rbInitial, rbMax, 1, mu) // Insert exactly rbMax distinct keys for i := 0; i < rbMax; i++ { @@ -64,7 +66,7 @@ func TestIndexDeleteAndGet(t *testing.T) { rbMax := 99 rbInitial := rbMax hashBits := 16 - idx := NewIndex(hashBits, rbInitial, rbMax, 1) + idx := NewIndex(hashBits, rbInitial, rbMax, 1, nil) // Insert exactly rbMax distinct keys in order for i := 0; i < 33; i++ { @@ -137,11 +139,13 @@ func TestIndexDeleteAndGet(t *testing.T) { func TestIndexDeleteAndGetOverlappingHash(t *testing.T) { loadByteOrder() + mu := &sync.RWMutex{} + // Keep this small and fast rbMax := 99 rbInitial := rbMax hashBits := 16 - idx := NewIndex(hashBits, rbInitial, rbMax, 1) + idx := NewIndex(hashBits, rbInitial, rbMax, 1, mu) // Insert exactly rbMax distinct keys in order for i := 0; i < 33; i++ { diff --git a/flashring/internal/memtables/manager.go b/flashring/internal/memtables/manager.go index a86fb108..41873a24 100644 --- a/flashring/internal/memtables/manager.go +++ 
b/flashring/internal/memtables/manager.go @@ -3,6 +3,7 @@ package memtables import ( "github.com/Meesho/BharatMLStack/flashring/internal/allocators" "github.com/Meesho/BharatMLStack/flashring/internal/fs" + "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog/log" ) @@ -16,11 +17,6 @@ type MemtableManager struct { nextFileOffset int64 nextId uint32 semaphore chan int - stats Stats -} - -type Stats struct { - Flushes int64 } func NewMemtableManager(file *fs.WrapAppendFile, capacity int32) (*MemtableManager, error) { @@ -62,7 +58,6 @@ func NewMemtableManager(file *fs.WrapAppendFile, capacity int32) (*MemtableManag nextFileOffset: 2 * int64(capacity), nextId: 2, semaphore: make(chan int, 1), - stats: Stats{}, } return memtableManager, nil } @@ -92,7 +87,7 @@ func (mm *MemtableManager) flushConsumer(memtable *Memtable) { memtable.Id = mm.nextId mm.nextId++ mm.nextFileOffset += int64(n) - mm.stats.Flushes++ + metrics.Incr(metrics.KEY_MEMTABLE_FLUSH_COUNT, append(metrics.GetShardTag(memtable.ShardIdx), metrics.GetMemtableTag(memtable.Id)...)) } func (mm *MemtableManager) Flush() error { diff --git a/flashring/internal/memtables/manager_bench_test.go b/flashring/internal/memtables/manager_bench_test.go index 28738185..c29c0e52 100644 --- a/flashring/internal/memtables/manager_bench_test.go +++ b/flashring/internal/memtables/manager_bench_test.go @@ -48,7 +48,7 @@ func Benchmark_Puts(b *testing.B) { } } - b.ReportMetric(float64(manager.stats.Flushes), "flushes") + // b.ReportMetric(float64(manager.stats.Flushes), "flushes") b.ReportMetric(float64(b.N*16*1024)/1024/1024, "MB/s") b.ReportAllocs() diff --git a/flashring/internal/memtables/memtable.go b/flashring/internal/memtables/memtable.go index 848f9c92..3be40e4b 100644 --- a/flashring/internal/memtables/memtable.go +++ b/flashring/internal/memtables/memtable.go @@ -4,7 +4,6 @@ import ( "errors" "github.com/Meesho/BharatMLStack/flashring/internal/fs" - 
"github.com/Meesho/BharatMLStack/flashring/pkg/metrics" ) var ( @@ -25,6 +24,7 @@ type Memtable struct { readyForFlush bool next *Memtable prev *Memtable + ShardIdx uint32 } type MemtableConfig struct { @@ -32,6 +32,7 @@ type MemtableConfig struct { id uint32 page *fs.AlignedPage file *fs.WrapAppendFile + shardIdx uint32 } func NewMemtable(config MemtableConfig) (*Memtable, error) { @@ -49,6 +50,7 @@ func NewMemtable(config MemtableConfig) (*Memtable, error) { } return &Memtable{ Id: config.id, + ShardIdx: config.shardIdx, capacity: config.capacity, currentOffset: 0, file: config.file, @@ -104,7 +106,6 @@ func (m *Memtable) Flush() (n int, fileOffset int64, err error) { if len(m.page.Buf)%chunkSize != 0 { numChunks++ } - metrics.Count(metrics.KEY_MEMTABLE_FLUSH_COUNT, int64(numChunks), []string{}) // PwriteBatch submits all chunks in one io_uring_enter when WriteRing is // set, otherwise falls back to sequential pwrite internally. diff --git a/flashring/internal/pools/leaky_pool.go b/flashring/internal/pools/leaky_pool.go index b2a59487..afcd1b2e 100644 --- a/flashring/internal/pools/leaky_pool.go +++ b/flashring/internal/pools/leaky_pool.go @@ -11,7 +11,6 @@ type LeakyPool struct { usage int idx int lock sync.RWMutex - stats *Stats } type Stats struct { @@ -34,7 +33,6 @@ func NewLeakyPool(config LeakyPoolConfig) *LeakyPool { usage: 0, idx: -1, preDrefHook: nil, - stats: &Stats{Usage: 0, Capacity: config.Capacity}, } } diff --git a/flashring/internal/shard/batch_reader.go b/flashring/internal/shard/batch_reader.go index 3896834b..c6d462be 100644 --- a/flashring/internal/shard/batch_reader.go +++ b/flashring/internal/shard/batch_reader.go @@ -68,7 +68,6 @@ func (br *BatchReader) processBatches() { return case firstReq := <-br.requests: batch := br.collectBatch(firstReq) - br.shardCache.Stats.BatchTracker.RecordBatchSize(len(batch)) br.executeBatch(batch) } } diff --git a/flashring/internal/shard/batch_reader_v2.go b/flashring/internal/shard/batch_reader_v2.go index 
2aa99b09..fb614321 100644 --- a/flashring/internal/shard/batch_reader_v2.go +++ b/flashring/internal/shard/batch_reader_v2.go @@ -94,7 +94,6 @@ func (br *BatchReaderV2) processBatchesV2() { return case firstReq := <-br.Requests: batch := br.collectBatchV2(firstReq) - br.shardCache.Stats.BatchTracker.RecordBatchSize(len(batch)) br.executeBatchV2(batch) } } diff --git a/flashring/internal/shard/batch_tracker.go b/flashring/internal/shard/batch_tracker.go deleted file mode 100644 index 5658d0e2..00000000 --- a/flashring/internal/shard/batch_tracker.go +++ /dev/null @@ -1,55 +0,0 @@ -package filecache - -import ( - "sort" - "sync" -) - -type BatchTracker struct { - mu sync.RWMutex - getBatch []int - maxSamples int - getIndex int -} - -// const defaultMaxSamples = 100000 - -func NewBatchTracker() *BatchTracker { - return &BatchTracker{ - getBatch: make([]int, defaultMaxSamples), - maxSamples: defaultMaxSamples, - } -} - -func (bt *BatchTracker) RecordBatchSize(batchSize int) { - bt.mu.Lock() - defer bt.mu.Unlock() - bt.getBatch[bt.getIndex] = batchSize - bt.getIndex = (bt.getIndex + 1) % bt.maxSamples -} - -func (bt *BatchTracker) GetBatchSizePercentiles() (p25, p50, p99 int) { - bt.mu.RLock() - defer bt.mu.RUnlock() - - samples := bt.getIndex - if samples > int(bt.maxSamples) { - samples = int(bt.maxSamples) - } - - if samples == 0 { - return 0, 0, 0 - } - - batchSizesCopy := make([]int, samples) - copy(batchSizesCopy, bt.getBatch[:samples]) - sort.Slice(batchSizesCopy, func(i, j int) bool { - return batchSizesCopy[i] < batchSizesCopy[j] - }) - - p25 = batchSizesCopy[int(float64(samples)*0.25)] - p50 = batchSizesCopy[int(float64(samples)*0.50)] - p99 = batchSizesCopy[int(float64(samples)*0.99)] - - return p25, p50, p99 -} diff --git a/flashring/internal/shard/latency_tracker.go b/flashring/internal/shard/latency_tracker.go deleted file mode 100644 index eeb109c8..00000000 --- a/flashring/internal/shard/latency_tracker.go +++ /dev/null @@ -1,96 +0,0 @@ -package 
filecache - -import ( - "sort" - "sync" - "time" -) - -type LatencyTracker struct { - mu sync.RWMutex - getLatencies []time.Duration - putLatencies []time.Duration - maxSamples int - getIndex int - putIndex int - getCount int64 - putCount int64 -} - -const defaultMaxSamples = 100000 - -func NewLatencyTracker() *LatencyTracker { - return &LatencyTracker{ - getLatencies: make([]time.Duration, defaultMaxSamples), - putLatencies: make([]time.Duration, defaultMaxSamples), - maxSamples: defaultMaxSamples, - } -} - -func (lt *LatencyTracker) RecordGet(duration time.Duration) { - lt.mu.Lock() - defer lt.mu.Unlock() - lt.getLatencies[lt.getIndex] = duration - lt.getIndex = (lt.getIndex + 1) % lt.maxSamples - lt.getCount++ -} - -func (lt *LatencyTracker) RecordPut(duration time.Duration) { - lt.mu.Lock() - defer lt.mu.Unlock() - lt.putLatencies[lt.putIndex] = duration - lt.putIndex = (lt.putIndex + 1) % lt.maxSamples - lt.putCount++ -} - -func (lt *LatencyTracker) GetLatencyPercentiles() (p25, p50, p99 time.Duration) { - lt.mu.RLock() - defer lt.mu.RUnlock() - - samples := lt.getCount - if samples > int64(lt.maxSamples) { - samples = int64(lt.maxSamples) - } - - if samples == 0 { - return 0, 0, 0 - } - - latenciesCopy := make([]time.Duration, samples) - copy(latenciesCopy, lt.getLatencies[:samples]) - sort.Slice(latenciesCopy, func(i, j int) bool { - return latenciesCopy[i] < latenciesCopy[j] - }) - - p25 = latenciesCopy[int(float64(samples)*0.25)] - p50 = latenciesCopy[int(float64(samples)*0.50)] - p99 = latenciesCopy[int(float64(samples)*0.99)] - - return p25, p50, p99 -} - -func (lt *LatencyTracker) PutLatencyPercentiles() (p25, p50, p99 time.Duration) { - lt.mu.RLock() - defer lt.mu.RUnlock() - - samples := lt.putCount - if samples > int64(lt.maxSamples) { - samples = int64(lt.maxSamples) - } - - if samples == 0 { - return 0, 0, 0 - } - - latenciesCopy := make([]time.Duration, samples) - copy(latenciesCopy, lt.putLatencies[:samples]) - sort.Slice(latenciesCopy, func(i, j 
int) bool { - return latenciesCopy[i] < latenciesCopy[j] - }) - - p25 = latenciesCopy[int(float64(samples)*0.25)] - p50 = latenciesCopy[int(float64(samples)*0.50)] - p99 = latenciesCopy[int(float64(samples)*0.99)] - - return p25, p50, p99 -} diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 40d492e4..6f0fef97 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -4,7 +4,6 @@ import ( "fmt" "hash/crc32" "sync" - "sync/atomic" "time" "github.com/Meesho/BharatMLStack/flashring/internal/allocators" @@ -12,6 +11,7 @@ import ( indices "github.com/Meesho/BharatMLStack/flashring/internal/indicesV3" "github.com/Meesho/BharatMLStack/flashring/internal/maths" "github.com/Meesho/BharatMLStack/flashring/internal/memtables" + "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/rs/zerolog/log" ) @@ -25,7 +25,6 @@ type ShardCache struct { dm *indices.DeleteManager predictor *maths.Predictor startAt int64 - Stats *Stats //batching reads BatchReader *BatchReaderV2 @@ -33,42 +32,8 @@ type ShardCache struct { //Lockless read and write ReadCh chan *ReadRequestV2 WriteCh chan *WriteRequestV2 -} - -type Stats struct { - KeyNotFoundCount atomic.Int64 - KeyExpiredCount atomic.Int64 - BadDataCount atomic.Int64 - BadLengthCount atomic.Int64 - BadCR32Count atomic.Int64 - BadKeyCount atomic.Int64 - MemIdCount sync.Map // key: uint32, value: *atomic.Int64 - LastDeletedMemId atomic.Uint32 - DeletedKeyCount atomic.Int64 - BadCRCMemIds sync.Map // key: uint32, value: *atomic.Int64 - BadKeyMemIds sync.Map // key: uint32, value: *atomic.Int64 - BatchTracker *BatchTracker -} -// Helper method to increment a counter in a sync.Map -func (s *Stats) incMapCounter(m *sync.Map, key uint32) { - val, _ := m.LoadOrStore(key, &atomic.Int64{}) - val.(*atomic.Int64).Add(1) -} - -// IncMemIdCount atomically increments the counter for the given memId -func (s *Stats) IncMemIdCount(memId uint32) { - 
s.incMapCounter(&s.MemIdCount, memId) -} - -// IncBadCRCMemIds atomically increments the bad CRC counter for the given memId -func (s *Stats) IncBadCRCMemIds(memId uint32) { - s.incMapCounter(&s.BadCRCMemIds, memId) -} - -// IncBadKeyMemIds atomically increments the bad key counter for the given memId -func (s *Stats) IncBadKeyMemIds(memId uint32) { - s.incMapCounter(&s.BadKeyMemIds, memId) + ShardIdx uint32 } type ShardCacheConfig struct { @@ -144,10 +109,6 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { dm: dm, predictor: config.Predictor, startAt: time.Now().Unix(), - Stats: &Stats{ - // sync.Map fields have zero values that are ready to use - BatchTracker: NewBatchTracker(), - }, } if config.BatchIoUringReader != nil { @@ -216,19 +177,18 @@ func (fc *ShardCache) Put(key string, value []byte, ttlMinutes uint16) error { indices.ByteOrder.PutUint32(buf[0:4], crc) fc.keyIndex.Put(key, length, ttlMinutes, mtId, uint32(offset)) fc.dm.IncMemtableKeyCount(mtId) - fc.Stats.IncMemIdCount(mtId) return nil } func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - fc.Stats.KeyNotFoundCount.Add(1) + metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, false, false } if status == indices.StatusExpired { - fc.Stats.KeyExpiredCount.Add(1) + metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, true, false } @@ -248,7 +208,7 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDiskAsync(int64(fileOffset), length, buf) if n != int(length) { - fc.Stats.BadLengthCount.Add(1) + metrics.Incr(metrics.KEY_BAD_LENGTH_COUNT, append(metrics.GetShardTag(fc.ShardIdx))) return false, nil, 0, false, shouldReWrite } } 
else { @@ -261,13 +221,11 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { computedCR32 := crc32.ChecksumIEEE(buf[4:length]) gotKey := string(buf[4 : 4+len(key)]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count.Add(1) - fc.Stats.IncBadCRCMemIds(memId) + metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) return false, nil, 0, false, shouldReWrite } if gotKey != key { - fc.Stats.BadKeyCount.Add(1) - fc.Stats.IncBadKeyMemIds(memId) + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) return false, nil, 0, false, shouldReWrite } valLen := int(length) - 4 - len(key) @@ -280,12 +238,12 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - fc.Stats.KeyNotFoundCount.Add(1) + metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, false, false // needsSlowPath = false (not found) } if status == indices.StatusExpired { - fc.Stats.KeyExpiredCount.Add(1) + metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, true, false // needsSlowPath = false (expired) } @@ -306,8 +264,7 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) computedCR32 := crc32.ChecksumIEEE(buf[4:]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count.Add(1) - fc.Stats.IncBadCRCMemIds(memId) + metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) _, currMemId, _ := fc.mm.GetMemtable() shouldReWrite := fc.predictor.Predict(uint64(freq), uint64(lastAccess), memId, currMemId) _ = 
shouldReWrite // Not returning shouldReWrite in fast path for simplicity @@ -316,8 +273,7 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) gotKey := string(buf[4 : 4+len(key)]) if gotKey != key { - fc.Stats.BadKeyCount.Add(1) - fc.Stats.IncBadKeyMemIds(memId) + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) return false, nil, 0, false, false } @@ -330,12 +286,12 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - fc.Stats.KeyNotFoundCount.Add(1) + metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, false, false } if status == indices.StatusExpired { - fc.Stats.KeyExpiredCount.Add(1) + metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, true, false } @@ -358,7 +314,7 @@ func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDisk(int64(fileOffset), length, buf) if n != int(length) { - fc.Stats.BadLengthCount.Add(1) + metrics.Incr(metrics.KEY_BAD_LENGTH_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, false, shouldReWrite } @@ -370,15 +326,13 @@ func (fc *ShardCache) validateAndReturnBuffer(key string, buf []byte, length uin gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) computedCR32 := crc32.ChecksumIEEE(buf[4:length]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count.Add(1) - fc.Stats.IncBadCRCMemIds(memId) + metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) return false, nil, 0, false, shouldReWrite } gotKey := string(buf[4 : 4+len(key)]) if 
gotKey != key { - fc.Stats.BadKeyCount.Add(1) - fc.Stats.IncBadKeyMemIds(memId) + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) return false, nil, 0, false, shouldReWrite } @@ -460,11 +414,11 @@ func (fc *ShardCache) processBuffer(key string, buf []byte, length uint16) ReadR gotKey := string(buf[4 : 4+len(key)]) if gotCR32 != computedCR32 { - fc.Stats.BadCR32Count.Add(1) + metrics.Incr(metrics.KEY_BAD_CR32_COUNT, metrics.GetShardTag(fc.ShardIdx)) return ReadResult{Found: false, Error: fmt.Errorf("crc mismatch")} } if gotKey != key { - fc.Stats.BadKeyCount.Add(1) + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, metrics.GetShardTag(fc.ShardIdx)) return ReadResult{Found: false, Error: fmt.Errorf("key mismatch")} } @@ -477,7 +431,3 @@ func (fc *ShardCache) processBuffer(key string, buf []byte, length uint16) ReadR Data: value, } } - -func (fc *ShardCache) GetFileStat() *fs.Stat { - return fc.file.Stat -} diff --git a/flashring/pkg/cache/badger.go b/flashring/pkg/cache/badger.go index 7ff8c691..859c4bac 100644 --- a/flashring/pkg/cache/badger.go +++ b/flashring/pkg/cache/badger.go @@ -1,17 +1,13 @@ package internal import ( - "sync/atomic" "time" - filecache "github.com/Meesho/BharatMLStack/flashring/internal/shard" badger "github.com/dgraph-io/badger/v4" - "github.com/rs/zerolog/log" ) type Badger struct { cache *badger.DB - stats *CacheStats } func NewBadger(config WrapCacheConfig, logStats bool) (*Badger, error) { @@ -42,47 +38,6 @@ func NewBadger(config WrapCacheConfig, logStats bool) (*Badger, error) { } bc := &Badger{ cache: cache, - stats: &CacheStats{ - Hits: atomic.Uint64{}, - TotalGets: atomic.Uint64{}, - TotalPuts: atomic.Uint64{}, - ReWrites: atomic.Uint64{}, - Expired: atomic.Uint64{}, - ShardWiseActiveEntries: atomic.Uint64{}, - LatencyTracker: filecache.NewLatencyTracker(), - }, - } - - if logStats { - go func() { - sleepDuration := 10 * time.Second - var prevTotalGets, prevTotalPuts uint64 - 
for { - time.Sleep(sleepDuration) - - totalGets := bc.stats.TotalGets.Load() - totalPuts := bc.stats.TotalPuts.Load() - getsPerSec := float64(totalGets-prevTotalGets) / sleepDuration.Seconds() - putsPerSec := float64(totalPuts-prevTotalPuts) / sleepDuration.Seconds() - - log.Info().Msgf("Shard %d HitRate: %v", 0, cache.BlockCacheMetrics().Hits()) - log.Info().Msgf("Shard %d Expired: %v", 0, cache.BlockCacheMetrics().Misses()) - log.Info().Msgf("Shard %d Total: %v", 0, cache.BlockCacheMetrics().KeysEvicted()) - log.Info().Msgf("Gets/sec: %v", getsPerSec) - log.Info().Msgf("Puts/sec: %v", putsPerSec) - - getP25, getP50, getP99 := bc.stats.LatencyTracker.GetLatencyPercentiles() - putP25, putP50, putP99 := bc.stats.LatencyTracker.PutLatencyPercentiles() - - log.Info().Msgf("Get Count: %v", totalGets) - log.Info().Msgf("Put Count: %v", totalPuts) - log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99) - log.Info().Msgf("Put Latencies - P25: %v, P50: %v, P99: %v", putP25, putP50, putP99) - - prevTotalGets = totalGets - prevTotalPuts = totalPuts - } - }() } return bc, nil @@ -90,12 +45,6 @@ func NewBadger(config WrapCacheConfig, logStats bool) (*Badger, error) { func (b *Badger) Put(key string, value []byte, exptimeInMinutes uint16) error { - start := time.Now() - defer func() { - b.stats.LatencyTracker.RecordPut(time.Since(start)) - }() - - b.stats.TotalPuts.Add(1) err := b.cache.Update(func(txn *badger.Txn) error { entry := badger.NewEntry([]byte(key), value).WithTTL(time.Duration(exptimeInMinutes) * time.Minute) err := txn.SetEntry(entry) @@ -106,13 +55,6 @@ func (b *Badger) Put(key string, value []byte, exptimeInMinutes uint16) error { func (b *Badger) Get(key string) ([]byte, bool, bool) { - start := time.Now() - defer func() { - b.stats.LatencyTracker.RecordGet(time.Since(start)) - }() - - b.stats.TotalGets.Add(1) - val := make([]byte, 0) err := b.cache.View(func(txn *badger.Txn) error { item, err := txn.Get([]byte(key)) @@ -121,10 
+63,6 @@ func (b *Badger) Get(key string) ([]byte, bool, bool) { } val, err = item.ValueCopy(val) - if err != nil { - b.stats.Hits.Add(1) - } - return err }) return val, err != badger.ErrKeyNotFound, false diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 061ef697..86bf1b10 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -6,7 +6,6 @@ import ( "path/filepath" "strconv" "sync" - "sync/atomic" "time" "github.com/Meesho/BharatMLStack/flashring/internal/fs" @@ -42,27 +41,10 @@ var ( ) type WrapCache struct { - shards []*filecache.ShardCache - shardLocks []sync.RWMutex - predictor *maths.Predictor - stats []*CacheStats - metricsCollector *metrics.MetricsCollector - batchReader *fs.BatchIoUringReader // global batched io_uring reader -} - -type CacheStats struct { - Hits atomic.Uint64 - TotalGets atomic.Uint64 - TotalPuts atomic.Uint64 - ReWrites atomic.Uint64 - Expired atomic.Uint64 - ShardWiseActiveEntries atomic.Uint64 - LatencyTracker *filecache.LatencyTracker - BatchTracker *filecache.BatchTracker - - PrevHits atomic.Uint64 - PrevTotalGets atomic.Uint64 - timeStarted time.Time + shards []*filecache.ShardCache + shardLocks []sync.RWMutex + predictor *maths.Predictor + batchReader *fs.BatchIoUringReader // global batched io_uring reader } type WrapCacheConfig struct { @@ -82,14 +64,11 @@ type WrapCacheConfig struct { //lockless mode for PutLL/GetLL EnableLockless bool - // Optional metrics recorder - MetricsRecorder metrics.MetricsRecorder - //Badger MountPoint string } -func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *metrics.MetricsCollector) (*WrapCache, error) { +func NewWrapCache(config WrapCacheConfig, mountPoint string) (*WrapCache, error) { if config.NumShards <= 0 { return nil, ErrNumShardLessThan1 } @@ -226,72 +205,13 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string, metricsCollector *m }, &shardLocks[i]) } - stats := make([]*CacheStats, config.NumShards) 
- for i := 0; i < config.NumShards; i++ { - stats[i] = &CacheStats{LatencyTracker: filecache.NewLatencyTracker(), BatchTracker: filecache.NewBatchTracker()} - } wc := &WrapCache{ - shards: shards, - shardLocks: shardLocks, - predictor: predictor, - stats: stats, - metricsCollector: metricsCollector, - batchReader: batchReader, + shards: shards, + shardLocks: shardLocks, + predictor: predictor, + batchReader: batchReader, } - if metricsCollector.Config.StatsEnabled { - - go func() { - sleepDuration := 10 * time.Second - - for { - time.Sleep(sleepDuration) - - for i := 0; i < config.NumShards; i++ { - - getP25, getP50, getP99 := wc.stats[i].LatencyTracker.GetLatencyPercentiles() - putP25, putP50, putP99 := wc.stats[i].LatencyTracker.PutLatencyPercentiles() - - shardGets := wc.stats[i].TotalGets.Load() - shardPuts := wc.stats[i].TotalPuts.Load() - shardHits := wc.stats[i].Hits.Load() - shardExpired := wc.stats[i].Expired.Load() - shardReWrites := wc.stats[i].ReWrites.Load() - shardActiveEntries := wc.stats[i].ShardWiseActiveEntries.Load() - - wc.metricsCollector.RecordRP25(i, getP25) - wc.metricsCollector.RecordRP50(i, getP50) - wc.metricsCollector.RecordRP99(i, getP99) - wc.metricsCollector.RecordWP25(i, putP25) - wc.metricsCollector.RecordWP50(i, putP50) - wc.metricsCollector.RecordWP99(i, putP99) - - wc.metricsCollector.RecordActiveEntries(i, int64(shardActiveEntries)) - wc.metricsCollector.RecordExpiredEntries(i, int64(shardExpired)) - wc.metricsCollector.RecordRewrites(i, int64(shardReWrites)) - wc.metricsCollector.RecordGets(i, int64(shardGets)) - wc.metricsCollector.RecordPuts(i, int64(shardPuts)) - wc.metricsCollector.RecordHits(i, int64(shardHits)) - - //shard level index and rb data - actually send to metrics collector! 
- wc.metricsCollector.RecordKeyNotFoundCount(i, wc.shards[i].Stats.KeyNotFoundCount.Load()) - wc.metricsCollector.RecordKeyExpiredCount(i, wc.shards[i].Stats.KeyExpiredCount.Load()) - wc.metricsCollector.RecordBadDataCount(i, wc.shards[i].Stats.BadDataCount.Load()) - wc.metricsCollector.RecordBadLengthCount(i, wc.shards[i].Stats.BadLengthCount.Load()) - wc.metricsCollector.RecordBadCR32Count(i, wc.shards[i].Stats.BadCR32Count.Load()) - wc.metricsCollector.RecordBadKeyCount(i, wc.shards[i].Stats.BadKeyCount.Load()) - wc.metricsCollector.RecordDeletedKeyCount(i, wc.shards[i].Stats.DeletedKeyCount.Load()) - - //wrapAppendFilt stats - wc.metricsCollector.RecordWriteCount(i, wc.shards[i].GetFileStat().WriteCount) - wc.metricsCollector.RecordPunchHoleCount(i, wc.shards[i].GetFileStat().PunchHoleCount) - - } - - log.Error().Msgf("GridSearchActive: %v", wc.predictor.GridSearchEstimator.IsGridSearchActive()) - } - }() - } return wc, nil } @@ -311,14 +231,13 @@ func (wc *WrapCache) PutLL(key string, value []byte, exptimeInMinutes uint16) er } if h32%100 < 10 { - wc.stats[shardIdx].ShardWiseActiveEntries.Store(uint64(wc.shards[shardIdx].GetRingBufferActiveEntries())) + metrics.Incr(metrics.KEY_RINGBUFFER_ACTIVE_ENTRIES, metrics.GetShardTag(shardIdx)) } op := <-result filecache.ErrorPool.Put(result) - wc.stats[shardIdx].TotalPuts.Add(1) - wc.stats[shardIdx].LatencyTracker.RecordPut(time.Since(start)) - metrics.Timing(metrics.KEY_WRITE_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) + metrics.Incr(metrics.KEY_PUTS, metrics.GetShardTag(shardIdx)) + metrics.Timing(metrics.KEY_PUT_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) return op } @@ -355,14 +274,13 @@ func (wc *WrapCache) GetLL(key string) ([]byte, bool, bool) { filecache.ReadRequestPool.Put(req) if op.Found && !op.Expired { - wc.stats[shardIdx].Hits.Add(1) + metrics.Incr(metrics.KEY_HITS, metrics.GetShardTag(shardIdx)) } if op.Expired { 
- wc.stats[shardIdx].Expired.Add(1) + metrics.Incr(metrics.KEY_EXPIRED_ENTRIES, metrics.GetShardTag(shardIdx)) } - wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start)) - metrics.Timing(metrics.KEY_READ_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) - wc.stats[shardIdx].TotalGets.Add(1) + metrics.Timing(metrics.KEY_GET_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) + metrics.Incr(metrics.KEY_GETS, metrics.GetShardTag(shardIdx)) return op.Data, op.Found, op.Expired } @@ -374,8 +292,7 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro start := time.Now() defer func() { - wc.stats[shardIdx].LatencyTracker.RecordPut(time.Since(start)) - metrics.Timing(metrics.KEY_WRITE_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) + metrics.Timing(metrics.KEY_PUT_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) }() start = time.Now() @@ -388,9 +305,9 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro log.Error().Err(err).Msgf("Put failed for key: %s", key) return fmt.Errorf("put failed for key: %s", key) } - wc.stats[shardIdx].TotalPuts.Add(1) + metrics.Incr(metrics.KEY_PUTS, metrics.GetShardTag(shardIdx)) if h32%100 < 10 { - wc.stats[shardIdx].ShardWiseActiveEntries.Store(uint64(wc.shards[shardIdx].GetRingBufferActiveEntries())) + metrics.Incr(metrics.KEY_RINGBUFFER_ACTIVE_ENTRIES, metrics.GetShardTag(shardIdx)) } return nil @@ -402,8 +319,7 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { start := time.Now() defer func() { - wc.stats[shardIdx].LatencyTracker.RecordGet(time.Since(start)) - metrics.Timing(metrics.KEY_READ_LATENCY_STATSD, time.Since(start), metrics.BuildTag(metrics.NewTag(metrics.TAG_SHARD_IDX, strconv.Itoa(int(shardIdx))))) + metrics.Timing(metrics.KEY_GET_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) 
}() var keyFound bool @@ -442,26 +358,19 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { } if keyFound && !expired { - wc.stats[shardIdx].Hits.Add(1) + metrics.Incr(metrics.KEY_HITS, metrics.GetShardTag(shardIdx)) } if expired { - wc.stats[shardIdx].Expired.Add(1) + metrics.Incr(metrics.KEY_EXPIRED_ENTRIES, metrics.GetShardTag(shardIdx)) } - wc.stats[shardIdx].TotalGets.Add(1) + metrics.Incr(metrics.KEY_GETS, metrics.GetShardTag(shardIdx)) if shouldReWrite { - wc.stats[shardIdx].ReWrites.Add(1) + metrics.Incr(metrics.KEY_REWRITES, metrics.GetShardTag(shardIdx)) wc.Put(key, valCopy, remainingTTL) } - if time.Since(wc.stats[shardIdx].timeStarted) > 10*time.Second { - //observing hit rate every call can be avoided because average remains the same - hitRate := float64(wc.stats[shardIdx].Hits.Load()-wc.stats[shardIdx].PrevHits.Load()) / float64(wc.stats[shardIdx].TotalGets.Load()-wc.stats[shardIdx].PrevTotalGets.Load()) - wc.predictor.Observe(hitRate) - - wc.stats[shardIdx].timeStarted = time.Now() - wc.stats[shardIdx].PrevHits.Store(wc.stats[shardIdx].Hits.Load()) - wc.stats[shardIdx].PrevTotalGets.Store(wc.stats[shardIdx].TotalGets.Load()) - } + //todo: track hit rate here using + // wc.predictor.Observe(hitRate) return val, keyFound, expired } diff --git a/flashring/pkg/cache/freecache.go b/flashring/pkg/cache/freecache.go index df0f0f75..f16191c6 100644 --- a/flashring/pkg/cache/freecache.go +++ b/flashring/pkg/cache/freecache.go @@ -2,17 +2,12 @@ package internal import ( "runtime/debug" - "sync/atomic" - "time" - filecache "github.com/Meesho/BharatMLStack/flashring/internal/shard" "github.com/coocood/freecache" - "github.com/rs/zerolog/log" ) type Freecache struct { cache *freecache.Cache - stats *CacheStats } func NewFreecache(config WrapCacheConfig, logStats bool) (*Freecache, error) { @@ -22,47 +17,6 @@ func NewFreecache(config WrapCacheConfig, logStats bool) (*Freecache, error) { fc := &Freecache{ cache: cache, - stats: &CacheStats{ - Hits: 
atomic.Uint64{}, - TotalGets: atomic.Uint64{}, - TotalPuts: atomic.Uint64{}, - ReWrites: atomic.Uint64{}, - Expired: atomic.Uint64{}, - ShardWiseActiveEntries: atomic.Uint64{}, - LatencyTracker: filecache.NewLatencyTracker(), - }, - } - - if logStats { - go func() { - sleepDuration := 10 * time.Second - var prevTotalGets, prevTotalPuts uint64 - for { - time.Sleep(sleepDuration) - - totalGets := fc.stats.TotalGets.Load() - totalPuts := fc.stats.TotalPuts.Load() - getsPerSec := float64(totalGets-prevTotalGets) / sleepDuration.Seconds() - putsPerSec := float64(totalPuts-prevTotalPuts) / sleepDuration.Seconds() - - log.Info().Msgf("Shard %d HitRate: %v", 0, cache.HitRate()) - log.Info().Msgf("Shard %d Expired: %v", 0, cache.ExpiredCount()) - log.Info().Msgf("Shard %d Total: %v", 0, cache.EntryCount()) - log.Info().Msgf("Gets/sec: %v", getsPerSec) - log.Info().Msgf("Puts/sec: %v", putsPerSec) - - getP25, getP50, getP99 := fc.stats.LatencyTracker.GetLatencyPercentiles() - putP25, putP50, putP99 := fc.stats.LatencyTracker.PutLatencyPercentiles() - - log.Info().Msgf("Get Count: %v", totalGets) - log.Info().Msgf("Put Count: %v", totalPuts) - log.Info().Msgf("Get Latencies - P25: %v, P50: %v, P99: %v", getP25, getP50, getP99) - log.Info().Msgf("Put Latencies - P25: %v, P50: %v, P99: %v", putP25, putP50, putP99) - - prevTotalGets = totalGets - prevTotalPuts = totalPuts - } - }() } return fc, nil @@ -70,27 +24,17 @@ func NewFreecache(config WrapCacheConfig, logStats bool) (*Freecache, error) { } func (c *Freecache) Put(key string, value []byte, exptimeInMinutes uint16) error { - start := time.Now() - defer func() { - c.stats.LatencyTracker.RecordPut(time.Since(start)) - }() - c.stats.TotalPuts.Add(1) c.cache.Set([]byte(key), value, int(exptimeInMinutes)*60) return nil } func (c *Freecache) Get(key string) ([]byte, bool, bool) { - start := time.Now() - defer func() { - c.stats.LatencyTracker.RecordGet(time.Since(start)) - }() - c.stats.TotalGets.Add(1) val, err := 
c.cache.Get([]byte(key)) if err != nil { return nil, false, false } - c.stats.Hits.Add(1) + return val, true, false } diff --git a/flashring/pkg/metrics/console_logger.go b/flashring/pkg/metrics/console_logger.go deleted file mode 100644 index c3a90311..00000000 --- a/flashring/pkg/metrics/console_logger.go +++ /dev/null @@ -1,143 +0,0 @@ -package metrics - -import ( - "time" - - "github.com/rs/zerolog/log" -) - -func RunConsoleLogger(metricsCollector *MetricsCollector) { - - // start a ticker to log the metrics every 30 seconds - - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() - - shards := metricsCollector.Config.Metadata["shards"].(int) - - prevGetsTotal := uint64(0) - prevPutsTotal := uint64(0) - prevHitsTotal := uint64(0) - prevExpiredTotal := uint64(0) - prevReWritesTotal := uint64(0) - prevActiveEntriesTotal := uint64(0) - - for { - select { - case <-metricsCollector.stopCh: - return - case <-ticker.C: - currentMetrics = metricsCollector.GetMetrics() - - getsTotal := uint64(0) - putsTotal := uint64(0) - hitsTotal := uint64(0) - expiredTotal := uint64(0) - reWritesTotal := uint64(0) - activeEntriesTotal := uint64(0) - - rp99 := time.Duration(0) - rp50 := time.Duration(0) - rp25 := time.Duration(0) - wp99 := time.Duration(0) - wp50 := time.Duration(0) - wp25 := time.Duration(0) - - for _, shard := range currentMetrics.ShardMetrics { - getsTotal += uint64(shard.Gets) - putsTotal += uint64(shard.Puts) - hitsTotal += uint64(shard.Hits) - expiredTotal += uint64(shard.ExpiredEntries) - reWritesTotal += uint64(shard.Rewrites) - activeEntriesTotal += uint64(shard.ActiveEntries) - - rp99 += shard.RP99 - rp50 += shard.RP50 - rp25 += shard.RP25 - wp99 += shard.WP99 - wp50 += shard.WP50 - wp25 += shard.WP25 - } - - rp99 = rp99 / time.Duration(shards) - rp50 = rp50 / time.Duration(shards) - rp25 = rp25 / time.Duration(shards) - wp99 = wp99 / time.Duration(shards) - wp50 = wp50 / time.Duration(shards) - wp25 = wp25 / time.Duration(shards) - - rThroughput 
:= int(float64(getsTotal-prevGetsTotal) / float64(30)) - wThroughput := int(float64(putsTotal-prevPutsTotal) / float64(30)) - hitRate := float64(hitsTotal-prevHitsTotal) / float64(getsTotal-prevGetsTotal) - activeEntries := float64(activeEntriesTotal-prevActiveEntriesTotal) / float64(30) - expiredEntries := float64(expiredTotal - prevExpiredTotal) - reWrites := float64(reWritesTotal - prevReWritesTotal) - - log.Info().Msgf("RP99: %v", rp99) - log.Info().Msgf("RP50: %v", rp50) - log.Info().Msgf("RP25: %v", rp25) - log.Info().Msgf("WP99: %v", wp99) - log.Info().Msgf("WP50: %v", wp50) - log.Info().Msgf("WP25: %v", wp25) - log.Info().Msgf("RThroughput: %v/s", rThroughput) - log.Info().Msgf("WThroughput: %v/s", wThroughput) - log.Info().Msgf("HitRate: %v", hitRate) - log.Info().Msgf("ActiveEntries: %v", activeEntries) - log.Info().Msgf("ExpiredEntries: %v", expiredEntries) - log.Info().Msgf("ReWrites: %v", reWrites) - - keyNotFoundTotal := int64(0) - keyExpiredTotal := int64(0) - badDataTotal := int64(0) - badLengthTotal := int64(0) - badCR32Total := int64(0) - badKeyTotal := int64(0) - deletedKeyTotal := int64(0) - writeTotal := int64(0) - punchHoleTotal := int64(0) - - for _, shard := range currentMetrics.ShardIndexMetrics { - keyNotFoundTotal += shard.KeyNotFoundCount - keyExpiredTotal += shard.KeyExpiredCount - badDataTotal += shard.BadDataCount - badLengthTotal += shard.BadLengthCount - badCR32Total += shard.BadCR32Count - badKeyTotal += shard.BadKeyCount - deletedKeyTotal += shard.DeletedKeyCount - writeTotal += shard.WriteCount - punchHoleTotal += shard.PunchHoleCount - } - - log.Info().Msgf("KeyNotFoundTotal: %v", keyNotFoundTotal) - log.Info().Msgf("KeyExpiredTotal: %v", keyExpiredTotal) - log.Info().Msgf("BadDataTotal: %v", badDataTotal) - log.Info().Msgf("BadLengthTotal: %v", badLengthTotal) - log.Info().Msgf("BadCR32Total: %v", badCR32Total) - log.Info().Msgf("BadKeyTotal: %v", badKeyTotal) - log.Info().Msgf("DeletedKeyTotal: %v", deletedKeyTotal) - 
log.Info().Msgf("WriteTotal: %v", writeTotal) - log.Info().Msgf("PunchHoleTotal: %v", punchHoleTotal) - - // Debug: Log cumulative totals to understand the issue - log.Info().Msgf("DEBUG - GetsTotal: %v, HitsTotal: %v, PutsTotal: %v, ActiveEntriesTotal: %v", getsTotal, hitsTotal, putsTotal, activeEntriesTotal) - - // Debug: Log per-shard ActiveEntries to check distribution (first 5 shards) - if len(currentMetrics.ShardMetrics) >= 5 { - log.Info().Msgf("DEBUG PER-SHARD ActiveEntries - shard0: %d, shard1: %d, shard2: %d, shard3: %d, shard4: %d", - currentMetrics.ShardMetrics[0].ActiveEntries, - currentMetrics.ShardMetrics[1].ActiveEntries, - currentMetrics.ShardMetrics[2].ActiveEntries, - currentMetrics.ShardMetrics[3].ActiveEntries, - currentMetrics.ShardMetrics[4].ActiveEntries) - } - - // Update prev values for next iteration - prevGetsTotal = getsTotal - prevPutsTotal = putsTotal - prevHitsTotal = hitsTotal - prevExpiredTotal = expiredTotal - prevReWritesTotal = reWritesTotal - prevActiveEntriesTotal = activeEntriesTotal - } - } -} diff --git a/flashring/pkg/metrics/csv_logger.go b/flashring/pkg/metrics/csv_logger.go deleted file mode 100644 index 95c54fea..00000000 --- a/flashring/pkg/metrics/csv_logger.go +++ /dev/null @@ -1,293 +0,0 @@ -package metrics - -import ( - "bufio" - "encoding/csv" - "fmt" - "log" - "os" - "os/signal" - "runtime" - "strconv" - "strings" - "syscall" - "time" -) - -// --- CSV Configuration --- -const CSVFileName = "performance_results.csv" - -type CsvLogger struct { - prevGetsTotal uint64 - prevPutsTotal uint64 - prevHitsTotal uint64 - prevExpiredTotal uint64 - prevReWritesTotal uint64 - prevActiveEntriesTotal uint64 - - samplesRthroguhput []float64 - samplesWthroguhput []float64 - samplesHitRate []float64 - samplesActiveEntries []float64 - samplesExpiredEntries []float64 - samplesReWrites []float64 - samplesRP99 []time.Duration - samplesRP50 []time.Duration - samplesRP25 []time.Duration - samplesWP99 []time.Duration - samplesWP50 
[]time.Duration - samplesWP25 []time.Duration - - totalSamples int - - metricsCollector *MetricsCollector -} - -func (c *CsvLogger) collectMetrics() *time.Ticker { - - //tickered every 30 seconds - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() - - for range ticker.C { - shards := metricsCollector.Config.Metadata["shards"].(int) - currentMetrics = metricsCollector.GetMetrics() - - getsTotal := uint64(0) - putsTotal := uint64(0) - hitsTotal := uint64(0) - expiredTotal := uint64(0) - reWritesTotal := uint64(0) - activeEntriesTotal := uint64(0) - - rp99 := time.Duration(0) - rp50 := time.Duration(0) - rp25 := time.Duration(0) - wp99 := time.Duration(0) - wp50 := time.Duration(0) - wp25 := time.Duration(0) - - for _, shard := range currentMetrics.ShardMetrics { - getsTotal += uint64(shard.Gets) - putsTotal += uint64(shard.Puts) - hitsTotal += uint64(shard.Hits) - expiredTotal += uint64(shard.ExpiredEntries) - reWritesTotal += uint64(shard.Rewrites) - activeEntriesTotal += uint64(shard.ActiveEntries) - - rp99 += shard.RP99 - rp50 += shard.RP50 - rp25 += shard.RP25 - wp99 += shard.WP99 - wp50 += shard.WP50 - wp25 += shard.WP25 - } - - rThroughput := float64(getsTotal-c.prevGetsTotal) / float64(30) - wThroughput := float64(putsTotal-c.prevPutsTotal) / float64(30) - hitRate := float64(hitsTotal-c.prevHitsTotal) / float64(getsTotal-c.prevGetsTotal) - activeEntries := float64(activeEntriesTotal - c.prevActiveEntriesTotal) - expiredEntries := float64(expiredTotal - c.prevExpiredTotal) - reWrites := float64(reWritesTotal - c.prevReWritesTotal) - - rp99 = rp99 / time.Duration(shards) - rp50 = rp50 / time.Duration(shards) - rp25 = rp25 / time.Duration(shards) - wp99 = wp99 / time.Duration(shards) - wp50 = wp50 / time.Duration(shards) - wp25 = wp25 / time.Duration(shards) - - c.samplesRthroguhput = append(c.samplesRthroguhput, rThroughput) - c.samplesWthroguhput = append(c.samplesWthroguhput, wThroughput) - c.samplesHitRate = append(c.samplesHitRate, hitRate) - 
c.samplesActiveEntries = append(c.samplesActiveEntries, activeEntries) - c.samplesExpiredEntries = append(c.samplesExpiredEntries, expiredEntries) - c.samplesReWrites = append(c.samplesReWrites, reWrites) - c.samplesRP99 = append(c.samplesRP99, rp99) - c.samplesRP50 = append(c.samplesRP50, rp50) - c.samplesRP25 = append(c.samplesRP25, rp25) - c.samplesWP99 = append(c.samplesWP99, wp99) - c.samplesWP50 = append(c.samplesWP50, wp50) - c.samplesWP25 = append(c.samplesWP25, wp25) - - c.prevGetsTotal = getsTotal - c.prevPutsTotal = putsTotal - c.prevHitsTotal = hitsTotal - c.prevExpiredTotal = expiredTotal - c.prevReWritesTotal = reWritesTotal - c.prevActiveEntriesTotal = activeEntriesTotal - } - - return ticker - -} - -// RunCSVLoggerWaitForShutdown waits for shutdown signal and logs final metrics to CSV -func (c *CsvLogger) RunCSVLoggerWaitForShutdown() { - - ticker := c.collectMetrics() - // --- Set up Signal Handling --- - stopChan := make(chan os.Signal, 1) - signal.Notify(stopChan, syscall.SIGINT, syscall.SIGTERM) - - fmt.Println("Program running. Press Ctrl+C to stop and log results to CSV...") - - // --- Wait for Stop Signal --- - <-stopChan - fmt.Println("\nTermination signal received. Stopping work and logging results...") - - // Stop the metrics collector - if metricsCollector != nil { - ticker.Stop() - metricsCollector.Stop() - } - - // --- Log Data to CSV --- - if err := c.LogResultsToCSV(); err != nil { - log.Fatalf("FATAL: Failed to log results to CSV: %v", err) - } - - fmt.Printf("Successfully logged results to %s.\n", CSVFileName) - - // Exit the program since we're running in a goroutine - os.Exit(0) -} - -func (c *CsvLogger) LogResultsToCSV() error { - // 1. Check if the file exists to determine if we need a header row. 
- file, err := os.OpenFile(CSVFileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return fmt.Errorf("failed to open CSV file: %w", err) - } - defer file.Close() - - writer := csv.NewWriter(file) - defer writer.Flush() // Crucial to ensure data is written to the file before exiting. - - // The list of all your column headers (per-shard metrics) - header := []string{ - "SHARDS", "KEYS_PER_SHARD", "READ_WORKERS", "WRITE_WORKERS", "PLAN", - "R_THROUGHPUT", "R_P99", "R_P50", "R_P25", "W_THROUGHPUT", "W_P99", "W_P50", "W_P25", - "HIT_RATE", "CPU", "MEMORY", "TIME", - } - - // Determine if the file is new (or empty) and needs the header - fileInfo, _ := file.Stat() - if fileInfo.Size() == 0 { - if err := writer.Write(header); err != nil { - return fmt.Errorf("error writing CSV header: %w", err) - } - } - - metadata := c.metricsCollector.Config.Metadata - timestamp := time.Now().In(time.FixedZone("IST", 5*60*60+30*60)).Format("2006-01-02 15:04:05") - - dataRow := []string{ - // Input Parameters - strconv.Itoa(metadata["shards"].(int)), - strconv.Itoa(metadata["keys_per_shard"].(int)), - strconv.Itoa(metadata["read_workers"].(int)), - strconv.Itoa(metadata["write_workers"].(int)), - metadata["plan"].(string), - - // averaged observation parameters - //sum sample and divide by total samples - fmt.Sprintf("%v", averageFloat64(c.samplesRthroguhput)), - fmt.Sprintf("%v", averageDuration(c.samplesRP99)), - fmt.Sprintf("%v", averageDuration(c.samplesRP50)), - fmt.Sprintf("%v", averageDuration(c.samplesRP25)), - fmt.Sprintf("%v", averageFloat64(c.samplesWthroguhput)), - fmt.Sprintf("%v", averageDuration(c.samplesWP99)), - fmt.Sprintf("%v", averageDuration(c.samplesWP50)), - fmt.Sprintf("%v", averageDuration(c.samplesWP25)), - fmt.Sprintf("%v", averageFloat64(c.samplesHitRate)), - fmt.Sprintf("%v", getCPUUsagePercent()), - fmt.Sprintf("%v", getMemoryUsageMB()), - timestamp, - } - - if err := writer.Write(dataRow); err != nil { - return fmt.Errorf("error writing 
CSV data row: %w", err) - } - - return nil -} - -func averageFloat64(samples []float64) float64 { - sum := 0.0 - for _, sample := range samples { - sum += sample - } - return sum / float64(len(samples)) -} - -func averageDuration(samples []time.Duration) time.Duration { - sum := time.Duration(0) - for _, sample := range samples { - sum += sample - } - return sum / time.Duration(len(samples)) -} - -// getMemoryUsageMB returns the current memory usage of this process in MB -func getMemoryUsageMB() float64 { - var m runtime.MemStats - runtime.ReadMemStats(&m) - // Alloc is bytes of allocated heap objects - return float64(m.Alloc) / 1024 / 1024 -} - -// getCPUUsagePercent returns the CPU usage percentage for this process -// It measures CPU usage over a short interval -func getCPUUsagePercent() float64 { - // Read initial CPU stats - idle1, total1 := getCPUStats() - time.Sleep(100 * time.Millisecond) - // Read CPU stats again - idle2, total2 := getCPUStats() - - idleDelta := float64(idle2 - idle1) - totalDelta := float64(total2 - total1) - - if totalDelta == 0 { - return 0 - } - - cpuUsage := (1.0 - idleDelta/totalDelta) * 100.0 - return cpuUsage -} - -// getCPUStats reads /proc/stat and returns idle and total CPU time -func getCPUStats() (idle, total uint64) { - file, err := os.Open("/proc/stat") - if err != nil { - return 0, 0 - } - defer file.Close() - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Text() - if strings.HasPrefix(line, "cpu ") { - fields := strings.Fields(line) - if len(fields) < 5 { - return 0, 0 - } - // fields: cpu user nice system idle iowait irq softirq steal guest guest_nice - var values []uint64 - for _, field := range fields[1:] { - val, err := strconv.ParseUint(field, 10, 64) - if err != nil { - continue - } - values = append(values, val) - total += val - } - if len(values) >= 4 { - idle = values[3] // idle is the 4th value - } - break - } - } - return idle, total -} diff --git 
a/flashring/pkg/metrics/runmetrics.go b/flashring/pkg/metrics/runmetrics.go deleted file mode 100644 index 1587ce12..00000000 --- a/flashring/pkg/metrics/runmetrics.go +++ /dev/null @@ -1,482 +0,0 @@ -package metrics - -import ( - "sync" - "time" -) - -// Global variable to hold runtime data -var currentMetrics RunMetrics -var metricsCollector *MetricsCollector - -// MetricsRecorder is an interface for recording metrics from the cache -// Implement this interface to receive per-shard metrics from the cache layer -type MetricsRecorder interface { - RecordGets(shardIdx int, value int64) - RecordPuts(shardIdx int, value int64) - RecordHits(shardIdx int, value int64) - RecordActiveEntries(shardIdx int, value int64) - RecordExpiredEntries(shardIdx int, value int64) - RecordRewrites(shardIdx int, value int64) - - // Per-shard observation metrics - RecordRP99(shardIdx int, value time.Duration) - RecordRP50(shardIdx int, value time.Duration) - RecordRP25(shardIdx int, value time.Duration) - RecordWP99(shardIdx int, value time.Duration) - RecordWP50(shardIdx int, value time.Duration) - RecordWP25(shardIdx int, value time.Duration) - - //shard level index and rb data - RecordKeyNotFoundCount(shardIdx int, value int64) - RecordKeyExpiredCount(shardIdx int, value int64) - RecordBadDataCount(shardIdx int, value int64) - RecordBadLengthCount(shardIdx int, value int64) - RecordBadCR32Count(shardIdx int, value int64) - RecordBadKeyCount(shardIdx int, value int64) - RecordDeletedKeyCount(shardIdx int, value int64) -} - -type MetricsCollectorConfig struct { - StatsEnabled bool //Stats enabled - global flag - - CsvLogging bool //Log to CSV enabled - ConsoleLogging bool //Log to console enabled - StatsdLogging bool //Log to Statsd enabled - - InstantMetrics bool //Metrics at every instant - AveragedMetrics bool //Metrics averaged over a period of time - - // Metadata for external systems to use - // must include shards, keys_per_shard, read_workers, write_workers, plan - Metadata 
map[string]any -} - -// ShardMetrics holds observation metrics for a single shard -type ShardMetrics struct { - Gets int64 - Puts int64 - Hits int64 - ActiveEntries int64 - ExpiredEntries int64 - Rewrites int64 - RP99 time.Duration - RP50 time.Duration - RP25 time.Duration - WP99 time.Duration - WP50 time.Duration - WP25 time.Duration -} - -type ShardIndexMetrics struct { - KeyNotFoundCount int64 - KeyExpiredCount int64 - BadDataCount int64 - BadLengthCount int64 - BadCR32Count int64 - BadKeyCount int64 - DeletedKeyCount int64 - - WriteCount int64 - PunchHoleCount int64 -} - -// Define your parameter structure -type RunMetrics struct { - // Per-shard observation parameters - ShardMetrics []ShardMetrics - ShardIndexMetrics []ShardIndexMetrics -} - -// ShardMetricValue represents a metric value for a specific shard -type ShardMetricValue struct { - ShardIdx int - value int64 -} - -// MetricChannels holds separate channels for each metric type (per-shard) -type MetricChannels struct { - Gets chan ShardMetricValue - Puts chan ShardMetricValue - Hits chan ShardMetricValue - ActiveEntries chan ShardMetricValue - ExpiredEntries chan ShardMetricValue - Rewrites chan ShardMetricValue - RP99 chan ShardMetricValue - RP50 chan ShardMetricValue - RP25 chan ShardMetricValue - WP99 chan ShardMetricValue - WP50 chan ShardMetricValue - WP25 chan ShardMetricValue - - KeyNotFoundCount chan ShardMetricValue - KeyExpiredCount chan ShardMetricValue - BadDataCount chan ShardMetricValue - BadLengthCount chan ShardMetricValue - BadCR32Count chan ShardMetricValue - BadKeyCount chan ShardMetricValue - DeletedKeyCount chan ShardMetricValue - BadCRCMemIds chan ShardMetricValue - - WriteCount chan ShardMetricValue - PunchHoleCount chan ShardMetricValue -} - -// MetricsCollector collects and averages all metrics (per-shard) -type MetricsCollector struct { - Config MetricsCollectorConfig - channels MetricChannels //channels for each metric type (per-shard) - instantMetrics 
map[int]map[string]int64 // shardIdx -> metricName -> value - stopCh chan struct{} //channel to stop the collector when running from console - wg sync.WaitGroup - mu sync.RWMutex -} - -// InitMetricsCollector creates and starts the metrics collector, returning it -// so it can be passed to other components (e.g., cache config) -func InitMetricsCollector(config MetricsCollectorConfig) *MetricsCollector { - Init() - metricsCollector = NewMetricsCollector(config, 100) - - shouldLog := config.StatsEnabled && (config.CsvLogging || config.ConsoleLogging || config.StatsdLogging) - - if shouldLog { - metricsCollector.Start() - } - - if config.CsvLogging { - csvLogger := CsvLogger{metricsCollector: metricsCollector} - go csvLogger.RunCSVLoggerWaitForShutdown() - } - - if config.StatsdLogging { - go RunStatsdLogger(metricsCollector) - } - - if config.ConsoleLogging { - go RunConsoleLogger(metricsCollector) - } - - return metricsCollector -} - -// NewMetricsCollector creates a new metrics collector with channels -func NewMetricsCollector(config MetricsCollectorConfig, bufferSize int) *MetricsCollector { - mc := &MetricsCollector{ - Config: config, - channels: MetricChannels{ - Gets: make(chan ShardMetricValue, bufferSize), - Puts: make(chan ShardMetricValue, bufferSize), - Hits: make(chan ShardMetricValue, bufferSize), - ActiveEntries: make(chan ShardMetricValue, bufferSize), - ExpiredEntries: make(chan ShardMetricValue, bufferSize), - Rewrites: make(chan ShardMetricValue, bufferSize), - RP99: make(chan ShardMetricValue, bufferSize), - RP50: make(chan ShardMetricValue, bufferSize), - RP25: make(chan ShardMetricValue, bufferSize), - WP99: make(chan ShardMetricValue, bufferSize), - WP50: make(chan ShardMetricValue, bufferSize), - WP25: make(chan ShardMetricValue, bufferSize), - - KeyNotFoundCount: make(chan ShardMetricValue, bufferSize), - KeyExpiredCount: make(chan ShardMetricValue, bufferSize), - BadDataCount: make(chan ShardMetricValue, bufferSize), - BadLengthCount: 
make(chan ShardMetricValue, bufferSize), - BadCR32Count: make(chan ShardMetricValue, bufferSize), - BadKeyCount: make(chan ShardMetricValue, bufferSize), - DeletedKeyCount: make(chan ShardMetricValue, bufferSize), - - WriteCount: make(chan ShardMetricValue, bufferSize), - PunchHoleCount: make(chan ShardMetricValue, bufferSize), - }, - - instantMetrics: make(map[int]map[string]int64), - stopCh: make(chan struct{}), - } - - // Initialize averagedMetrics with MetricAverager instances - metricNames := []string{"RP99", "RP50", "RP25", "WP99", "WP50", "WP25", "Gets", "Puts", "Hits", "ActiveEntries", "ExpiredEntries", "Rewrites"} - - // Initialize instantMetrics for each shard with MetricAverager instances - shards := config.Metadata["shards"].(int) - for shardIdx := 0; shardIdx < shards; shardIdx++ { - mc.instantMetrics[shardIdx] = make(map[string]int64) - for _, name := range metricNames { - mc.instantMetrics[shardIdx][name] = 0 - } - - mc.instantMetrics[shardIdx]["KeyNotFoundCount"] = 0 - mc.instantMetrics[shardIdx]["KeyExpiredCount"] = 0 - mc.instantMetrics[shardIdx]["BadDataCount"] = 0 - mc.instantMetrics[shardIdx]["BadLengthCount"] = 0 - mc.instantMetrics[shardIdx]["BadCR32Count"] = 0 - mc.instantMetrics[shardIdx]["BadKeyCount"] = 0 - mc.instantMetrics[shardIdx]["DeletedKeyCount"] = 0 - - mc.instantMetrics[shardIdx]["WriteCount"] = 0 - mc.instantMetrics[shardIdx]["PunchHoleCount"] = 0 - } - - return mc -} - -// Start begins collecting metrics from all channels -func (mc *MetricsCollector) Start() { - // Start a goroutine for each metric channel - mc.wg.Add(12) - - go mc.collectShardMetric(mc.channels.RP99, "RP99") - go mc.collectShardMetric(mc.channels.RP50, "RP50") - go mc.collectShardMetric(mc.channels.RP25, "RP25") - go mc.collectShardMetric(mc.channels.WP99, "WP99") - go mc.collectShardMetric(mc.channels.WP50, "WP50") - go mc.collectShardMetric(mc.channels.WP25, "WP25") - - go mc.collectShardMetric(mc.channels.ActiveEntries, "ActiveEntries") - go 
mc.collectShardMetric(mc.channels.ExpiredEntries, "ExpiredEntries") - go mc.collectShardMetric(mc.channels.Rewrites, "Rewrites") - go mc.collectShardMetric(mc.channels.Gets, "Gets") - go mc.collectShardMetric(mc.channels.Puts, "Puts") - go mc.collectShardMetric(mc.channels.Hits, "Hits") - - go mc.collectShardMetric(mc.channels.KeyNotFoundCount, "KeyNotFoundCount") - go mc.collectShardMetric(mc.channels.KeyExpiredCount, "KeyExpiredCount") - go mc.collectShardMetric(mc.channels.BadDataCount, "BadDataCount") - go mc.collectShardMetric(mc.channels.BadLengthCount, "BadLengthCount") - go mc.collectShardMetric(mc.channels.BadCR32Count, "BadCR32Count") - go mc.collectShardMetric(mc.channels.BadKeyCount, "BadKeyCount") - go mc.collectShardMetric(mc.channels.DeletedKeyCount, "DeletedKeyCount") - - go mc.collectShardMetric(mc.channels.WriteCount, "WriteCount") - go mc.collectShardMetric(mc.channels.PunchHoleCount, "PunchHoleCount") -} - -func (mc *MetricsCollector) collectShardMetric(ch chan ShardMetricValue, name string) { - defer mc.wg.Done() - for { - select { - case <-mc.stopCh: - return - case sv, ok := <-ch: - if !ok { - return - } - mc.mu.Lock() - mc.instantMetrics[sv.ShardIdx][name] = sv.value - mc.mu.Unlock() - } - } -} - -// RecordRP99 sends a value to the RP99 channel for a specific shard -func (mc *MetricsCollector) RecordRP99(shardIdx int, value time.Duration) { - select { - case mc.channels.RP99 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: - default: // Don't block if channel is full - } -} - -// RecordRP50 sends a value to the RP50 channel for a specific shard -func (mc *MetricsCollector) RecordRP50(shardIdx int, value time.Duration) { - select { - case mc.channels.RP50 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: - default: - } -} - -// RecordRP25 sends a value to the RP25 channel for a specific shard -func (mc *MetricsCollector) RecordRP25(shardIdx int, value time.Duration) { - select { - case mc.channels.RP25 <- 
ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: - default: - } -} - -// RecordWP99 sends a value to the WP99 channel for a specific shard -func (mc *MetricsCollector) RecordWP99(shardIdx int, value time.Duration) { - select { - case mc.channels.WP99 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: - default: - } -} - -// RecordWP50 sends a value to the WP50 channel for a specific shard -func (mc *MetricsCollector) RecordWP50(shardIdx int, value time.Duration) { - select { - case mc.channels.WP50 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: - default: - } -} - -// RecordWP25 sends a value to the WP25 channel for a specific shard -func (mc *MetricsCollector) RecordWP25(shardIdx int, value time.Duration) { - select { - case mc.channels.WP25 <- ShardMetricValue{ShardIdx: shardIdx, value: int64(value)}: - default: - } -} - -// RecordGets sends a value to the Gets channel for a specific shard -func (mc *MetricsCollector) RecordGets(shardIdx int, value int64) { - select { - case mc.channels.Gets <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -// RecordPuts sends a value to the Puts channel for a specific shard -func (mc *MetricsCollector) RecordPuts(shardIdx int, value int64) { - select { - case mc.channels.Puts <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -// RecordHits sends a value to the Hits channel for a specific shard -func (mc *MetricsCollector) RecordHits(shardIdx int, value int64) { - select { - case mc.channels.Hits <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -// RecordActiveEntries sends a value to the ActiveEntries channel for a specific shard -func (mc *MetricsCollector) RecordActiveEntries(shardIdx int, value int64) { - select { - case mc.channels.ActiveEntries <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -// RecordExpiredEntries sends a value to the ExpiredEntries channel for a specific shard -func 
(mc *MetricsCollector) RecordExpiredEntries(shardIdx int, value int64) { - select { - case mc.channels.ExpiredEntries <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -// RecordRewrites sends a value to the Rewrites channel for a specific shard -func (mc *MetricsCollector) RecordRewrites(shardIdx int, value int64) { - select { - case mc.channels.Rewrites <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordKeyNotFoundCount(shardIdx int, value int64) { - select { - case mc.channels.KeyNotFoundCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordKeyExpiredCount(shardIdx int, value int64) { - select { - case mc.channels.KeyExpiredCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordBadDataCount(shardIdx int, value int64) { - select { - case mc.channels.BadDataCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordBadLengthCount(shardIdx int, value int64) { - select { - case mc.channels.BadLengthCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordBadCR32Count(shardIdx int, value int64) { - select { - case mc.channels.BadCR32Count <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordBadKeyCount(shardIdx int, value int64) { - select { - case mc.channels.BadKeyCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordDeletedKeyCount(shardIdx int, value int64) { - select { - case mc.channels.DeletedKeyCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordWriteCount(shardIdx int, value int64) { - select { - case mc.channels.WriteCount <- ShardMetricValue{ShardIdx: 
shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) RecordPunchHoleCount(shardIdx int, value int64) { - - select { - case mc.channels.PunchHoleCount <- ShardMetricValue{ShardIdx: shardIdx, value: value}: - default: - } -} - -func (mc *MetricsCollector) GetMetrics() RunMetrics { - mc.mu.RLock() - defer mc.mu.RUnlock() - - shards := mc.Config.Metadata["shards"].(int) - - // Build per-shard metrics - shardMetrics := make([]ShardMetrics, shards) - shardIndexMetrics := make([]ShardIndexMetrics, shards) - for shardIdx := 0; shardIdx < shards; shardIdx++ { - if instants, exists := mc.instantMetrics[shardIdx]; exists { - shardMetrics[shardIdx] = ShardMetrics{ - RP99: time.Duration(instants["RP99"]), - RP50: time.Duration(instants["RP50"]), - RP25: time.Duration(instants["RP25"]), - WP99: time.Duration(instants["WP99"]), - WP50: time.Duration(instants["WP50"]), - WP25: time.Duration(instants["WP25"]), - Gets: instants["Gets"], - Puts: instants["Puts"], - Hits: instants["Hits"], - ActiveEntries: instants["ActiveEntries"], - ExpiredEntries: instants["ExpiredEntries"], - Rewrites: instants["Rewrites"], - } - - shardIndexMetrics[shardIdx] = ShardIndexMetrics{ - KeyNotFoundCount: instants["KeyNotFoundCount"], - KeyExpiredCount: instants["KeyExpiredCount"], - BadDataCount: instants["BadDataCount"], - BadLengthCount: instants["BadLengthCount"], - BadCR32Count: instants["BadCR32Count"], - BadKeyCount: instants["BadKeyCount"], - DeletedKeyCount: instants["DeletedKeyCount"], - - WriteCount: instants["WriteCount"], - PunchHoleCount: instants["PunchHoleCount"], - } - } - } - - return RunMetrics{ - ShardMetrics: shardMetrics, - ShardIndexMetrics: shardIndexMetrics, - } -} - -// Stop stops all collector goroutines -func (mc *MetricsCollector) Stop() { - close(mc.stopCh) - mc.wg.Wait() -} diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index 2bf0d1fe..c0636076 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ 
b/flashring/pkg/metrics/statsd_logger.go @@ -1,24 +1,19 @@ package metrics -import ( - "strconv" - "time" -) +import "strconv" const ( - KEY_READ_LATENCY = "flashringread_latency" - KEY_READ_LATENCY_STATSD = "flashringread_latency_statsd" - KEY_WRITE_LATENCY = "flashringwrite_latency" - KEY_WRITE_LATENCY_STATSD = "flashringwrite_latency_statsd" - KEY_RTHROUGHPUT = "flashring_rthroughput" - KEY_WTHROUGHPUT = "flashring_wthroughput" - KEY_HITRATE = "flashring_hitrate" - KEY_ACTIVE_ENTRIES = "flashring_active_entries" - KEY_EXPIRED_ENTRIES = "flashring_expired_entries" - KEY_REWRITES = "flashring_rewrites" - KEY_GETS = "flashring_gets" - KEY_PUTS = "flashring_puts" - KEY_HITS = "flashring_hits" + KEY_GET_LATENCY = "flashring_get_latency" + KEY_PUT_LATENCY = "flashring_put_latency" + KEY_RTHROUGHPUT = "flashring_rthroughput" + KEY_WTHROUGHPUT = "flashring_wthroughput" + KEY_HITRATE = "flashring_hitrate" + KEY_ACTIVE_ENTRIES = "flashring_active_entries" + KEY_EXPIRED_ENTRIES = "flashring_expired_entries" + KEY_REWRITES = "flashring_rewrites" + KEY_GETS = "flashring_gets" + KEY_PUTS = "flashring_puts" + KEY_HITS = "flashring_hits" KEY_KEY_NOT_FOUND_COUNT = "flashring_key_not_found_count" KEY_KEY_EXPIRED_COUNT = "flashring_key_expired_count" @@ -33,9 +28,11 @@ const ( TAG_VALUE_P50 = "p50" TAG_VALUE_P99 = "p99" TAG_SHARD_IDX = "shard_idx" + TAG_MEMTABLE_ID = "memtable_id" KEY_WRITE_COUNT = "flashring_write_count" KEY_PUNCH_HOLE_COUNT = "flashring_punch_hole_count" + KEY_PREAD_COUNT = "flashring_pread_count" KEY_TRIM_HEAD_LATENCY = "flashring_wrap_file_trim_head_latency" KEY_PREAD_LATENCY = "flashring_pread_latency" @@ -45,94 +42,16 @@ const ( LATENCY_RLOCK = "flashring_rlock_latency" LATENCY_WLOCK = "flashring_wlock_latency" -) - -func RunStatsdLogger(metricsCollector *MetricsCollector) { - - // start a ticker to log the metrics every 30 seconds - - ticker := time.NewTicker(30 * time.Second) - defer ticker.Stop() - - //prev values per shard - prevActiveEntries := 
make(map[int]int64) - prevExpiredEntries := make(map[int]int64) - prevRewrites := make(map[int]int64) - prevGets := make(map[int]int64) - prevPuts := make(map[int]int64) - prevHits := make(map[int]int64) - - prevKeyNotFoundCount := make(map[int]int64) - prevKeyExpiredCount := make(map[int]int64) - prevBadDataCount := make(map[int]int64) - prevBadLengthCount := make(map[int]int64) - prevBadCR32Count := make(map[int]int64) - prevBadKeyCount := make(map[int]int64) - prevDeletedKeyCount := make(map[int]int64) - - prevWriteCount := make(map[int]int64) - prevPunchHoleCount := make(map[int]int64) - - for { - select { - case <-metricsCollector.stopCh: - return - case <-ticker.C: - currentMetrics = metricsCollector.GetMetrics() - for idx, shard := range currentMetrics.ShardMetrics { + KEY_RINGBUFFER_ACTIVE_ENTRIES = "flashring_ringbuffer_active_entries" - shardIdx := strconv.Itoa(idx) - shardBuildTag := NewTag(TAG_SHARD_IDX, shardIdx) - - Count(KEY_ACTIVE_ENTRIES, shard.ActiveEntries-prevActiveEntries[idx], BuildTag(shardBuildTag)) - Count(KEY_EXPIRED_ENTRIES, shard.ExpiredEntries-prevExpiredEntries[idx], BuildTag(shardBuildTag)) - Count(KEY_REWRITES, shard.Rewrites-prevRewrites[idx], BuildTag(shardBuildTag)) - Count(KEY_GETS, shard.Gets-prevGets[idx], BuildTag(shardBuildTag)) - Count(KEY_PUTS, shard.Puts-prevPuts[idx], BuildTag(shardBuildTag)) - Count(KEY_HITS, shard.Hits-prevHits[idx], BuildTag(shardBuildTag)) - - Timing(KEY_READ_LATENCY, shard.RP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), shardBuildTag)) - Timing(KEY_READ_LATENCY, shard.RP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), shardBuildTag)) - Timing(KEY_READ_LATENCY, shard.RP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), shardBuildTag)) - Timing(KEY_WRITE_LATENCY, shard.WP99, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P99), shardBuildTag)) - Timing(KEY_WRITE_LATENCY, shard.WP50, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P50), shardBuildTag)) - 
Timing(KEY_WRITE_LATENCY, shard.WP25, BuildTag(NewTag(TAG_LATENCY_PERCENTILE, TAG_VALUE_P25), shardBuildTag)) - - prevActiveEntries[idx] = shard.ActiveEntries - prevExpiredEntries[idx] = shard.ExpiredEntries - prevRewrites[idx] = shard.Rewrites - prevGets[idx] = shard.Gets - prevPuts[idx] = shard.Puts - prevHits[idx] = shard.Hits - - } - - for idx, shard := range currentMetrics.ShardIndexMetrics { - shardIdx := strconv.Itoa(idx) - shardBuildTag := NewTag(TAG_SHARD_IDX, shardIdx) - - Count(KEY_KEY_NOT_FOUND_COUNT, shard.KeyNotFoundCount-prevKeyNotFoundCount[idx], BuildTag(shardBuildTag)) - Count(KEY_KEY_EXPIRED_COUNT, shard.KeyExpiredCount-prevKeyExpiredCount[idx], BuildTag(shardBuildTag)) - Count(KEY_BAD_DATA_COUNT, shard.BadDataCount-prevBadDataCount[idx], BuildTag(shardBuildTag)) - Count(KEY_BAD_LENGTH_COUNT, shard.BadLengthCount-prevBadLengthCount[idx], BuildTag(shardBuildTag)) - Count(KEY_BAD_CR32_COUNT, shard.BadCR32Count-prevBadCR32Count[idx], BuildTag(shardBuildTag)) - Count(KEY_BAD_KEY_COUNT, shard.BadKeyCount-prevBadKeyCount[idx], BuildTag(shardBuildTag)) - Count(KEY_DELETED_KEY_COUNT, shard.DeletedKeyCount-prevDeletedKeyCount[idx], BuildTag(shardBuildTag)) - Count(KEY_WRITE_COUNT, shard.WriteCount-prevWriteCount[idx], BuildTag(shardBuildTag)) - Count(KEY_PUNCH_HOLE_COUNT, shard.PunchHoleCount-prevPunchHoleCount[idx], BuildTag(shardBuildTag)) + KEY_MEMTABLE_ENTRY_COUNT = "flashring_memtable_entry_count" +) - prevKeyNotFoundCount[idx] = shard.KeyNotFoundCount - prevKeyExpiredCount[idx] = shard.KeyExpiredCount - prevBadDataCount[idx] = shard.BadDataCount - prevBadLengthCount[idx] = shard.BadLengthCount - prevBadCR32Count[idx] = shard.BadCR32Count - prevBadKeyCount[idx] = shard.BadKeyCount - prevDeletedKeyCount[idx] = shard.DeletedKeyCount - prevWriteCount[idx] = shard.WriteCount - prevPunchHoleCount[idx] = shard.PunchHoleCount - } +func GetShardTag(shardIdx uint32) []string { + return BuildTag(NewTag(TAG_SHARD_IDX, strconv.Itoa(int(shardIdx)))) +} - } - } 
+func GetMemtableTag(memtableId uint32) []string { + return BuildTag(NewTag(TAG_MEMTABLE_ID, strconv.Itoa(int(memtableId)))) } From 7025143b84af46ee77f3d90de6674026ec99feeb Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Sun, 22 Feb 2026 04:45:53 +0000 Subject: [PATCH 45/53] add more metrics --- flashring/internal/shard/shard_cache.go | 4 ++++ flashring/pkg/metrics/statsd_logger.go | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 6f0fef97..2b31393b 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -187,6 +187,8 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { return false, nil, 0, false, false } + metrics.Timing(metrics.KEY_DATA_LENGTH, time.Duration(length), metrics.GetShardTag(fc.ShardIdx)) + if status == indices.StatusExpired { metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) return false, nil, 0, true, false @@ -203,6 +205,7 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { memtableExists = false } if !memtableExists { + metrics.Incr(metrics.KEY_MEMTABLE_MISS, metrics.GetShardTag(fc.ShardIdx)) // Allocate buffer of exact size needed - no pool since readFromDisk already copies once buf = make([]byte, length) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) @@ -212,6 +215,7 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { return false, nil, 0, false, shouldReWrite } } else { + metrics.Incr(metrics.KEY_MEMTABLE_HIT, metrics.GetShardTag(fc.ShardIdx)) buf, exists = mt.GetBufForRead(int(offset), length) if !exists { panic("memtable exists but buf not found") diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index c0636076..fbddadce 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -46,6 +46,11 @@ 
const ( KEY_RINGBUFFER_ACTIVE_ENTRIES = "flashring_ringbuffer_active_entries" KEY_MEMTABLE_ENTRY_COUNT = "flashring_memtable_entry_count" + + KEY_MEMTABLE_HIT = "flashring_memtable_hit" + KEY_MEMTABLE_MISS = "flashring_memtable_miss" + + KEY_DATA_LENGTH = "flashring_data_length" ) func GetShardTag(shardIdx uint32) []string { From 213d00c11bb7208eed3a0cbef06f14c8b4175baf Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Sun, 22 Feb 2026 19:55:00 +0000 Subject: [PATCH 46/53] parallelize iourings --- flashring/internal/fs/batch_iouring.go | 43 +++++++++++++++++++++++++ flashring/internal/shard/shard_cache.go | 4 +-- flashring/pkg/cache/cache.go | 6 ++-- 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/flashring/internal/fs/batch_iouring.go b/flashring/internal/fs/batch_iouring.go index 5c03b83e..35eaa087 100644 --- a/flashring/internal/fs/batch_iouring.go +++ b/flashring/internal/fs/batch_iouring.go @@ -6,6 +6,7 @@ package fs import ( "fmt" "sync" + "sync/atomic" "syscall" "time" @@ -247,3 +248,45 @@ func (b *BatchIoUringReader) submitBatch(batch []*batchReadRequest) { b.ring.mu.Unlock() } + +// ParallelBatchIoUringReader distributes pread requests across N independent +// BatchIoUringReader instances (each with its own io_uring ring and goroutine) +// using round-robin. This removes the single-ring serialization bottleneck and +// lets NVMe service requests across multiple hardware queues in parallel. +type ParallelBatchIoUringReader struct { + readers []*BatchIoUringReader + next atomic.Uint64 +} + +// NewParallelBatchIoUringReader creates numRings independent batch readers. +// Each ring gets its own io_uring instance and background goroutine. 
+func NewParallelBatchIoUringReader(cfg BatchIoUringConfig, numRings int) (*ParallelBatchIoUringReader, error) { + if numRings <= 0 { + numRings = 1 + } + readers := make([]*BatchIoUringReader, numRings) + for i := 0; i < numRings; i++ { + r, err := NewBatchIoUringReader(cfg) + if err != nil { + for j := 0; j < i; j++ { + readers[j].Close() + } + return nil, fmt.Errorf("parallel batch reader ring %d: %w", i, err) + } + readers[i] = r + } + return &ParallelBatchIoUringReader{readers: readers}, nil +} + +// Submit routes the pread to the next ring via round-robin. Thread-safe. +func (p *ParallelBatchIoUringReader) Submit(fd int, buf []byte, offset uint64) (int, error) { + idx := p.next.Add(1) % uint64(len(p.readers)) + return p.readers[idx].Submit(fd, buf, offset) +} + +// Close shuts down all underlying batch readers. +func (p *ParallelBatchIoUringReader) Close() { + for _, r := range p.readers { + r.Close() + } +} diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 2b31393b..2e24ffe4 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -19,7 +19,7 @@ type ShardCache struct { keyIndex *indices.Index file *fs.WrapAppendFile ioFile *fs.IOUringFile - batchReader *fs.BatchIoUringReader // global batched io_uring reader (shared across shards) + batchReader *fs.ParallelBatchIoUringReader // global batched io_uring reader (shared across shards) mm *memtables.MemtableManager readPageAllocator *allocators.SlabAlignedPageAllocator dm *indices.DeleteManager @@ -59,7 +59,7 @@ type ShardCacheConfig struct { // Global batched io_uring reader (shared across all shards). // When set, disk reads go through this instead of the per-shard IOUringFile. - BatchIoUringReader *fs.BatchIoUringReader + BatchIoUringReader *fs.ParallelBatchIoUringReader // Dedicated io_uring ring for batched writes (shared across all shards). 
WriteRing *fs.IoUring diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 86bf1b10..b8e4e63c 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -44,7 +44,7 @@ type WrapCache struct { shards []*filecache.ShardCache shardLocks []sync.RWMutex predictor *maths.Predictor - batchReader *fs.BatchIoUringReader // global batched io_uring reader + batchReader *fs.ParallelBatchIoUringReader // global batched io_uring reader } type WrapCacheConfig struct { @@ -154,12 +154,12 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string) (*WrapCache, error) // Create a single global batched io_uring reader shared across all shards. // All disk reads funnel into one channel; the background goroutine collects // them for up to 1ms and submits them in a single io_uring_enter call. - batchReader, err := fs.NewBatchIoUringReader(fs.BatchIoUringConfig{ + batchReader, err := fs.NewParallelBatchIoUringReader(fs.BatchIoUringConfig{ RingDepth: 256, MaxBatch: 256, Window: time.Millisecond * 2, QueueSize: 1024, - }) + }, 4) if err != nil { log.Error().Err(err).Msg("Failed to create batched io_uring reader, falling back to per-shard rings") batchReader = nil From 08c3475586ecdf1d08dbd5d8dbd81fee87424579 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Sun, 22 Feb 2026 20:01:14 +0000 Subject: [PATCH 47/53] add iouring size metrics --- flashring/internal/fs/batch_iouring.go | 1 + flashring/pkg/metrics/statsd_logger.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/flashring/internal/fs/batch_iouring.go b/flashring/internal/fs/batch_iouring.go index 35eaa087..e197d9d8 100644 --- a/flashring/internal/fs/batch_iouring.go +++ b/flashring/internal/fs/batch_iouring.go @@ -169,6 +169,7 @@ func (b *BatchIoUringReader) loop() { // each CQE individually as it completes. Fast reads are dispatched immediately // without waiting for slow reads in the same batch (no head-of-line blocking). 
func (b *BatchIoUringReader) submitBatch(batch []*batchReadRequest) { + metrics.Timing(metrics.KEY_IOURING_SIZE, time.Duration(len(batch))*time.Millisecond, []string{}) n := len(batch) if n == 0 { return diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go index fbddadce..9f12e24a 100644 --- a/flashring/pkg/metrics/statsd_logger.go +++ b/flashring/pkg/metrics/statsd_logger.go @@ -51,6 +51,8 @@ const ( KEY_MEMTABLE_MISS = "flashring_memtable_miss" KEY_DATA_LENGTH = "flashring_data_length" + + KEY_IOURING_SIZE = "flashring_iouring_size" ) func GetShardTag(shardIdx uint32) []string { From fa4e31cdf8d9ecb56fe08686403915ba767cab8c Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Mon, 23 Feb 2026 04:10:09 +0000 Subject: [PATCH 48/53] disable metrics by default --- flashring/go.sum | 37 -------- flashring/internal/fs/batch_iouring.go | 13 ++- flashring/internal/fs/iouring.go | 9 +- flashring/internal/fs/wrap_file.go | 40 ++++++--- flashring/internal/indicesV3/index.go | 3 - flashring/internal/memtables/manager.go | 4 +- flashring/internal/shard/shard_cache.go | 81 +++++++++++++----- flashring/pkg/cache/cache.go | 80 ++++++++++------- flashring/pkg/metrics/metric.go | 109 ++++++++++++++++++++++-- flashring/pkg/metrics/statsd_logger.go | 64 -------------- 10 files changed, 260 insertions(+), 180 deletions(-) delete mode 100644 flashring/pkg/metrics/statsd_logger.go diff --git a/flashring/go.sum b/flashring/go.sum index 18bee494..5d69f8d2 100644 --- a/flashring/go.sum +++ b/flashring/go.sum @@ -1,30 +1,20 @@ github.com/DataDog/datadog-go/v5 v5.8.2 h1:9IEfH1Mw9AjWwhAMqCAkhbxjuJeMxm2ARX2VdgL+ols= github.com/DataDog/datadog-go/v5 v5.8.2/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= -github.com/DataDog/datadog-go/v5 v5.8.3 h1:s58CUJ9s8lezjhTNJO/SxkPBv2qZjS3ktpRSqGF5n0s= -github.com/DataDog/datadog-go/v5 v5.8.3/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= github.com/Microsoft/go-winio v0.5.0 
h1:Elr9Wn+sGKPlkaBvwu4mTrxtmOp3F3yV9qhaHbXGjwU= github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= -github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= -github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/coocood/freecache v1.2.4 h1:UdR6Yz/X1HW4fZOuH0Z94KwG851GWOSknua5VUbb/5M= github.com/coocood/freecache v1.2.4/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk= -github.com/coocood/freecache v1.2.5 h1:FmhRQ8cLLVq9zWhHVYODUEZ0xu6rTPrVeAnX1AEIf7I= -github.com/coocood/freecache v1.2.5/go.mod h1:RBUWa/Cy+OHdfTGFEhEuE1pMCMX51Ncizj7rthiQ3vk= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgraph-io/badger/v4 v4.9.0 h1:tpqWb0NewSrCYqTvywbcXOhQdWcqephkVkbBmaaqHzc= github.com/dgraph-io/badger/v4 v4.9.0/go.mod h1:5/MEx97uzdPUHR4KtkNt8asfI2T4JiEiQlV7kWUo8c0= -github.com/dgraph-io/badger/v4 v4.9.1 h1:DocZXZkg5JJHJPtUErA0ibyHxOVUDVoXLSCV6t8NC8w= -github.com/dgraph-io/badger/v4 v4.9.1/go.mod h1:5/MEx97uzdPUHR4KtkNt8asfI2T4JiEiQlV7kWUo8c0= github.com/dgraph-io/ristretto/v2 v2.2.0 h1:bkY3XzJcXoMuELV8F+vS8kzNgicwQFAaGINAEJdWGOM= github.com/dgraph-io/ristretto/v2 v2.2.0/go.mod h1:RZrm63UmcBAaYWC1DotLYBmTvgkrs0+XhBd7Npn7/zI= -github.com/dgraph-io/ristretto/v2 v2.4.0 h1:I/w09yLjhdcVD2QV192UJcq8dPBaAJb9pOuMyNy0XlU= -github.com/dgraph-io/ristretto/v2 v2.4.0/go.mod 
h1:0KsrXtXvnv0EqnzyowllbVJB8yBonswa2lTCK2gGo9E= github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38= github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= @@ -40,20 +30,14 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= -github.com/go-viper/mapstructure/v2 v2.5.0 h1:vM5IJoUAy3d7zRSVtIwQgBj7BiWtMPfmPEgAXnvj1Ro= -github.com/go-viper/mapstructure/v2 v2.5.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/google/flatbuffers v25.2.10+incompatible h1:F3vclr7C3HpB1k9mxCGRMXq6FdUalZ6H/pNX4FP1v0Q= github.com/google/flatbuffers v25.2.10+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/flatbuffers v25.12.19+incompatible h1:haMV2JRRJCe1998HeW/p0X9UaMTK6SDo0ffLn2+DbLs= -github.com/google/flatbuffers v25.12.19+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= -github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= -github.com/klauspost/compress v1.18.4/go.mod 
h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -74,14 +58,11 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= -github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc= github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= -github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4= -github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U= @@ -110,24 +91,14 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 
v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs= -github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= -go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ= go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I= -go.opentelemetry.io/otel v1.40.0 h1:oA5YeOcpRTXq6NN7frwmwFR0Cn3RhTVZvXsP4duvCms= -go.opentelemetry.io/otel v1.40.0/go.mod h1:IMb+uXZUKkMXdPddhwAHm6UfOwJyh4ct1ybIlV14J0g= go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE= go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E= -go.opentelemetry.io/otel/metric v1.40.0 h1:rcZe317KPftE2rstWIBitCdVp89A2HqjkxR3c11+p9g= -go.opentelemetry.io/otel/metric v1.40.0/go.mod h1:ib/crwQH7N3r5kfiBZQbwrTge743UDc7DTFVZrrXnqc= go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4= go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0= -go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZYblVjw= -go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -138,8 +109,6 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= -golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60= -golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -154,15 +123,11 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= -golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= -golang.org/x/text v0.34.0 h1:oL/Qq0Kdaqxa1KbNeMKwQq0reLCCaFtqu2eNuSeNHbk= -golang.org/x/text v0.34.0/go.mod h1:homfLqTYRFyVYemLBFl5GgL/DWEiH5wcsQ5gSh1yziA= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= @@ -171,8 +136,6 @@ golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= -google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= -google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/flashring/internal/fs/batch_iouring.go b/flashring/internal/fs/batch_iouring.go index e197d9d8..d0f2b0aa 100644 --- a/flashring/internal/fs/batch_iouring.go +++ b/flashring/internal/fs/batch_iouring.go @@ -98,7 +98,10 @@ func (b *BatchIoUringReader) Submit(fd int, buf []byte, offset uint64) (int, err return 0, nil } - startTime := time.Now() + var startTime time.Time + if metrics.Enabled() { + startTime = time.Now() + } req := batchReqPool.Get().(*batchReadRequest) req.fd = fd @@ -109,7 +112,9 @@ func (b *BatchIoUringReader) Submit(fd int, buf []byte, offset uint64) (int, err result := <-req.done n, err := result.N, result.Err - metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) + if metrics.Enabled() { + metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) + } // Reset and return to pool req.fd = 0 @@ -169,7 +174,9 @@ func (b *BatchIoUringReader) loop() { // each CQE individually as it completes. 
Fast reads are dispatched immediately // without waiting for slow reads in the same batch (no head-of-line blocking). func (b *BatchIoUringReader) submitBatch(batch []*batchReadRequest) { - metrics.Timing(metrics.KEY_IOURING_SIZE, time.Duration(len(batch))*time.Millisecond, []string{}) + if metrics.Enabled() { + metrics.Timing(metrics.KEY_IOURING_SIZE, time.Duration(len(batch))*time.Millisecond, []string{}) + } n := len(batch) if n == 0 { return diff --git a/flashring/internal/fs/iouring.go b/flashring/internal/fs/iouring.go index e3a3cd8c..4b5b18b3 100644 --- a/flashring/internal/fs/iouring.go +++ b/flashring/internal/fs/iouring.go @@ -514,7 +514,10 @@ func (r *IoUring) SubmitWriteBatch(fd int, bufs [][]byte, offsets []uint64) ([]i return nil, fmt.Errorf("io_uring_enter: %w", err) } - startTime := time.Now() + var startTime time.Time + if metrics.Enabled() { + startTime = time.Now() + } // Drain all CQEs (order may differ from submission) results := make([]int, n) @@ -535,7 +538,9 @@ func (r *IoUring) SubmitWriteBatch(fd int, bufs [][]byte, offsets []uint64) ([]i results[idx] = int(res) } - metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) + if metrics.Enabled() { + metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) + } } return results, nil diff --git a/flashring/internal/fs/wrap_file.go b/flashring/internal/fs/wrap_file.go index f367e583..3ef52fa8 100644 --- a/flashring/internal/fs/wrap_file.go +++ b/flashring/internal/fs/wrap_file.go @@ -75,9 +75,14 @@ func (r *WrapAppendFile) Pwrite(buf []byte) (currentPhysicalOffset int64, err er return 0, ErrBufNoAlign } } - startTime := time.Now() + var startTime time.Time + if metrics.Enabled() { + startTime = time.Now() + } n, err := syscall.Pwrite(r.WriteFd, buf, r.PhysicalWriteOffset) - metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) + if metrics.Enabled() { + metrics.Timing(metrics.KEY_PWRITE_LATENCY, time.Since(startTime), []string{}) 
+ } if err != nil { return 0, err } @@ -200,9 +205,14 @@ func (r *WrapAppendFile) Pread(fileOffset int64, buf []byte) (int32, error) { return 0, ErrFileOffsetOutOfRange } - startTime := time.Now() + var startTime time.Time + if metrics.Enabled() { + startTime = time.Now() + } n, err := syscall.Pread(r.ReadFd, buf, fileOffset) - metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) + if metrics.Enabled() { + metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) + } // flags := unix.RWF_HIPRI // optionally: | unix.RWF_NOWAIT // n, err := preadv2(r.ReadFd, buf, fileOffset, flags) if err != nil { @@ -280,10 +290,15 @@ func (f *IOUringFile) PreadAsync(fileOffset int64, buf []byte) (int, error) { return 0, ErrFileOffsetOutOfRange } - startTime := time.Now() + var startTime time.Time + if metrics.Enabled() { + startTime = time.Now() + } n, err := f.ring.SubmitRead(f.ReadFd, buf, uint64(fileOffset)) - metrics.Incr(metrics.KEY_PREAD_COUNT, []string{}) - metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_PREAD_COUNT, []string{}) + metrics.Timing(metrics.KEY_PREAD_LATENCY, time.Since(startTime), []string{}) + } if err != nil { return 0, err } @@ -294,7 +309,10 @@ func (f *IOUringFile) PreadAsync(fileOffset int64, buf []byte) (int, error) { func (r *WrapAppendFile) TrimHead() (err error) { - startTime := time.Now() + var startTime time.Time + if metrics.Enabled() { + startTime = time.Now() + } if r.WriteDirectIO { if !isAlignedOffset(r.PhysicalStartOffset, r.blockSize) { return ErrOffsetNotAligned @@ -308,8 +326,10 @@ func (r *WrapAppendFile) TrimHead() (err error) { if r.PhysicalStartOffset >= r.MaxFileSize { r.PhysicalStartOffset = 0 } - metrics.Incr(metrics.KEY_PUNCH_HOLE_COUNT, []string{}) - metrics.Timing(metrics.KEY_TRIM_HEAD_LATENCY, time.Since(startTime), []string{}) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_PUNCH_HOLE_COUNT, []string{}) 
+ metrics.Timing(metrics.KEY_TRIM_HEAD_LATENCY, time.Since(startTime), []string{}) + } return nil } diff --git a/flashring/internal/indicesV3/index.go b/flashring/internal/indicesV3/index.go index 5f93f7b3..aa4b3556 100644 --- a/flashring/internal/indicesV3/index.go +++ b/flashring/internal/indicesV3/index.go @@ -6,7 +6,6 @@ import ( "time" "github.com/Meesho/BharatMLStack/flashring/internal/maths" - "github.com/Meesho/BharatMLStack/flashring/pkg/metrics" "github.com/cespare/xxhash/v2" "github.com/zeebo/xxh3" ) @@ -71,11 +70,9 @@ func (i *Index) Put(key string, length, ttlInMinutes uint16, memId, offset uint3 func (i *Index) Get(key string) (length, lastAccess, remainingTTL uint16, freq uint64, memId, offset uint32, status Status) { hhi, hlo := hash128(key) - start := time.Now() i.mu.RLock() idx, ok := i.rm[hlo] i.mu.RUnlock() - metrics.Timing(metrics.LATENCY_RLOCK, time.Since(start), []string{}) if ok { for { diff --git a/flashring/internal/memtables/manager.go b/flashring/internal/memtables/manager.go index 41873a24..3c313017 100644 --- a/flashring/internal/memtables/manager.go +++ b/flashring/internal/memtables/manager.go @@ -87,7 +87,9 @@ func (mm *MemtableManager) flushConsumer(memtable *Memtable) { memtable.Id = mm.nextId mm.nextId++ mm.nextFileOffset += int64(n) - metrics.Incr(metrics.KEY_MEMTABLE_FLUSH_COUNT, append(metrics.GetShardTag(memtable.ShardIdx), metrics.GetMemtableTag(memtable.Id)...)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_MEMTABLE_FLUSH_COUNT, append(metrics.GetShardTag(memtable.ShardIdx), metrics.GetMemtableTag(memtable.Id)...)) + } } func (mm *MemtableManager) Flush() error { diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index 2e24ffe4..dcecaf97 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -183,14 +183,20 @@ func (fc *ShardCache) Put(key string, value []byte, ttlMinutes uint16) error { func (fc *ShardCache) Get(key string) 
(bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, false, false } - metrics.Timing(metrics.KEY_DATA_LENGTH, time.Duration(length), metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Timing(metrics.KEY_DATA_LENGTH, time.Duration(length), metrics.GetShardTag(fc.ShardIdx)) + } if status == indices.StatusExpired { - metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, true, false } @@ -205,17 +211,22 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { memtableExists = false } if !memtableExists { - metrics.Incr(metrics.KEY_MEMTABLE_MISS, metrics.GetShardTag(fc.ShardIdx)) - // Allocate buffer of exact size needed - no pool since readFromDisk already copies once + if metrics.Enabled() { + metrics.Incr(metrics.KEY_MEMTABLE_MISS, metrics.GetShardTag(fc.ShardIdx)) + } buf = make([]byte, length) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDiskAsync(int64(fileOffset), length, buf) if n != int(length) { - metrics.Incr(metrics.KEY_BAD_LENGTH_COUNT, append(metrics.GetShardTag(fc.ShardIdx))) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_LENGTH_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, false, shouldReWrite } } else { - metrics.Incr(metrics.KEY_MEMTABLE_HIT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_MEMTABLE_HIT, metrics.GetShardTag(fc.ShardIdx)) + } buf, exists = mt.GetBufForRead(int(offset), length) if !exists { panic("memtable exists but buf not 
found") @@ -225,11 +236,15 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { computedCR32 := crc32.ChecksumIEEE(buf[4:length]) gotKey := string(buf[4 : 4+len(key)]) if gotCR32 != computedCR32 { - metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + } return false, nil, 0, false, shouldReWrite } if gotKey != key { - metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + } return false, nil, 0, false, shouldReWrite } valLen := int(length) - 4 - len(key) @@ -242,12 +257,16 @@ func (fc *ShardCache) Get(key string) (bool, []byte, uint16, bool, bool) { func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, false, false // needsSlowPath = false (not found) } if status == indices.StatusExpired { - metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, true, false // needsSlowPath = false (expired) } @@ -268,16 +287,20 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) computedCR32 := crc32.ChecksumIEEE(buf[4:]) if gotCR32 != computedCR32 { - 
metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + } _, currMemId, _ := fc.mm.GetMemtable() shouldReWrite := fc.predictor.Predict(uint64(freq), uint64(lastAccess), memId, currMemId) - _ = shouldReWrite // Not returning shouldReWrite in fast path for simplicity + _ = shouldReWrite return false, nil, 0, false, false } gotKey := string(buf[4 : 4+len(key)]) if gotKey != key { - metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + } return false, nil, 0, false, false } @@ -290,12 +313,16 @@ func (fc *ShardCache) GetFastPath(key string) (bool, []byte, uint16, bool, bool) func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) { length, lastAccess, remainingTTL, freq, memId, offset, status := fc.keyIndex.Get(key) if status == indices.StatusNotFound { - metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_KEY_NOT_FOUND_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, false, false } if status == indices.StatusExpired { - metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_KEY_EXPIRED_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, true, false } @@ -305,7 +332,6 @@ func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) // Check memtable again (might have changed since fast path check) mt := fc.mm.GetMemtableById(memId) if mt != nil { - // Data is now in memtable, use fast path logic buf, exists := 
mt.GetBufForRead(int(offset), length) if !exists { panic("memtable exists but buf not found") @@ -313,12 +339,13 @@ func (fc *ShardCache) GetSlowPath(key string) (bool, []byte, uint16, bool, bool) return fc.validateAndReturnBuffer(key, buf, length, memId, remainingTTL, shouldReWrite) } - // Read from disk - allocate buffer of exact size needed (no pool since readFromDisk already copies once) buf := make([]byte, length) fileOffset := uint64(memId)*uint64(fc.mm.Capacity) + uint64(offset) n := fc.readFromDisk(int64(fileOffset), length, buf) if n != int(length) { - metrics.Incr(metrics.KEY_BAD_LENGTH_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_LENGTH_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return false, nil, 0, false, shouldReWrite } @@ -330,13 +357,17 @@ func (fc *ShardCache) validateAndReturnBuffer(key string, buf []byte, length uin gotCR32 := indices.ByteOrder.Uint32(buf[0:4]) computedCR32 := crc32.ChecksumIEEE(buf[4:length]) if gotCR32 != computedCR32 { - metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_CR32_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + } return false, nil, 0, false, shouldReWrite } gotKey := string(buf[4 : 4+len(key)]) if gotKey != key { - metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, append(metrics.GetShardTag(fc.ShardIdx), metrics.GetMemtableTag(memId)...)) + } return false, nil, 0, false, shouldReWrite } @@ -418,11 +449,15 @@ func (fc *ShardCache) processBuffer(key string, buf []byte, length uint16) ReadR gotKey := string(buf[4 : 4+len(key)]) if gotCR32 != computedCR32 { - metrics.Incr(metrics.KEY_BAD_CR32_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + 
metrics.Incr(metrics.KEY_BAD_CR32_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return ReadResult{Found: false, Error: fmt.Errorf("crc mismatch")} } if gotKey != key { - metrics.Incr(metrics.KEY_BAD_KEY_COUNT, metrics.GetShardTag(fc.ShardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_BAD_KEY_COUNT, metrics.GetShardTag(fc.ShardIdx)) + } return ReadResult{Found: false, Error: fmt.Errorf("key mismatch")} } diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index b8e4e63c..b77f67b9 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -230,14 +230,16 @@ func (wc *WrapCache) PutLL(key string, value []byte, exptimeInMinutes uint16) er Result: result, } - if h32%100 < 10 { + if metrics.Enabled() && h32%100 < 10 { metrics.Incr(metrics.KEY_RINGBUFFER_ACTIVE_ENTRIES, metrics.GetShardTag(shardIdx)) } op := <-result filecache.ErrorPool.Put(result) - metrics.Incr(metrics.KEY_PUTS, metrics.GetShardTag(shardIdx)) - metrics.Timing(metrics.KEY_PUT_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_PUTS, metrics.GetShardTag(shardIdx)) + metrics.Timing(metrics.KEY_PUT_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) + } return op } @@ -273,14 +275,16 @@ func (wc *WrapCache) GetLL(key string) ([]byte, bool, bool) { filecache.ReadResultPool.Put(result) filecache.ReadRequestPool.Put(req) - if op.Found && !op.Expired { - metrics.Incr(metrics.KEY_HITS, metrics.GetShardTag(shardIdx)) - } - if op.Expired { - metrics.Incr(metrics.KEY_EXPIRED_ENTRIES, metrics.GetShardTag(shardIdx)) + if metrics.Enabled() { + if op.Found && !op.Expired { + metrics.Incr(metrics.KEY_HITS, metrics.GetShardTag(shardIdx)) + } + if op.Expired { + metrics.Incr(metrics.KEY_EXPIRED_ENTRIES, metrics.GetShardTag(shardIdx)) + } + metrics.Timing(metrics.KEY_GET_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) + metrics.Incr(metrics.KEY_GETS, metrics.GetShardTag(shardIdx)) } - 
metrics.Timing(metrics.KEY_GET_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) - metrics.Incr(metrics.KEY_GETS, metrics.GetShardTag(shardIdx)) return op.Data, op.Found, op.Expired } @@ -290,14 +294,21 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro h32 := wc.Hash(key) shardIdx := h32 % uint32(len(wc.shards)) - start := time.Now() - defer func() { - metrics.Timing(metrics.KEY_PUT_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) - }() + var start time.Time + if metrics.Enabled() { + start = time.Now() + defer func() { + metrics.Timing(metrics.KEY_PUT_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) + }() + } - start = time.Now() + if metrics.Enabled() { + start = time.Now() + } wc.shardLocks[shardIdx].Lock() - metrics.Timing(metrics.LATENCY_WLOCK, time.Since(start), []string{}) + if metrics.Enabled() { + metrics.Timing(metrics.LATENCY_WLOCK, time.Since(start), []string{}) + } defer wc.shardLocks[shardIdx].Unlock() err := wc.shards[shardIdx].Put(key, value, exptimeInMinutes) @@ -305,9 +316,11 @@ func (wc *WrapCache) Put(key string, value []byte, exptimeInMinutes uint16) erro log.Error().Err(err).Msgf("Put failed for key: %s", key) return fmt.Errorf("put failed for key: %s", key) } - metrics.Incr(metrics.KEY_PUTS, metrics.GetShardTag(shardIdx)) - if h32%100 < 10 { - metrics.Incr(metrics.KEY_RINGBUFFER_ACTIVE_ENTRIES, metrics.GetShardTag(shardIdx)) + if metrics.Enabled() { + metrics.Incr(metrics.KEY_PUTS, metrics.GetShardTag(shardIdx)) + if h32%100 < 10 { + metrics.Incr(metrics.KEY_RINGBUFFER_ACTIVE_ENTRIES, metrics.GetShardTag(shardIdx)) + } } return nil @@ -317,10 +330,13 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { h32 := wc.Hash(key) shardIdx := h32 % uint32(len(wc.shards)) - start := time.Now() - defer func() { - metrics.Timing(metrics.KEY_GET_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) - }() + var start time.Time + if metrics.Enabled() { + start = time.Now() + defer 
func() { + metrics.Timing(metrics.KEY_GET_LATENCY, time.Since(start), metrics.GetShardTag(shardIdx)) + }() + } var keyFound bool var val []byte @@ -357,15 +373,19 @@ func (wc *WrapCache) Get(key string) ([]byte, bool, bool) { } - if keyFound && !expired { - metrics.Incr(metrics.KEY_HITS, metrics.GetShardTag(shardIdx)) - } - if expired { - metrics.Incr(metrics.KEY_EXPIRED_ENTRIES, metrics.GetShardTag(shardIdx)) + if metrics.Enabled() { + if keyFound && !expired { + metrics.Incr(metrics.KEY_HITS, metrics.GetShardTag(shardIdx)) + } + if expired { + metrics.Incr(metrics.KEY_EXPIRED_ENTRIES, metrics.GetShardTag(shardIdx)) + } + metrics.Incr(metrics.KEY_GETS, metrics.GetShardTag(shardIdx)) + if shouldReWrite { + metrics.Incr(metrics.KEY_REWRITES, metrics.GetShardTag(shardIdx)) + } } - metrics.Incr(metrics.KEY_GETS, metrics.GetShardTag(shardIdx)) if shouldReWrite { - metrics.Incr(metrics.KEY_REWRITES, metrics.GetShardTag(shardIdx)) wc.Put(key, valCopy, remainingTTL) } diff --git a/flashring/pkg/metrics/metric.go b/flashring/pkg/metrics/metric.go index db127882..971f4bec 100644 --- a/flashring/pkg/metrics/metric.go +++ b/flashring/pkg/metrics/metric.go @@ -1,6 +1,9 @@ package metrics import ( + "os" + "strconv" + "strings" "sync" "time" @@ -9,6 +12,60 @@ import ( "github.com/spf13/viper" ) +// Flashring metric keys +const ( + KEY_GET_LATENCY = "flashring_get_latency" + KEY_PUT_LATENCY = "flashring_put_latency" + KEY_RTHROUGHPUT = "flashring_rthroughput" + KEY_WTHROUGHPUT = "flashring_wthroughput" + KEY_HITRATE = "flashring_hitrate" + KEY_ACTIVE_ENTRIES = "flashring_active_entries" + KEY_EXPIRED_ENTRIES = "flashring_expired_entries" + KEY_REWRITES = "flashring_rewrites" + KEY_GETS = "flashring_gets" + KEY_PUTS = "flashring_puts" + KEY_HITS = "flashring_hits" + + KEY_KEY_NOT_FOUND_COUNT = "flashring_key_not_found_count" + KEY_KEY_EXPIRED_COUNT = "flashring_key_expired_count" + KEY_BAD_DATA_COUNT = "flashring_bad_data_count" + KEY_BAD_LENGTH_COUNT = 
"flashring_bad_length_count" + KEY_BAD_CR32_COUNT = "flashring_bad_cr32_count" + KEY_BAD_KEY_COUNT = "flashring_bad_key_count" + KEY_DELETED_KEY_COUNT = "flashring_deleted_key_count" + + KEY_WRITE_COUNT = "flashring_write_count" + KEY_PUNCH_HOLE_COUNT = "flashring_punch_hole_count" + KEY_PREAD_COUNT = "flashring_pread_count" + + KEY_TRIM_HEAD_LATENCY = "flashring_wrap_file_trim_head_latency" + KEY_PREAD_LATENCY = "flashring_pread_latency" + KEY_PWRITE_LATENCY = "flashring_pwrite_latency" + + KEY_MEMTABLE_FLUSH_COUNT = "flashring_memtable_flush_count" + + LATENCY_RLOCK = "flashring_rlock_latency" + LATENCY_WLOCK = "flashring_wlock_latency" + + KEY_RINGBUFFER_ACTIVE_ENTRIES = "flashring_ringbuffer_active_entries" + KEY_MEMTABLE_ENTRY_COUNT = "flashring_memtable_entry_count" + KEY_MEMTABLE_HIT = "flashring_memtable_hit" + KEY_MEMTABLE_MISS = "flashring_memtable_miss" + KEY_DATA_LENGTH = "flashring_data_length" + KEY_IOURING_SIZE = "flashring_iouring_size" +) + +// Flashring tag keys +const ( + TAG_LATENCY_PERCENTILE = "latency_percentile" + TAG_VALUE_P25 = "p25" + TAG_VALUE_P50 = "p50" + TAG_VALUE_P99 = "p99" + TAG_SHARD_IDX = "shard_idx" + TAG_MEMTABLE_ID = "memtable_id" +) + +// Application-level metric keys const ( ApiRequestCount = "api_request_count" ApiRequestLatency = "api_request_latency" @@ -21,16 +78,27 @@ const ( ) var ( - // it is safe to use one client from multiple goroutines simultaneously - statsDClient = getDefaultClient() - // by default full sampling (1.0 = 100%) + statsDClient = getDefaultClient() samplingRate = 0.1 telegrafAddress = "localhost:8125" appName = "" initialized = false once sync.Once + + // When false, all Timing/Count/Incr/Gauge calls are no-ops (zero allocations). + // Controlled by FLASHRING_METRICS_ENABLED env var ("true"/"1" to enable). + // Defaults to true for backward compatibility. 
+ metricsEnabled = loadMetricsEnabled() ) +func loadMetricsEnabled() bool { + v := os.Getenv("FLASHRING_METRICS_ENABLED") + if v == "" { + return true + } + return strings.EqualFold(v, "true") || v == "1" +} + // Init initializes the metrics client func Init() { if initialized { @@ -52,7 +120,7 @@ func Init() { log.Panic().AnErr("StatsD client initialization failed", err) } log.Info().Msgf("Metrics client initialized with telegraf address - %s, global tags - %v, and "+ - "sampling rate - %f", telegrafAddress, globalTags, samplingRate) + "sampling rate - %f, flashring metrics enabled - %v", telegrafAddress, globalTags, samplingRate, metricsEnabled) initialized = true }) } @@ -77,8 +145,11 @@ func getGlobalTags() []string { } } -// Timing sends timing information +// Timing sends timing information. No-op when metrics are disabled. func Timing(name string, value time.Duration, tags []string) { + if !metricsEnabled { + return + } tags = append(tags, TagAsString(TagService, appName)) err := statsDClient.Timing(name, value, tags, samplingRate) if err != nil { @@ -86,8 +157,11 @@ func Timing(name string, value time.Duration, tags []string) { } } -// Count Increases metric counter by value +// Count increases metric counter by value. No-op when metrics are disabled. func Count(name string, value int64, tags []string) { + if !metricsEnabled { + return + } tags = append(tags, TagAsString(TagService, appName)) err := statsDClient.Count(name, value, tags, samplingRate) if err != nil { @@ -95,15 +169,36 @@ func Count(name string, value int64, tags []string) { } } -// Incr Increases metric counter by 1 +// Incr increases metric counter by 1. No-op when metrics are disabled. func Incr(name string, tags []string) { + if !metricsEnabled { + return + } Count(name, 1, tags) } +// Gauge sets a gauge value. No-op when metrics are disabled. 
func Gauge(name string, value float64, tags []string) { + if !metricsEnabled { + return + } tags = append(tags, TagAsString(TagService, appName)) err := statsDClient.Gauge(name, value, tags, samplingRate) if err != nil { log.Warn().AnErr("Error occurred while doing statsd gauge", err) } } + +// Enabled returns whether flashring metrics are enabled. +// Call sites should check this before allocating tags to avoid heap allocations. +func Enabled() bool { + return metricsEnabled +} + +func GetShardTag(shardIdx uint32) []string { + return BuildTag(NewTag(TAG_SHARD_IDX, strconv.Itoa(int(shardIdx)))) +} + +func GetMemtableTag(memtableId uint32) []string { + return BuildTag(NewTag(TAG_MEMTABLE_ID, strconv.Itoa(int(memtableId)))) +} diff --git a/flashring/pkg/metrics/statsd_logger.go b/flashring/pkg/metrics/statsd_logger.go deleted file mode 100644 index 9f12e24a..00000000 --- a/flashring/pkg/metrics/statsd_logger.go +++ /dev/null @@ -1,64 +0,0 @@ -package metrics - -import "strconv" - -const ( - KEY_GET_LATENCY = "flashring_get_latency" - KEY_PUT_LATENCY = "flashring_put_latency" - KEY_RTHROUGHPUT = "flashring_rthroughput" - KEY_WTHROUGHPUT = "flashring_wthroughput" - KEY_HITRATE = "flashring_hitrate" - KEY_ACTIVE_ENTRIES = "flashring_active_entries" - KEY_EXPIRED_ENTRIES = "flashring_expired_entries" - KEY_REWRITES = "flashring_rewrites" - KEY_GETS = "flashring_gets" - KEY_PUTS = "flashring_puts" - KEY_HITS = "flashring_hits" - - KEY_KEY_NOT_FOUND_COUNT = "flashring_key_not_found_count" - KEY_KEY_EXPIRED_COUNT = "flashring_key_expired_count" - KEY_BAD_DATA_COUNT = "flashring_bad_data_count" - KEY_BAD_LENGTH_COUNT = "flashring_bad_length_count" - KEY_BAD_CR32_COUNT = "flashring_bad_cr32_count" - KEY_BAD_KEY_COUNT = "flashring_bad_key_count" - KEY_DELETED_KEY_COUNT = "flashring_deleted_key_count" - - TAG_LATENCY_PERCENTILE = "latency_percentile" - TAG_VALUE_P25 = "p25" - TAG_VALUE_P50 = "p50" - TAG_VALUE_P99 = "p99" - TAG_SHARD_IDX = "shard_idx" - TAG_MEMTABLE_ID = 
"memtable_id" - - KEY_WRITE_COUNT = "flashring_write_count" - KEY_PUNCH_HOLE_COUNT = "flashring_punch_hole_count" - KEY_PREAD_COUNT = "flashring_pread_count" - - KEY_TRIM_HEAD_LATENCY = "flashring_wrap_file_trim_head_latency" - KEY_PREAD_LATENCY = "flashring_pread_latency" - KEY_PWRITE_LATENCY = "flashring_pwrite_latency" - - KEY_MEMTABLE_FLUSH_COUNT = "flashring_memtable_flush_count" - - LATENCY_RLOCK = "flashring_rlock_latency" - LATENCY_WLOCK = "flashring_wlock_latency" - - KEY_RINGBUFFER_ACTIVE_ENTRIES = "flashring_ringbuffer_active_entries" - - KEY_MEMTABLE_ENTRY_COUNT = "flashring_memtable_entry_count" - - KEY_MEMTABLE_HIT = "flashring_memtable_hit" - KEY_MEMTABLE_MISS = "flashring_memtable_miss" - - KEY_DATA_LENGTH = "flashring_data_length" - - KEY_IOURING_SIZE = "flashring_iouring_size" -) - -func GetShardTag(shardIdx uint32) []string { - return BuildTag(NewTag(TAG_SHARD_IDX, strconv.Itoa(int(shardIdx)))) -} - -func GetMemtableTag(memtableId uint32) []string { - return BuildTag(NewTag(TAG_MEMTABLE_ID, strconv.Itoa(int(memtableId)))) -} From af09e730dae354a028420a211a541e99c687cdab Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Mon, 23 Feb 2026 04:10:20 +0000 Subject: [PATCH 49/53] disable metrics by default --- flashring/pkg/metrics/metric.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/pkg/metrics/metric.go b/flashring/pkg/metrics/metric.go index 971f4bec..e977bb77 100644 --- a/flashring/pkg/metrics/metric.go +++ b/flashring/pkg/metrics/metric.go @@ -94,7 +94,7 @@ var ( func loadMetricsEnabled() bool { v := os.Getenv("FLASHRING_METRICS_ENABLED") if v == "" { - return true + return false } return strings.EqualFold(v, "true") || v == "1" } From 36de0fa60fe3e140fb512ccbbc002957e7303718 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Mon, 23 Feb 2026 12:29:12 +0000 Subject: [PATCH 50/53] wait 1 milli second --- flashring/internal/fs/batch_iouring.go | 40 ++++++++++++++++++++------ flashring/pkg/cache/cache.go | 4 +-- 2 
files changed, 33 insertions(+), 11 deletions(-) diff --git a/flashring/internal/fs/batch_iouring.go b/flashring/internal/fs/batch_iouring.go index d0f2b0aa..13c8267f 100644 --- a/flashring/internal/fs/batch_iouring.go +++ b/flashring/internal/fs/batch_iouring.go @@ -50,6 +50,7 @@ type BatchIoUringReader struct { ring *IoUring reqCh chan *batchReadRequest maxBatch int + window time.Duration // wait up to this for more requests before submit (0 = drain only) closeCh chan struct{} wg sync.WaitGroup } @@ -58,7 +59,7 @@ type BatchIoUringReader struct { type BatchIoUringConfig struct { RingDepth uint32 // io_uring SQ/CQ size (default 256) MaxBatch int // max requests per batch (capped to RingDepth) - Window time.Duration // unused, kept for config compatibility + Window time.Duration // wait up to this for requests to accumulate before submit (e.g. 500*time.Microsecond); 0 = drain only, no wait QueueSize int // channel buffer size (default 1024) } @@ -84,6 +85,7 @@ func NewBatchIoUringReader(cfg BatchIoUringConfig) (*BatchIoUringReader, error) ring: ring, reqCh: make(chan *batchReadRequest, cfg.QueueSize), maxBatch: cfg.MaxBatch, + window: cfg.Window, closeCh: make(chan struct{}), } b.wg.Add(1) @@ -151,18 +153,38 @@ func (b *BatchIoUringReader) loop() { return } - // Phase 2: non-blocking drain -- grab everything already queued - // without waiting. Under load this naturally batches many requests; - // under low load the single request goes out immediately. + // Phase 2: drain with optional wait — if window > 0, wait up to window + // for more requests; otherwise non-blocking drain only. 
+ var timer *time.Timer + if b.window > 0 { + timer = time.NewTimer(b.window) + } drain: for len(batch) < b.maxBatch { - select { - case req := <-b.reqCh: - batch = append(batch, req) - default: - break drain + if b.window > 0 { + select { + case req := <-b.reqCh: + batch = append(batch, req) + case <-timer.C: + break drain + case <-b.closeCh: + if timer != nil { + timer.Stop() + } + return + } + } else { + select { + case req := <-b.reqCh: + batch = append(batch, req) + default: + break drain + } } } + if timer != nil { + timer.Stop() + } // Phase 3: submit and dispatch b.submitBatch(batch) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index b77f67b9..8424c38e 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -157,9 +157,9 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string) (*WrapCache, error) batchReader, err := fs.NewParallelBatchIoUringReader(fs.BatchIoUringConfig{ RingDepth: 256, MaxBatch: 256, - Window: time.Millisecond * 2, + Window: time.Millisecond, QueueSize: 1024, - }, 4) + }, 1) if err != nil { log.Error().Err(err).Msg("Failed to create batched io_uring reader, falling back to per-shard rings") batchReader = nil From 0602210bc574b94b9fa96ab585ae939458c7ba0f Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 24 Feb 2026 06:12:04 +0000 Subject: [PATCH 51/53] remove profiling and reduce aligned page count --- flashring/internal/fs/aligned_page.go | 16 +++++++--------- flashring/internal/shard/shard_cache.go | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/flashring/internal/fs/aligned_page.go b/flashring/internal/fs/aligned_page.go index c499ae36..099ccd9d 100644 --- a/flashring/internal/fs/aligned_page.go +++ b/flashring/internal/fs/aligned_page.go @@ -4,8 +4,6 @@ package fs import ( - "runtime/pprof" - "golang.org/x/sys/unix" ) @@ -16,7 +14,7 @@ const ( MAP_ANON = unix.MAP_ANON ) -var mmapProf = pprof.NewProfile("mmap") // will show up in /debug/pprof/ +// var 
mmapProf = pprof.NewProfile("mmap") // will show up in /debug/pprof/ type AlignedPage struct { Buf []byte @@ -28,9 +26,9 @@ func NewAlignedPage(pageSize int) *AlignedPage { if err != nil { panic(err) } - if pageSize > 0 { - mmapProf.Add(&b[0], pageSize) // attribute sz bytes to this callsite - } + // if pageSize > 0 { + // mmapProf.Add(&b[0], pageSize) // attribute sz bytes to this callsite + // } return &AlignedPage{ Buf: b, mmap: b, @@ -38,9 +36,9 @@ func NewAlignedPage(pageSize int) *AlignedPage { } func Unmap(p *AlignedPage) error { - if len(p.mmap) > 0 { - mmapProf.Remove(&p.mmap[0]) // release from custom profile - } + // if len(p.mmap) > 0 { + // mmapProf.Remove(&p.mmap[0]) // release from custom profile + // } if p.mmap != nil { err := unix.Munmap(p.mmap) if err != nil { diff --git a/flashring/internal/shard/shard_cache.go b/flashring/internal/shard/shard_cache.go index dcecaf97..4796be8b 100644 --- a/flashring/internal/shard/shard_cache.go +++ b/flashring/internal/shard/shard_cache.go @@ -87,7 +87,7 @@ func NewShardCache(config ShardCacheConfig, sl *sync.RWMutex) *ShardCache { i := fs.BLOCK_SIZE iMax := (1 << 16) for i < iMax { - sizeClasses = append(sizeClasses, allocators.SizeClass{Size: i, MinCount: 1000}) + sizeClasses = append(sizeClasses, allocators.SizeClass{Size: i, MinCount: 20}) i *= 2 } readPageAllocator, err := allocators.NewSlabAlignedPageAllocator(allocators.SlabAlignedPageAllocatorConfig{SizeClasses: sizeClasses}) From ccb8465b419422a426ca3afcec3e2a5873af5220 Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 24 Feb 2026 08:05:02 +0000 Subject: [PATCH 52/53] 4 iorings --- flashring/pkg/cache/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index 8424c38e..c3a067b3 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -159,7 +159,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string) (*WrapCache, error) MaxBatch: 
256, Window: time.Millisecond, QueueSize: 1024, - }, 1) + }, 4) if err != nil { log.Error().Err(err).Msg("Failed to create batched io_uring reader, falling back to per-shard rings") batchReader = nil From cc7b1ce1e5a6df28b922ea3f1191d184d80959dd Mon Sep 17 00:00:00 2001 From: Nilesh Solanki Date: Tue, 24 Feb 2026 08:31:02 +0000 Subject: [PATCH 53/53] 2 io urings --- flashring/pkg/cache/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashring/pkg/cache/cache.go b/flashring/pkg/cache/cache.go index c3a067b3..96324381 100644 --- a/flashring/pkg/cache/cache.go +++ b/flashring/pkg/cache/cache.go @@ -159,7 +159,7 @@ func NewWrapCache(config WrapCacheConfig, mountPoint string) (*WrapCache, error) MaxBatch: 256, Window: time.Millisecond, QueueSize: 1024, - }, 4) + }, 2) if err != nil { log.Error().Err(err).Msg("Failed to create batched io_uring reader, falling back to per-shard rings") batchReader = nil