From b377c703eab2b4c7e1350eeacb752a7ebbdef262 Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Tue, 23 Sep 2025 16:13:58 +0500 Subject: [PATCH 01/15] Codeq settings --- pkg/codec/decode.go | 17 +++++++++++++++++ pkg/codec/raptorq.go | 2 +- supernode/services/cascade/download.go | 10 ++++++++-- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/pkg/codec/decode.go b/pkg/codec/decode.go index beeed5a8..fe77d752 100644 --- a/pkg/codec/decode.go +++ b/pkg/codec/decode.go @@ -39,11 +39,28 @@ func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeRespons defer processor.Free() symbolsDir := filepath.Join(rq.symbolsBaseDir, req.ActionID) + // Ensure a clean scratch directory (avoid contamination from previous attempts) + if err := os.RemoveAll(symbolsDir); err != nil { + fields[logtrace.FieldError] = err.Error() + return DecodeResponse{}, fmt.Errorf("cleanup decode dir %s: %w", symbolsDir, err) + } if err := os.MkdirAll(symbolsDir, 0o755); err != nil { fields[logtrace.FieldError] = err.Error() return DecodeResponse{}, fmt.Errorf("mkdir %s: %w", symbolsDir, err) } + // Validate layout before writing any symbols + if len(req.Layout.Blocks) == 0 { + fields[logtrace.FieldError] = "empty layout" + return DecodeResponse{}, fmt.Errorf("invalid layout: no blocks present") + } + for _, blk := range req.Layout.Blocks { + if len(blk.Symbols) == 0 { + fields[logtrace.FieldError] = fmt.Sprintf("block_%d has no symbols", blk.BlockID) + return DecodeResponse{}, fmt.Errorf("invalid layout: block %d has no symbols", blk.BlockID) + } + } + // Build symbol->block mapping from layout and ensure block directories exist symbolToBlock := make(map[string]int) for _, blk := range req.Layout.Blocks { diff --git a/pkg/codec/raptorq.go b/pkg/codec/raptorq.go index 8e0c1c6c..4564bc1b 100644 --- a/pkg/codec/raptorq.go +++ b/pkg/codec/raptorq.go @@ -17,7 +17,7 @@ const ( // Limit RaptorQ processor memory usage to ~2 GiB rqMaxMemoryMB uint64 = 2 * 1024 // MB // 
Concurrency tuned for 2 GiB limit and typical 8+ core CPUs - rqConcurrency uint64 = 6 + rqConcurrency uint64 = 1 // Target single-block output for up to 1 GiB files with padding headroom (~1.25 GiB) rqBlockSize int = 1280 * 1024 * 1024 // bytes (1,280 MiB) ) diff --git a/supernode/services/cascade/download.go b/supernode/services/cascade/download.go index b8220045..363834bc 100644 --- a/supernode/services/cascade/download.go +++ b/supernode/services/cascade/download.go @@ -163,7 +163,7 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( if targetRequiredCount < 1 && totalSymbols > 0 { targetRequiredCount = 1 } - logtrace.Info(ctx, "Retrieving all symbols for decode", fields) + logtrace.Info(ctx, "Retrieving target-required symbols for decode", fields) // Enable retrieve metrics capture for this action cm.StartRetrieveCapture(actionID) @@ -173,7 +173,13 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( retrieveStart := time.Now() // Tag context with metrics task ID (actionID) ctxRetrieve := cm.WithTaskID(ctx, actionID) - symbols, err := task.P2PClient.BatchRetrieve(ctxRetrieve, allSymbols, totalSymbols, actionID) + // Retrieve only a fraction of symbols (targetRequiredCount) based on redundancy + // The DHT will short-circuit once it finds the required number across the provided keys + reqCount := targetRequiredCount + if reqCount > totalSymbols { + reqCount = totalSymbols + } + symbols, err := task.P2PClient.BatchRetrieve(ctxRetrieve, allSymbols, reqCount, actionID) if err != nil { fields[logtrace.FieldError] = err.Error() logtrace.Error(ctx, "batch retrieve failed", fields) From 6a225d3fbf1a4e299a9ec811d9c5635efe62c793 Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Tue, 23 Sep 2025 20:22:39 +0500 Subject: [PATCH 02/15] Fix concurrent write panic in metrics --- pkg/p2pmetrics/metrics.go | 104 ++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/pkg/p2pmetrics/metrics.go 
b/pkg/p2pmetrics/metrics.go index 945225db..165f0eaa 100644 --- a/pkg/p2pmetrics/metrics.go +++ b/pkg/p2pmetrics/metrics.go @@ -197,6 +197,7 @@ func BuildStoreEventPayloadFromCollector(taskID string) map[string]any { // Retrieve session type retrieveSession struct { + mu sync.RWMutex CallsByIP map[string][]Call FoundLocal int FoundNet int @@ -208,33 +209,94 @@ type retrieveSession struct { var retrieveSessions = struct{ m map[string]*retrieveSession }{m: map[string]*retrieveSession{}} -// RegisterRetrieveBridge hooks retrieve callbacks into the retrieve collector. +// internal event channel for retrieve metrics (per task) +type retrieveEvent struct { + typ int // 0: per-node call, 1: found-local update + call Call + n int +} + +var retrieveEventChans = struct { + mu sync.Mutex + m map[string]chan retrieveEvent +}{m: map[string]chan retrieveEvent{}} + +// StartRetrieveCapture hooks retrieve callbacks into a buffered channel and a +// single goroutine that serializes updates to avoid concurrent map writes. 
func StartRetrieveCapture(taskID string) { - RegisterRetrieveHook(taskID, func(c Call) { - s := retrieveSessions.m[taskID] - if s == nil { - s = &retrieveSession{CallsByIP: map[string][]Call{}} - retrieveSessions.m[taskID] = s + // Create or get session upfront + s := retrieveSessions.m[taskID] + if s == nil { + s = &retrieveSession{CallsByIP: map[string][]Call{}} + retrieveSessions.m[taskID] = s + } + + // Per-task buffered channel + ch := make(chan retrieveEvent, 4096) + retrieveEventChans.mu.Lock() + retrieveEventChans.m[taskID] = ch + retrieveEventChans.mu.Unlock() + + // Worker goroutine to serialize writes + go func(taskID string, ch <-chan retrieveEvent) { + for ev := range ch { + sess := retrieveSessions.m[taskID] + if sess == nil { + sess = &retrieveSession{CallsByIP: map[string][]Call{}} + retrieveSessions.m[taskID] = sess + } + switch ev.typ { + case 0: // per-node call + key := ev.call.IP + if key == "" { + key = ev.call.Address + } + sess.mu.Lock() + if sess.CallsByIP == nil { + sess.CallsByIP = map[string][]Call{} + } + sess.CallsByIP[key] = append(sess.CallsByIP[key], ev.call) + sess.mu.Unlock() + case 1: // found-local update + sess.FoundLocal = ev.n + } } - key := c.IP - if key == "" { - key = c.Address + }(taskID, ch) + + // Register hooks that enqueue events (non-blocking) + RegisterRetrieveHook(taskID, func(c Call) { + retrieveEventChans.mu.Lock() + ch, ok := retrieveEventChans.m[taskID] + retrieveEventChans.mu.Unlock() + if ok { + select { + case ch <- retrieveEvent{typ: 0, call: c}: + default: // drop if buffer is full + } } - s.CallsByIP[key] = append(s.CallsByIP[key], c) }) RegisterFoundLocalHook(taskID, func(n int) { - s := retrieveSessions.m[taskID] - if s == nil { - s = &retrieveSession{CallsByIP: map[string][]Call{}} - retrieveSessions.m[taskID] = s + retrieveEventChans.mu.Lock() + ch, ok := retrieveEventChans.m[taskID] + retrieveEventChans.mu.Unlock() + if ok { + select { + case ch <- retrieveEvent{typ: 1, n: n}: + default: + } } - 
s.FoundLocal = n }) } func StopRetrieveCapture(taskID string) { UnregisterRetrieveHook(taskID) UnregisterFoundLocalHook(taskID) + retrieveEventChans.mu.Lock() + if ch, ok := retrieveEventChans.m[taskID]; ok { + delete(retrieveEventChans.m, taskID) + close(ch) + } + retrieveEventChans.mu.Unlock() } // SetRetrieveBatchSummary sets counts for a retrieval attempt. @@ -284,6 +346,16 @@ func BuildDownloadEventPayloadFromCollector(taskID string) map[string]any { }, } } + // Create a snapshot copy of CallsByIP to avoid concurrent map access + s.mu.RLock() + callsCopy := make(map[string][]Call, len(s.CallsByIP)) + for k, v := range s.CallsByIP { + vv := make([]Call, len(v)) + copy(vv, v) + callsCopy[k] = vv + } + s.mu.RUnlock() + return map[string]any{ "retrieve": map[string]any{ "keys": s.Keys, @@ -292,7 +364,7 @@ func BuildDownloadEventPayloadFromCollector(taskID string) map[string]any { "found_net": s.FoundNet, "retrieve_ms": s.RetrieveMS, "decode_ms": s.DecodeMS, - "calls_by_ip": s.CallsByIP, + "calls_by_ip": callsCopy, }, } } From 306d295615f67130f86689f9a6dc8738fed3cf02 Mon Sep 17 00:00:00 2001 From: j-rafique Date: Wed, 24 Sep 2025 15:06:58 +0500 Subject: [PATCH 03/15] Enable Pprof --- supernode/cmd/start.go | 37 +++++++++++++++++++++++++++++++++++++ supernode/config/config.go | 7 +++++++ 2 files changed, 44 insertions(+) diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go index befaf85d..c8914270 100644 --- a/supernode/cmd/start.go +++ b/supernode/cmd/start.go @@ -3,9 +3,12 @@ package cmd import ( "context" "fmt" + "net/http" + _ "net/http/pprof" "os" "os/signal" "path/filepath" + "strings" "syscall" "github.com/LumeraProtocol/supernode/v2/p2p" @@ -139,6 +142,40 @@ The supernode will connect to the Lumera network and begin participating in the return fmt.Errorf("failed to create gateway server: %w", err) } + // Start profiling server if enabled or on testnet + isTestnet := strings.Contains(strings.ToLower(appConfig.LumeraClientConfig.ChainID), "testnet") + 
shouldEnableProfiling := appConfig.ProfilingConfig.Enabled || isTestnet + + if shouldEnableProfiling { + bindAddr := appConfig.ProfilingConfig.BindAddress + if bindAddr == "" { + if isTestnet { + bindAddr = "0.0.0.0" // Allow external access on testnet + } else { + bindAddr = "127.0.0.1" // Default to localhost + } + } + port := appConfig.ProfilingConfig.Port + if port == 0 { + port = 6060 // Default pprof port + } + + profilingAddr := fmt.Sprintf("%s:%d", bindAddr, port) + + logtrace.Info(ctx, "Starting profiling server", logtrace.Fields{ + "address": profilingAddr, + "chain_id": appConfig.LumeraClientConfig.ChainID, + "is_testnet": isTestnet, + "auto_enabled": isTestnet && !appConfig.ProfilingConfig.Enabled, + }) + + go func() { + if err := http.ListenAndServe(profilingAddr, nil); err != nil { + logtrace.Error(ctx, "Profiling server error", logtrace.Fields{"error": err.Error()}) + } + }() + } + // Start the services go func() { if err := RunServices(ctx, grpcServer, cService, *p2pService, gatewayServer); err != nil { diff --git a/supernode/config/config.go b/supernode/config/config.go index e3910ac2..a09190ce 100644 --- a/supernode/config/config.go +++ b/supernode/config/config.go @@ -44,12 +44,19 @@ type LogConfig struct { Level string `yaml:"level"` } +type ProfilingConfig struct { + Enabled bool `yaml:"enabled"` + Port uint16 `yaml:"port"` + BindAddress string `yaml:"bind_address,omitempty"` +} + type Config struct { SupernodeConfig `yaml:"supernode"` KeyringConfig `yaml:"keyring"` P2PConfig `yaml:"p2p"` LumeraClientConfig `yaml:"lumera"` RaptorQConfig `yaml:"raptorq"` + ProfilingConfig `yaml:"profiling"` // Store base directory (not from YAML) BaseDir string `yaml:"-"` From bca91a118ad642a84414407d55cfd65d6988e3f7 Mon Sep 17 00:00:00 2001 From: j-rafique Date: Wed, 24 Sep 2025 15:17:05 +0500 Subject: [PATCH 04/15] Remove config --- supernode/cmd/start.go | 27 ++++++--------------------- supernode/config/config.go | 7 ------- 2 files changed, 6 insertions(+), 
28 deletions(-) diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go index c8914270..31c19b2a 100644 --- a/supernode/cmd/start.go +++ b/supernode/cmd/start.go @@ -142,31 +142,16 @@ The supernode will connect to the Lumera network and begin participating in the return fmt.Errorf("failed to create gateway server: %w", err) } - // Start profiling server if enabled or on testnet + // Start profiling server on testnet only isTestnet := strings.Contains(strings.ToLower(appConfig.LumeraClientConfig.ChainID), "testnet") - shouldEnableProfiling := appConfig.ProfilingConfig.Enabled || isTestnet - - if shouldEnableProfiling { - bindAddr := appConfig.ProfilingConfig.BindAddress - if bindAddr == "" { - if isTestnet { - bindAddr = "0.0.0.0" // Allow external access on testnet - } else { - bindAddr = "127.0.0.1" // Default to localhost - } - } - port := appConfig.ProfilingConfig.Port - if port == 0 { - port = 6060 // Default pprof port - } - profilingAddr := fmt.Sprintf("%s:%d", bindAddr, port) + if isTestnet { + profilingAddr := "0.0.0.0:6060" logtrace.Info(ctx, "Starting profiling server", logtrace.Fields{ - "address": profilingAddr, - "chain_id": appConfig.LumeraClientConfig.ChainID, - "is_testnet": isTestnet, - "auto_enabled": isTestnet && !appConfig.ProfilingConfig.Enabled, + "address": profilingAddr, + "chain_id": appConfig.LumeraClientConfig.ChainID, + "is_testnet": isTestnet, }) go func() { diff --git a/supernode/config/config.go b/supernode/config/config.go index a09190ce..e3910ac2 100644 --- a/supernode/config/config.go +++ b/supernode/config/config.go @@ -44,19 +44,12 @@ type LogConfig struct { Level string `yaml:"level"` } -type ProfilingConfig struct { - Enabled bool `yaml:"enabled"` - Port uint16 `yaml:"port"` - BindAddress string `yaml:"bind_address,omitempty"` -} - type Config struct { SupernodeConfig `yaml:"supernode"` KeyringConfig `yaml:"keyring"` P2PConfig `yaml:"p2p"` LumeraClientConfig `yaml:"lumera"` RaptorQConfig `yaml:"raptorq"` - ProfilingConfig 
`yaml:"profiling"` // Store base directory (not from YAML) BaseDir string `yaml:"-"` From efbe176dc7ad28547096f49093827eaa801c043a Mon Sep 17 00:00:00 2001 From: Matee ullah Malik <46045452+mateeullahmalik@users.noreply.github.com> Date: Wed, 24 Sep 2025 16:46:36 +0500 Subject: [PATCH 05/15] Codec callback (#188) * prepare decode * Test Fixes --- Makefile | 6 +- go.mod | 1 - p2p/kademlia/network.go | 16 +- pkg/codec/codec_mock.go | 12 +- pkg/codec/decode.go | 257 ++++++++++++++---- pkg/net/grpc/client/client.go | 6 +- pkg/net/grpc/client/client_test.go | 4 +- pkg/net/grpc/server/server.go | 8 +- pkg/net/grpc/server/server_test.go | 8 +- .../cascade/cascade_action_server_test.go | 2 +- .../cascade/adaptors/mocks/lumera_mock.go | 48 ++-- .../cascade/adaptors/mocks/p2p_mock.go | 10 +- .../cascade/adaptors/mocks/rq_mock.go | 31 ++- supernode/services/cascade/adaptors/rq.go | 7 +- .../cascade/mocks/cascade_interfaces_mock.go | 15 +- supernode/services/cascade/register_test.go | 10 +- supernode/services/cascade/service_test.go | 2 +- tests/system/go.mod | 1 + tests/system/go.sum | 4 + 19 files changed, 288 insertions(+), 160 deletions(-) diff --git a/Makefile b/Makefile index fe5a9852..01272fbf 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .PHONY: build build-release build-sncli build-sn-manager -.PHONY: install-lumera setup-supernodes system-test-setup +.PHONY: install-lumera setup-supernodes system-test-setup install-deps .PHONY: gen-cascade gen-supernode .PHONY: test-e2e test-unit test-integration test-system @@ -140,9 +140,9 @@ test-e2e: # Run cascade e2e tests only test-cascade: @echo "Running cascade e2e tests..." - @cd tests/system && go test -tags=system_test -v -run TestCascadeE2E . + @cd tests/system && go mod tidy && go test -tags=system_test -v -run TestCascadeE2E . # Run sn-manager e2e tests only test-sn-manager: @echo "Running sn-manager e2e tests..." - @cd tests/system && go test -tags=system_test -v -run '^TestSNManager' . 
\ No newline at end of file + @cd tests/system && go test -tags=system_test -v -run '^TestSNManager' . diff --git a/go.mod b/go.mod index b7f53ec1..46091df2 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,6 @@ module github.com/LumeraProtocol/supernode/v2 go 1.24.1 replace ( - github.com/LumeraProtocol/supernode/v2/supernode => ./supernode github.com/bytedance/sonic => github.com/bytedance/sonic v1.14.0 github.com/bytedance/sonic/loader => github.com/bytedance/sonic/loader v0.3.0 ) diff --git a/p2p/kademlia/network.go b/p2p/kademlia/network.go index a2322ff7..935d1583 100644 --- a/p2p/kademlia/network.go +++ b/p2p/kademlia/network.go @@ -2,7 +2,6 @@ package kademlia import ( "context" - "encoding/hex" "fmt" "io" "net" @@ -951,7 +950,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, i++ } - values, count, err := s.dht.store.RetrieveBatchValues(ctx, keys, true) + values, count, err := s.dht.store.RetrieveBatchValues(ctx, keys, false) if err != nil { err = errors.Errorf("batch find values: %w", err) s.appendRetrieveEntry(message.Sender.IP, RecentBatchRetrieveEntry{ @@ -976,17 +975,8 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, for i, key := range keys { val := KeyValWithClosest{ - Value: values[i], - } - if len(val.Value) == 0 { - decodedKey, err := hex.DecodeString(keys[i]) - if err != nil { - err = errors.Errorf("batch find vals: decode key: %w - key %s", err, keys[i]) - return s.generateResponseMessage(BatchGetValues, message.Sender, ResultFailed, err.Error()) - } - - nodes, _ := s.dht.ht.closestContacts(Alpha, decodedKey, []*Node{message.Sender}) - val.Closest = nodes.Nodes + Value: values[i], + Closest: make([]*Node, 0), // for compatibility, not used - each node now has full view of the whole network } request.Data[key] = val diff --git a/pkg/codec/codec_mock.go b/pkg/codec/codec_mock.go index 9c3cf864..09484cee 100644 --- a/pkg/codec/codec_mock.go +++ b/pkg/codec/codec_mock.go @@ -1,10 +1,5 @@ 
// Code generated by MockGen. DO NOT EDIT. // Source: codec.go -// -// Generated by this command: -// -// mockgen -destination=codec_mock.go -package=codec -source=codec.go -// // Package codec is a generated GoMock package. package codec @@ -13,14 +8,13 @@ import ( context "context" reflect "reflect" - gomock "go.uber.org/mock/gomock" + gomock "github.com/golang/mock/gomock" ) // MockCodec is a mock of Codec interface. type MockCodec struct { ctrl *gomock.Controller recorder *MockCodecMockRecorder - isgomock struct{} } // MockCodecMockRecorder is the mock recorder for MockCodec. @@ -50,7 +44,7 @@ func (m *MockCodec) Decode(ctx context.Context, req DecodeRequest) (DecodeRespon } // Decode indicates an expected call of Decode. -func (mr *MockCodecMockRecorder) Decode(ctx, req any) *gomock.Call { +func (mr *MockCodecMockRecorder) Decode(ctx, req interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Decode", reflect.TypeOf((*MockCodec)(nil).Decode), ctx, req) } @@ -65,7 +59,7 @@ func (m *MockCodec) Encode(ctx context.Context, req EncodeRequest) (EncodeRespon } // Encode indicates an expected call of Encode. 
-func (mr *MockCodecMockRecorder) Encode(ctx, req any) *gomock.Call { +func (mr *MockCodecMockRecorder) Encode(ctx, req interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Encode", reflect.TypeOf((*MockCodec)(nil).Encode), ctx, req) } diff --git a/pkg/codec/decode.go b/pkg/codec/decode.go index fe77d752..bd3b0231 100644 --- a/pkg/codec/decode.go +++ b/pkg/codec/decode.go @@ -5,9 +5,13 @@ import ( "encoding/json" "fmt" "os" + "path" "path/filepath" + "strings" + "sync" raptorq "github.com/LumeraProtocol/rq-go" + "github.com/LumeraProtocol/supernode/v2/pkg/errors" "github.com/LumeraProtocol/supernode/v2/pkg/logtrace" ) @@ -18,81 +22,160 @@ type DecodeRequest struct { } type DecodeResponse struct { - Path string + FilePath string DecodeTmpDir string } -func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeResponse, error) { +// Workspace holds paths & reverse index for prepared decoding. +type Workspace struct { + ActionID string + SymbolsDir string // ...// + BlockDirs []string // index = blockID (or 0 if single block) + symbolToBlock map[string]int + mu sync.RWMutex // protects symbolToBlock reads if you expand it later +} + +// PrepareDecode creates the on-disk workspace for decoding and returns: +// - blockPaths[0] => where to write symbols for block 0 (your single-block case) +// - Write(block, id, data) callback that writes symbol bytes directly to disk +// - Cleanup() to remove the workspace on abort (no-op if you want to keep it) +func (rq *raptorQ) PrepareDecode( + ctx context.Context, + actionID string, + layout Layout, +) (blockPaths []string, Write func(block int, symbolID string, data []byte) (string, error), Cleanup func() error, ws *Workspace, err error) { fields := logtrace.Fields{ - logtrace.FieldMethod: "Decode", + logtrace.FieldMethod: "PrepareDecode", logtrace.FieldModule: "rq", - logtrace.FieldActionID: req.ActionID, + logtrace.FieldActionID: actionID, } - logtrace.Info(ctx, 
"RaptorQ decode request received", fields) - // Use deterministic processor settings (matching encoder) - processor, err := raptorq.NewRaptorQProcessor(rqSymbolSize, rqRedundancyFactor, rqMaxMemoryMB, rqConcurrency) - if err != nil { + // Create root symbols dir for this action + symbolsDir := filepath.Join(rq.symbolsBaseDir, actionID) + if err := os.MkdirAll(symbolsDir, 0o755); err != nil { fields[logtrace.FieldError] = err.Error() - return DecodeResponse{}, fmt.Errorf("create RaptorQ processor: %w", err) + logtrace.Error(ctx, "mkdir symbols base dir failed", fields) + return nil, nil, nil, nil, fmt.Errorf("mkdir %s: %w", symbolsDir, err) } - defer processor.Free() - symbolsDir := filepath.Join(rq.symbolsBaseDir, req.ActionID) - // Ensure a clean scratch directory (avoid contamination from previous attempts) - if err := os.RemoveAll(symbolsDir); err != nil { - fields[logtrace.FieldError] = err.Error() - return DecodeResponse{}, fmt.Errorf("cleanup decode dir %s: %w", symbolsDir, err) + // Ensure block directories exist; build reverse index symbol -> block + maxBlockID := 0 + for _, b := range layout.Blocks { + if int(b.BlockID) > maxBlockID { + maxBlockID = int(b.BlockID) + } } - if err := os.MkdirAll(symbolsDir, 0o755); err != nil { - fields[logtrace.FieldError] = err.Error() - return DecodeResponse{}, fmt.Errorf("mkdir %s: %w", symbolsDir, err) + blockDirs := make([]string, maxBlockID+1) + s2b := make(map[string]int, 1024) + + for _, b := range layout.Blocks { + dir := filepath.Join(symbolsDir, fmt.Sprintf("block_%d", b.BlockID)) + if err := os.MkdirAll(dir, 0o755); err != nil { + fields[logtrace.FieldError] = err.Error() + logtrace.Error(ctx, "mkdir block dir failed", fields) + return nil, nil, nil, nil, fmt.Errorf("mkdir %s: %w", dir, err) + } + blockDirs[b.BlockID] = dir + for _, sym := range b.Symbols { + s2b[sym] = b.BlockID + } } - // Validate layout before writing any symbols - if len(req.Layout.Blocks) == 0 { - fields[logtrace.FieldError] = "empty 
layout" - return DecodeResponse{}, fmt.Errorf("invalid layout: no blocks present") + ws = &Workspace{ + ActionID: actionID, + SymbolsDir: symbolsDir, + BlockDirs: blockDirs, + symbolToBlock: s2b, } - for _, blk := range req.Layout.Blocks { - if len(blk.Symbols) == 0 { - fields[logtrace.FieldError] = fmt.Sprintf("block_%d has no symbols", blk.BlockID) - return DecodeResponse{}, fmt.Errorf("invalid layout: block %d has no symbols", blk.BlockID) + + // Helper: atomic write (tmp file + rename) to avoid partials on crash + writeFileAtomic := func(path string, data []byte) error { + tmp := path + ".tmp" + if err := os.WriteFile(tmp, data, 0o644); err != nil { + return err } + return os.Rename(tmp, path) } - // Build symbol->block mapping from layout and ensure block directories exist - symbolToBlock := make(map[string]int) - for _, blk := range req.Layout.Blocks { - blockDir := filepath.Join(symbolsDir, fmt.Sprintf("block_%d", blk.BlockID)) - if err := os.MkdirAll(blockDir, 0o755); err != nil { - fields[logtrace.FieldError] = err.Error() - return DecodeResponse{}, fmt.Errorf("mkdir %s: %w", blockDir, err) + // Write callback; if block < 0, resolve via layout reverse index; default to 0. 
+ Write = func(block int, symbolID string, data []byte) (string, error) { + // Quick cancellation check + select { + case <-ctx.Done(): + return "", ctx.Err() + default: } - for _, sym := range blk.Symbols { - symbolToBlock[sym] = blk.BlockID + + // Resolve block if caller passes default + if block < 0 { + ws.mu.RLock() + bid, ok := ws.symbolToBlock[symbolID] + ws.mu.RUnlock() + if !ok { + // Single-block simplification: default to block 0 if layout maps are absent + if len(ws.BlockDirs) == 0 || ws.BlockDirs[0] == "" { + return "", errors.Errorf("no block directories prepared") + } + bid = 0 + } + block = bid } - } - // Write symbols to their respective block directories - for id, data := range req.Symbols { - blkID, ok := symbolToBlock[id] - if !ok { - fields[logtrace.FieldError] = "symbol not present in layout" - return DecodeResponse{}, fmt.Errorf("symbol %s not present in layout", id) + if block < 0 || block >= len(ws.BlockDirs) || ws.BlockDirs[block] == "" { + return "", errors.Errorf("invalid block index %d", block) } - blockDir := filepath.Join(symbolsDir, fmt.Sprintf("block_%d", blkID)) - symbolPath := filepath.Join(blockDir, id) - if err := os.WriteFile(symbolPath, data, 0o644); err != nil { - fields[logtrace.FieldError] = err.Error() - return DecodeResponse{}, fmt.Errorf("write symbol %s: %w", id, err) + + // sanitize symbolID to a basename (prevents traversal) + clean := path.Clean("/" + symbolID) + base := strings.TrimPrefix(clean, "/") + if base == "" || strings.Contains(base, "/") { + return "", errors.Errorf("invalid symbol id %q", symbolID) + + } + + dest := filepath.Join(ws.BlockDirs[block], base) + if err := writeFileAtomic(dest, data); err != nil { + return "", fmt.Errorf("write symbol %s (block %d): %w", base, block, err) } + return dest, nil + } + + Cleanup = func() error { + // Remove the whole workspace directory (symbols + layout + output if any) + return os.RemoveAll(symbolsDir) + } + + logtrace.Info(ctx, "prepare decode workspace created", 
logtrace.Fields{ + "symbols_dir": symbolsDir, + "blocks": len(blockDirs), + }) + return blockDirs, Write, Cleanup, ws, nil +} + +// DecodeFromPrepared performs RaptorQ decode using an already-prepared workspace. +// It writes layout.json under the workspace, runs decode, and returns output paths. +func (rq *raptorQ) DecodeFromPrepared( + ctx context.Context, + ws *Workspace, + layout Layout, +) (DecodeResponse, error) { + fields := logtrace.Fields{ + logtrace.FieldMethod: "DecodeFromPrepared", + logtrace.FieldModule: "rq", + logtrace.FieldActionID: ws.ActionID, + } + logtrace.Info(ctx, "RaptorQ decode (prepared) requested", fields) + + processor, err := raptorq.NewRaptorQProcessor(rqSymbolSize, rqRedundancyFactor, rqMaxMemoryMB, rqConcurrency) + if err != nil { + fields[logtrace.FieldError] = err.Error() + return DecodeResponse{}, fmt.Errorf("create RaptorQ processor: %w", err) } - logtrace.Info(ctx, "symbols written to block directories", fields) + defer processor.Free() - // ---------- write layout.json ---------- - layoutPath := filepath.Join(symbolsDir, "layout.json") - layoutBytes, err := json.Marshal(req.Layout) + // Write layout.json (idempotent) + layoutPath := filepath.Join(ws.SymbolsDir, "layout.json") + layoutBytes, err := json.Marshal(layout) if err != nil { fields[logtrace.FieldError] = err.Error() return DecodeResponse{}, fmt.Errorf("marshal layout: %w", err) @@ -101,16 +184,74 @@ func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeRespons fields[logtrace.FieldError] = err.Error() return DecodeResponse{}, fmt.Errorf("write layout file: %w", err) } - logtrace.Info(ctx, "layout.json written", fields) + logtrace.Info(ctx, "layout.json written (prepared)", fields) - // Decode - outputPath := filepath.Join(symbolsDir, "output") - if err := processor.DecodeSymbols(symbolsDir, outputPath, layoutPath); err != nil { + // Decode to output (idempotent-safe: overwrite on success) + outputPath := filepath.Join(ws.SymbolsDir, "output") + if err 
:= processor.DecodeSymbols(ws.SymbolsDir, outputPath, layoutPath); err != nil { fields[logtrace.FieldError] = err.Error() - _ = os.Remove(outputPath) + _ = os.Remove(outputPath) // best-effort cleanup of partial output return DecodeResponse{}, fmt.Errorf("raptorq decode: %w", err) } - logtrace.Info(ctx, "RaptorQ decoding completed successfully", fields) - return DecodeResponse{Path: outputPath, DecodeTmpDir: symbolsDir}, nil + logtrace.Info(ctx, "RaptorQ decoding completed successfully (prepared)", logtrace.Fields{ + "output_path": outputPath, + }) + return DecodeResponse{FilePath: outputPath, DecodeTmpDir: ws.SymbolsDir}, nil +} + +func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeResponse, error) { + fields := logtrace.Fields{ + logtrace.FieldMethod: "Decode", + logtrace.FieldModule: "rq", + logtrace.FieldActionID: req.ActionID, + } + logtrace.Info(ctx, "RaptorQ decode request received", fields) + + // 1) Validate layout (the check) + if len(req.Layout.Blocks) == 0 { + fields[logtrace.FieldError] = "empty layout" + return DecodeResponse{}, fmt.Errorf("invalid layout: no blocks present") + } + for _, blk := range req.Layout.Blocks { + if len(blk.Symbols) == 0 { + fields[logtrace.FieldError] = fmt.Sprintf("block_%d has no symbols", blk.BlockID) + return DecodeResponse{}, fmt.Errorf("invalid layout: block %d has no symbols", blk.BlockID) + } + } + + // 2) Prepare workspace (functionality) + _, Write, Cleanup, ws, err := rq.PrepareDecode(ctx, req.ActionID, req.Layout) + if err != nil { + fields[logtrace.FieldError] = err.Error() + return DecodeResponse{}, fmt.Errorf("prepare decode workspace: %w", err) + } + + // Ensure workspace cleanup on failure. On success, caller cleans up via returned path. 
+ success := false + defer func() { + if !success && Cleanup != nil { + _ = Cleanup() + } + }() + + // 3) Persist provided in-memory symbols via Write (functionality) + if len(req.Symbols) > 0 { + for id, data := range req.Symbols { + if _, werr := Write(-1, id, data); werr != nil { + fields[logtrace.FieldError] = werr.Error() + return DecodeResponse{}, werr + } + } + logtrace.Info(ctx, "symbols persisted via Write()", fields) + } + + // 4) Decode using the prepared workspace (functionality) + resp, derr := rq.DecodeFromPrepared(ctx, ws, req.Layout) + if derr != nil { + fields[logtrace.FieldError] = derr.Error() + return DecodeResponse{}, derr + } + success = true + return resp, nil } diff --git a/pkg/net/grpc/client/client.go b/pkg/net/grpc/client/client.go index dc4f45de..907c5b58 100644 --- a/pkg/net/grpc/client/client.go +++ b/pkg/net/grpc/client/client.go @@ -112,9 +112,9 @@ var defaultBackoffConfig = backoff.Config{ func DefaultClientOptions() *ClientOptions { return &ClientOptions{ MaxRecvMsgSize: 100 * MB, - MaxSendMsgSize: 100 * MB, // 100MB - InitialWindowSize: (int32)(1 * MB), // 1MB - controls initial frame size for streams - InitialConnWindowSize: (int32)(1 * MB), // 1MB - controls initial frame size for connection + MaxSendMsgSize: 100 * MB, // 100MB + InitialWindowSize: (int32)(32 * MB), // 32MB - controls initial frame size for streams + InitialConnWindowSize: (int32)(128 * MB), // 128MB - controls initial frame size for connection ConnWaitTime: defaultConnWaitTime, KeepAliveTime: 30 * time.Minute, diff --git a/pkg/net/grpc/client/client_test.go b/pkg/net/grpc/client/client_test.go index 12b196fa..0b4a10e1 100644 --- a/pkg/net/grpc/client/client_test.go +++ b/pkg/net/grpc/client/client_test.go @@ -86,8 +86,8 @@ func TestDefaultClientOptions(t *testing.T) { assert.NotNil(t, opts, "ClientOptions should be initialized") assert.Equal(t, 100*MB, opts.MaxRecvMsgSize, "MaxRecvMsgSize should be 100 MB") assert.Equal(t, 100*MB, opts.MaxSendMsgSize, 
"MaxSendMsgSize should be 100 MB") - assert.Equal(t, int32(1*MB), opts.InitialWindowSize, "InitialWindowSize should be 1 MB") - assert.Equal(t, int32(1*MB), opts.InitialConnWindowSize, "InitialConnWindowSize should be 1 MB") + assert.Equal(t, int32(32*MB), opts.InitialWindowSize, "InitialWindowSize should be 32 MB") + assert.Equal(t, int32(128*MB), opts.InitialConnWindowSize, "InitialConnWindowSize should be 128 MB") assert.Equal(t, defaultConnWaitTime, opts.ConnWaitTime, "ConnWaitTime should be 10 seconds") assert.True(t, opts.EnableRetries, "EnableRetries should be true") assert.Equal(t, maxRetries, opts.MaxRetries, "MaxRetries should be 5") diff --git a/pkg/net/grpc/server/server.go b/pkg/net/grpc/server/server.go index 10a2452c..64dfe0f2 100644 --- a/pkg/net/grpc/server/server.go +++ b/pkg/net/grpc/server/server.go @@ -94,7 +94,7 @@ func DefaultServerOptions() *ServerOptions { return &ServerOptions{ MaxRecvMsgSize: 100 * MB, MaxSendMsgSize: 100 * MB, - InitialWindowSize: (int32)(1 * MB), + InitialWindowSize: (int32)(32 * MB), InitialConnWindowSize: (int32)(1 * MB), MaxConcurrentStreams: 1000, GracefulShutdownTime: defaultGracefulShutdownTimeout, @@ -102,13 +102,13 @@ func DefaultServerOptions() *ServerOptions { MaxConnectionIdle: 2 * time.Hour, MaxConnectionAge: 2 * time.Hour, MaxConnectionAgeGrace: 1 * time.Hour, - Time: 1 * time.Hour, + Time: 30 * time.Minute, Timeout: 30 * time.Minute, MinTime: 5 * time.Minute, PermitWithoutStream: true, - WriteBufferSize: 32 * KB, - ReadBufferSize: 32 * KB, + WriteBufferSize: 512 * KB, + ReadBufferSize: 512 * KB, } } diff --git a/pkg/net/grpc/server/server_test.go b/pkg/net/grpc/server/server_test.go index 4c6067e0..f2308436 100644 --- a/pkg/net/grpc/server/server_test.go +++ b/pkg/net/grpc/server/server_test.go @@ -55,18 +55,18 @@ func TestDefaultServerOptions(t *testing.T) { assert.NotNil(t, opts, "Server options should be initialized") assert.Equal(t, 100*MB, opts.MaxRecvMsgSize, "MaxRecvMsgSize should be 100 MB") 
assert.Equal(t, 100*MB, opts.MaxSendMsgSize, "MaxSendMsgSize should be 100 MB") - assert.Equal(t, int32(1*MB), opts.InitialWindowSize, "InitialWindowSize should be 1 MB") + assert.Equal(t, int32(32*MB), opts.InitialWindowSize, "InitialWindowSize should be 32 MB") assert.Equal(t, int32(1*MB), opts.InitialConnWindowSize, "InitialConnWindowSize should be 1 MB") assert.Equal(t, uint32(1000), opts.MaxConcurrentStreams, "MaxConcurrentStreams should be 1000") assert.Equal(t, defaultGracefulShutdownTimeout, opts.GracefulShutdownTime, fmt.Sprintf("GracefulShutdownTimeout should be %v", defaultGracefulShutdownTimeout)) assert.Equal(t, uint32(0), opts.NumServerWorkers, "NumServerWorkers should be 0") - assert.Equal(t, 32*KB, opts.WriteBufferSize, "WriteBufferSize should be 32 KB") - assert.Equal(t, 32*KB, opts.ReadBufferSize, "ReadBufferSize should be 32 KB") + assert.Equal(t, 512*KB, opts.WriteBufferSize, "WriteBufferSize should be 512 KB") + assert.Equal(t, 512*KB, opts.ReadBufferSize, "ReadBufferSize should be 512 KB") assert.Equal(t, 2*time.Hour, opts.MaxConnectionIdle, "MaxConnectionIdle should be 2 hours") assert.Equal(t, 2*time.Hour, opts.MaxConnectionAge, "MaxConnectionAge should be 2 hours") assert.Equal(t, 1*time.Hour, opts.MaxConnectionAgeGrace, "MaxConnectionAgeGrace should be 1 hour") - assert.Equal(t, 1*time.Hour, opts.Time, "Time should be 1 hour") + assert.Equal(t, 30*time.Minute, opts.Time, "Time should be 30 minutes") assert.Equal(t, 30*time.Minute, opts.Timeout, "Timeout should be 30 minutes") assert.Equal(t, 5*time.Minute, opts.MinTime, "MinTime should be 5 minutes") assert.True(t, opts.PermitWithoutStream, "PermitWithoutStream should be true") diff --git a/supernode/node/action/server/cascade/cascade_action_server_test.go b/supernode/node/action/server/cascade/cascade_action_server_test.go index eca121d8..ff2738b3 100644 --- a/supernode/node/action/server/cascade/cascade_action_server_test.go +++ 
b/supernode/node/action/server/cascade/cascade_action_server_test.go @@ -10,7 +10,7 @@ import ( cascademocks "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/mocks" "github.com/stretchr/testify/assert" - "go.uber.org/mock/gomock" + "github.com/golang/mock/gomock" ) func TestRegister_Success(t *testing.T) { diff --git a/supernode/services/cascade/adaptors/mocks/lumera_mock.go b/supernode/services/cascade/adaptors/mocks/lumera_mock.go index 15a6c901..29cdd48f 100644 --- a/supernode/services/cascade/adaptors/mocks/lumera_mock.go +++ b/supernode/services/cascade/adaptors/mocks/lumera_mock.go @@ -1,10 +1,5 @@ // Code generated by MockGen. DO NOT EDIT. // Source: lumera.go -// -// Generated by this command: -// -// mockgen -destination=mocks/lumera_mock.go -package=cascadeadaptormocks -source=lumera.go -// // Package cascadeadaptormocks is a generated GoMock package. package cascadeadaptormocks @@ -16,14 +11,13 @@ import ( types "github.com/LumeraProtocol/lumera/x/action/v1/types" types0 "github.com/LumeraProtocol/lumera/x/supernode/v1/types" tx "github.com/cosmos/cosmos-sdk/types/tx" - gomock "go.uber.org/mock/gomock" + gomock "github.com/golang/mock/gomock" ) // MockLumeraClient is a mock of LumeraClient interface. type MockLumeraClient struct { ctrl *gomock.Controller recorder *MockLumeraClientMockRecorder - isgomock struct{} } // MockLumeraClientMockRecorder is the mock recorder for MockLumeraClient. @@ -53,26 +47,11 @@ func (m *MockLumeraClient) FinalizeAction(ctx context.Context, actionID string, } // FinalizeAction indicates an expected call of FinalizeAction. 
-func (mr *MockLumeraClientMockRecorder) FinalizeAction(ctx, actionID, rqids any) *gomock.Call { +func (mr *MockLumeraClientMockRecorder) FinalizeAction(ctx, actionID, rqids interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FinalizeAction", reflect.TypeOf((*MockLumeraClient)(nil).FinalizeAction), ctx, actionID, rqids) } -// SimulateFinalizeAction mocks base method. -func (m *MockLumeraClient) SimulateFinalizeAction(ctx context.Context, actionID string, rqids []string) (*tx.SimulateResponse, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "SimulateFinalizeAction", ctx, actionID, rqids) - ret0, _ := ret[0].(*tx.SimulateResponse) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// SimulateFinalizeAction indicates an expected call of SimulateFinalizeAction. -func (mr *MockLumeraClientMockRecorder) SimulateFinalizeAction(ctx, actionID, rqids any) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SimulateFinalizeAction", reflect.TypeOf((*MockLumeraClient)(nil).SimulateFinalizeAction), ctx, actionID, rqids) -} - // GetAction mocks base method. func (m *MockLumeraClient) GetAction(ctx context.Context, actionID string) (*types.QueryGetActionResponse, error) { m.ctrl.T.Helper() @@ -83,7 +62,7 @@ func (m *MockLumeraClient) GetAction(ctx context.Context, actionID string) (*typ } // GetAction indicates an expected call of GetAction. -func (mr *MockLumeraClientMockRecorder) GetAction(ctx, actionID any) *gomock.Call { +func (mr *MockLumeraClientMockRecorder) GetAction(ctx, actionID interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAction", reflect.TypeOf((*MockLumeraClient)(nil).GetAction), ctx, actionID) } @@ -98,7 +77,7 @@ func (m *MockLumeraClient) GetActionFee(ctx context.Context, dataSize string) (* } // GetActionFee indicates an expected call of GetActionFee. 
-func (mr *MockLumeraClientMockRecorder) GetActionFee(ctx, dataSize any) *gomock.Call { +func (mr *MockLumeraClientMockRecorder) GetActionFee(ctx, dataSize interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetActionFee", reflect.TypeOf((*MockLumeraClient)(nil).GetActionFee), ctx, dataSize) } @@ -113,11 +92,26 @@ func (m *MockLumeraClient) GetTopSupernodes(ctx context.Context, height uint64) } // GetTopSupernodes indicates an expected call of GetTopSupernodes. -func (mr *MockLumeraClientMockRecorder) GetTopSupernodes(ctx, height any) *gomock.Call { +func (mr *MockLumeraClientMockRecorder) GetTopSupernodes(ctx, height interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTopSupernodes", reflect.TypeOf((*MockLumeraClient)(nil).GetTopSupernodes), ctx, height) } +// SimulateFinalizeAction mocks base method. +func (m *MockLumeraClient) SimulateFinalizeAction(ctx context.Context, actionID string, rqids []string) (*tx.SimulateResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SimulateFinalizeAction", ctx, actionID, rqids) + ret0, _ := ret[0].(*tx.SimulateResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// SimulateFinalizeAction indicates an expected call of SimulateFinalizeAction. +func (mr *MockLumeraClientMockRecorder) SimulateFinalizeAction(ctx, actionID, rqids interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SimulateFinalizeAction", reflect.TypeOf((*MockLumeraClient)(nil).SimulateFinalizeAction), ctx, actionID, rqids) +} + // Verify mocks base method. func (m *MockLumeraClient) Verify(ctx context.Context, creator string, file, sigBytes []byte) error { m.ctrl.T.Helper() @@ -127,7 +121,7 @@ func (m *MockLumeraClient) Verify(ctx context.Context, creator string, file, sig } // Verify indicates an expected call of Verify. 
-func (mr *MockLumeraClientMockRecorder) Verify(ctx, creator, file, sigBytes any) *gomock.Call { +func (mr *MockLumeraClientMockRecorder) Verify(ctx, creator, file, sigBytes interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Verify", reflect.TypeOf((*MockLumeraClient)(nil).Verify), ctx, creator, file, sigBytes) } diff --git a/supernode/services/cascade/adaptors/mocks/p2p_mock.go b/supernode/services/cascade/adaptors/mocks/p2p_mock.go index 4f62a440..ec99d92a 100644 --- a/supernode/services/cascade/adaptors/mocks/p2p_mock.go +++ b/supernode/services/cascade/adaptors/mocks/p2p_mock.go @@ -1,10 +1,5 @@ // Code generated by MockGen. DO NOT EDIT. // Source: p2p.go -// -// Generated by this command: -// -// mockgen -destination=mocks/p2p_mock.go -package=cascadeadaptormocks -source=p2p.go -// // Package cascadeadaptormocks is a generated GoMock package. package cascadeadaptormocks @@ -15,14 +10,13 @@ import ( logtrace "github.com/LumeraProtocol/supernode/v2/pkg/logtrace" adaptors "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors" - gomock "go.uber.org/mock/gomock" + gomock "github.com/golang/mock/gomock" ) // MockP2PService is a mock of P2PService interface. type MockP2PService struct { ctrl *gomock.Controller recorder *MockP2PServiceMockRecorder - isgomock struct{} } // MockP2PServiceMockRecorder is the mock recorder for MockP2PService. @@ -51,7 +45,7 @@ func (m *MockP2PService) StoreArtefacts(ctx context.Context, req adaptors.StoreA } // StoreArtefacts indicates an expected call of StoreArtefacts. 
-func (mr *MockP2PServiceMockRecorder) StoreArtefacts(ctx, req, f any) *gomock.Call { +func (mr *MockP2PServiceMockRecorder) StoreArtefacts(ctx, req, f interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "StoreArtefacts", reflect.TypeOf((*MockP2PService)(nil).StoreArtefacts), ctx, req, f) } diff --git a/supernode/services/cascade/adaptors/mocks/rq_mock.go b/supernode/services/cascade/adaptors/mocks/rq_mock.go index 4c53c1dd..f45f2eb5 100644 --- a/supernode/services/cascade/adaptors/mocks/rq_mock.go +++ b/supernode/services/cascade/adaptors/mocks/rq_mock.go @@ -1,10 +1,5 @@ // Code generated by MockGen. DO NOT EDIT. // Source: rq.go -// -// Generated by this command: -// -// mockgen -destination=mocks/rq_mock.go -package=cascadeadaptormocks -source=rq.go -// // Package cascadeadaptormocks is a generated GoMock package. package cascadeadaptormocks @@ -13,15 +8,15 @@ import ( context "context" reflect "reflect" + codec "github.com/LumeraProtocol/supernode/v2/pkg/codec" adaptors "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors" - gomock "go.uber.org/mock/gomock" + gomock "github.com/golang/mock/gomock" ) // MockCodecService is a mock of CodecService interface. type MockCodecService struct { ctrl *gomock.Controller recorder *MockCodecServiceMockRecorder - isgomock struct{} } // MockCodecServiceMockRecorder is the mock recorder for MockCodecService. @@ -51,7 +46,7 @@ func (m *MockCodecService) Decode(ctx context.Context, req adaptors.DecodeReques } // Decode indicates an expected call of Decode. 
-func (mr *MockCodecServiceMockRecorder) Decode(ctx, req any) *gomock.Call { +func (mr *MockCodecServiceMockRecorder) Decode(ctx, req interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Decode", reflect.TypeOf((*MockCodecService)(nil).Decode), ctx, req) } @@ -66,7 +61,25 @@ func (m *MockCodecService) EncodeInput(ctx context.Context, taskID, path string, } // EncodeInput indicates an expected call of EncodeInput. -func (mr *MockCodecServiceMockRecorder) EncodeInput(ctx, taskID, path, dataSize any) *gomock.Call { +func (mr *MockCodecServiceMockRecorder) EncodeInput(ctx, taskID, path, dataSize interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EncodeInput", reflect.TypeOf((*MockCodecService)(nil).EncodeInput), ctx, taskID, path, dataSize) } + +// PrepareDecode mocks base method. +func (m *MockCodecService) PrepareDecode(ctx context.Context, actionID string, layout codec.Layout) ([]string, func(int, string, []byte) (string, error), func() error, *codec.Workspace, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PrepareDecode", ctx, actionID, layout) + ret0, _ := ret[0].([]string) + ret1, _ := ret[1].(func(int, string, []byte) (string, error)) + ret2, _ := ret[2].(func() error) + ret3, _ := ret[3].(*codec.Workspace) + ret4, _ := ret[4].(error) + return ret0, ret1, ret2, ret3, ret4 +} + +// PrepareDecode indicates an expected call of PrepareDecode. 
+func (mr *MockCodecServiceMockRecorder) PrepareDecode(ctx, actionID, layout interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PrepareDecode", reflect.TypeOf((*MockCodecService)(nil).PrepareDecode), ctx, actionID, layout) +} diff --git a/supernode/services/cascade/adaptors/rq.go b/supernode/services/cascade/adaptors/rq.go index 5f4443cf..92e89819 100644 --- a/supernode/services/cascade/adaptors/rq.go +++ b/supernode/services/cascade/adaptors/rq.go @@ -11,6 +11,7 @@ import ( //go:generate mockgen -destination=mocks/rq_mock.go -package=cascadeadaptormocks -source=rq.go type CodecService interface { EncodeInput(ctx context.Context, taskID string, path string, dataSize int) (EncodeResult, error) + PrepareDecode(ctx context.Context, actionID string, layout codec.Layout) (blockPaths []string, Write func(block int, symbolID string, data []byte) (string, error), Cleanup func() error, ws *codec.Workspace, err error) Decode(ctx context.Context, req DecodeRequest) (DecodeResponse, error) } @@ -70,7 +71,11 @@ func (c *codecImpl) Decode(ctx context.Context, req DecodeRequest) (DecodeRespon } return DecodeResponse{ - FilePath: resp.Path, + FilePath: resp.FilePath, DecodeTmpDir: resp.DecodeTmpDir, }, nil } + +func (c *codecImpl) PrepareDecode(ctx context.Context, actionID string, layout codec.Layout) (blockPaths []string, Write func(block int, symbolID string, data []byte) (string, error), Cleanup func() error, ws *codec.Workspace, err error) { + return +} diff --git a/supernode/services/cascade/mocks/cascade_interfaces_mock.go b/supernode/services/cascade/mocks/cascade_interfaces_mock.go index 497497c3..44d3189c 100644 --- a/supernode/services/cascade/mocks/cascade_interfaces_mock.go +++ b/supernode/services/cascade/mocks/cascade_interfaces_mock.go @@ -1,10 +1,5 @@ // Code generated by MockGen. DO NOT EDIT. 
// Source: interfaces.go -// -// Generated by this command: -// -// mockgen -destination=mocks/cascade_interfaces_mock.go -package=cascademocks -source=interfaces.go -// // Package cascademocks is a generated GoMock package. package cascademocks @@ -14,14 +9,13 @@ import ( reflect "reflect" cascade "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade" - gomock "go.uber.org/mock/gomock" + gomock "github.com/golang/mock/gomock" ) // MockCascadeServiceFactory is a mock of CascadeServiceFactory interface. type MockCascadeServiceFactory struct { ctrl *gomock.Controller recorder *MockCascadeServiceFactoryMockRecorder - isgomock struct{} } // MockCascadeServiceFactoryMockRecorder is the mock recorder for MockCascadeServiceFactory. @@ -59,7 +53,6 @@ func (mr *MockCascadeServiceFactoryMockRecorder) NewCascadeRegistrationTask() *g type MockCascadeTask struct { ctrl *gomock.Controller recorder *MockCascadeTaskMockRecorder - isgomock struct{} } // MockCascadeTaskMockRecorder is the mock recorder for MockCascadeTask. @@ -88,7 +81,7 @@ func (m *MockCascadeTask) CleanupDownload(ctx context.Context, actionID string) } // CleanupDownload indicates an expected call of CleanupDownload. -func (mr *MockCascadeTaskMockRecorder) CleanupDownload(ctx, actionID any) *gomock.Call { +func (mr *MockCascadeTaskMockRecorder) CleanupDownload(ctx, actionID interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CleanupDownload", reflect.TypeOf((*MockCascadeTask)(nil).CleanupDownload), ctx, actionID) } @@ -102,7 +95,7 @@ func (m *MockCascadeTask) Download(ctx context.Context, req *cascade.DownloadReq } // Download indicates an expected call of Download. 
-func (mr *MockCascadeTaskMockRecorder) Download(ctx, req, send any) *gomock.Call { +func (mr *MockCascadeTaskMockRecorder) Download(ctx, req, send interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Download", reflect.TypeOf((*MockCascadeTask)(nil).Download), ctx, req, send) } @@ -116,7 +109,7 @@ func (m *MockCascadeTask) Register(ctx context.Context, req *cascade.RegisterReq } // Register indicates an expected call of Register. -func (mr *MockCascadeTaskMockRecorder) Register(ctx, req, send any) *gomock.Call { +func (mr *MockCascadeTaskMockRecorder) Register(ctx, req, send interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Register", reflect.TypeOf((*MockCascadeTask)(nil).Register), ctx, req, send) } diff --git a/supernode/services/cascade/register_test.go b/supernode/services/cascade/register_test.go index c73b96b7..6f56791a 100644 --- a/supernode/services/cascade/register_test.go +++ b/supernode/services/cascade/register_test.go @@ -21,8 +21,8 @@ import ( "github.com/cosmos/gogoproto/proto" "lukechampine.com/blake3" + "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" - "go.uber.org/mock/gomock" ) func TestCascadeRegistrationTask_Register(t *testing.T) { @@ -104,10 +104,10 @@ func TestCascadeRegistrationTask_Register(t *testing.T) { Metadata: codecpkg.Layout{Blocks: []codecpkg.Block{{BlockID: 1, Hash: "abc"}}}, }, nil) - // 8. Store artefacts (no metrics returned; recorded centrally) - p2p.EXPECT(). - StoreArtefacts(gomock.Any(), gomock.Any(), gomock.Any()). - Return(nil) + // 8. Store artefacts (no metrics returned; recorded centrally) + p2p.EXPECT(). + StoreArtefacts(gomock.Any(), gomock.Any(), gomock.Any()). 
+ Return(nil) }, expectedError: "", expectedEvents: 12, diff --git a/supernode/services/cascade/service_test.go b/supernode/services/cascade/service_test.go index eaa7bf7f..bc2998ad 100644 --- a/supernode/services/cascade/service_test.go +++ b/supernode/services/cascade/service_test.go @@ -8,8 +8,8 @@ import ( "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade" cascadeadaptormocks "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors/mocks" "github.com/LumeraProtocol/supernode/v2/supernode/services/common" + "github.com/golang/mock/gomock" "github.com/stretchr/testify/assert" - "go.uber.org/mock/gomock" ) func TestNewCascadeService(t *testing.T) { diff --git a/tests/system/go.mod b/tests/system/go.mod index 15b8212d..e6eb3bba 100644 --- a/tests/system/go.mod +++ b/tests/system/go.mod @@ -95,6 +95,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/glog v1.2.4 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/mock v1.6.0 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect github.com/google/btree v1.1.3 // indirect github.com/google/flatbuffers v1.12.1 // indirect diff --git a/tests/system/go.sum b/tests/system/go.sum index 9ff0158a..6e9c0112 100644 --- a/tests/system/go.sum +++ b/tests/system/go.sum @@ -803,6 +803,7 @@ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/zeebo/errs v1.4.0 h1:XNdoD/RRMKP7HD0UhJnIzUy74ISdGGxURlYG8HSWSfM= github.com/zeebo/errs v1.4.0/go.mod 
h1:sgbWHsvVuTPHcqJJGQ1WhI5KbWlHYz+2+2C/LSEtCw4= github.com/zondax/hid v0.9.2 h1:WCJFnEDMiqGF64nlZz28E9qLVZ0KSJ7xpc5DLEyma2U= @@ -888,6 +889,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -933,6 +935,7 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610= golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1025,6 +1028,7 @@ golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools 
v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 74fb97edfb96133285a2552defb024bd2f2d6a31 Mon Sep 17 00:00:00 2001 From: j-rafique Date: Wed, 24 Sep 2025 17:31:22 +0500 Subject: [PATCH 06/15] Change test port --- supernode/cmd/start.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go index 31c19b2a..9c186d1a 100644 --- a/supernode/cmd/start.go +++ b/supernode/cmd/start.go @@ -146,7 +146,7 @@ The supernode will connect to the Lumera network and begin participating in the isTestnet := strings.Contains(strings.ToLower(appConfig.LumeraClientConfig.ChainID), "testnet") if isTestnet { - profilingAddr := "0.0.0.0:6060" + profilingAddr := "0.0.0.0:6062" logtrace.Info(ctx, "Starting profiling server", logtrace.Fields{ "address": profilingAddr, From cf7f877fe0ceeb68fbcdab70ada6c60f8101ebeb Mon Sep 17 00:00:00 2001 From: Matee ullah Malik <46045452+mateeullahmalik@users.noreply.github.com> Date: Wed, 24 Sep 2025 17:53:31 +0500 Subject: [PATCH 07/15] Async events (#190) --- sdk/adapters/supernodeservice/adapter.go | 32 ++++++++- sdk/net/factory.go | 7 +- sdk/task/download.go | 66 +++---------------- .../server/cascade/cascade_action_server.go | 32 +++++++-- 4 files changed, 71 insertions(+), 66 deletions(-) diff --git a/sdk/adapters/supernodeservice/adapter.go b/sdk/adapters/supernodeservice/adapter.go index f9e9e6da..0d326a17 100644 --- a/sdk/adapters/supernodeservice/adapter.go +++ b/sdk/adapters/supernodeservice/adapter.go @@ -446,6 +446,7 @@ func (a *cascadeAdapter) CascadeSupernodeDownload( bytesWritten int64 chunkIndex 
int startedEmitted bool + downloadStart time.Time ) // 3. Receive streamed responses @@ -509,7 +510,11 @@ func (a *cascadeAdapter) CascadeSupernodeDownload( } } } - in.EventLogger(ctx, toSdkEvent(x.Event.EventType), x.Event.Message, edata) + // Avoid blocking Recv loop on event handling; dispatch asynchronously + evtType := toSdkEvent(x.Event.EventType) + go func(ed event.EventData, et event.EventType, msg string) { + in.EventLogger(ctx, et, msg, ed) + }(edata, evtType, x.Event.Message) } // 3b. Actual data chunk @@ -520,7 +525,10 @@ func (a *cascadeAdapter) CascadeSupernodeDownload( } if !startedEmitted { if in.EventLogger != nil { - in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID}) + // mark start to compute throughput at completion + downloadStart = time.Now() + // Emit started asynchronously to avoid blocking + go in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID}) } startedEmitted = true } @@ -538,7 +546,25 @@ func (a *cascadeAdapter) CascadeSupernodeDownload( a.logger.Info(ctx, "download complete", "bytes_written", bytesWritten, "path", in.OutputPath, "action_id", in.ActionID) if in.EventLogger != nil { - in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{event.KeyActionID: in.ActionID, event.KeyOutputPath: in.OutputPath}) + // Compute metrics if we marked a start + var elapsed float64 + var throughput float64 + if !downloadStart.IsZero() { + elapsed = time.Since(downloadStart).Seconds() + mb := float64(bytesWritten) / (1024.0 * 1024.0) + if elapsed > 0 { + throughput = mb / elapsed + } + } + // Emit completion asynchronously with metrics + go in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{ + event.KeyActionID: in.ActionID, + event.KeyOutputPath: in.OutputPath, + event.KeyBytesTotal: bytesWritten, + event.KeyChunks: chunkIndex, + event.KeyElapsedSeconds: 
elapsed, + event.KeyThroughputMBS: throughput, + }) } return &CascadeSupernodeDownloadResponse{ Success: true, diff --git a/sdk/net/factory.go b/sdk/net/factory.go index b9fad9fd..f3486780 100644 --- a/sdk/net/factory.go +++ b/sdk/net/factory.go @@ -39,9 +39,10 @@ func NewClientFactory(ctx context.Context, logger log.Logger, keyring keyring.Ke // Tuned for 1GB max files with 4MB chunks // Reduce in-flight memory by aligning windows and msg sizes to chunk size. opts := client.DefaultClientOptions() - opts.MaxRecvMsgSize = 8 * 1024 * 1024 // 8MB: supports 4MB chunks + overhead - opts.MaxSendMsgSize = 8 * 1024 * 1024 // 8MB: supports 4MB chunks + overhead - opts.InitialWindowSize = 4 * 1024 * 1024 // 4MB per-stream window ≈ chunk size + opts.MaxRecvMsgSize = 12 * 1024 * 1024 // 8MB: supports 4MB chunks + overhead + opts.MaxSendMsgSize = 12 * 1024 * 1024 // 8MB: supports 4MB chunks + overhead + // Increase per-stream window to provide headroom for first data chunk + events + opts.InitialWindowSize = 12 * 1024 * 1024 // 8MB per-stream window opts.InitialConnWindowSize = 64 * 1024 * 1024 // 64MB per-connection window return &ClientFactory{ diff --git a/sdk/task/download.go b/sdk/task/download.go index 3e85007a..2c727ae9 100644 --- a/sdk/task/download.go +++ b/sdk/task/download.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "os" - "sort" "time" "github.com/LumeraProtocol/supernode/v2/sdk/adapters/lumera" @@ -77,51 +76,6 @@ func (t *CascadeDownloadTask) downloadFromSupernodes(ctx context.Context, supern } } - // Optionally rank supernodes by available memory to improve success for large files - // We keep a short timeout per status fetch to avoid delaying downloads. 
- type rankedSN struct { - sn lumera.Supernode - availGB float64 - hasStatus bool - } - ranked := make([]rankedSN, 0, len(supernodes)) - for _, sn := range supernodes { - ranked = append(ranked, rankedSN{sn: sn}) - } - - // Probe supernode status with short timeouts and close clients promptly - for i := range ranked { - sn := ranked[i].sn - // 2s status timeout to keep this pass fast - stx, cancel := context.WithTimeout(ctx, 2*time.Second) - client, err := clientFactory.CreateClient(stx, sn) - if err != nil { - cancel() - continue - } - status, err := client.GetSupernodeStatus(stx) - _ = client.Close(stx) - cancel() - if err != nil { - continue - } - ranked[i].hasStatus = true - ranked[i].availGB = status.Resources.Memory.AvailableGB - } - - // Sort: nodes with status first, higher available memory first - sort.Slice(ranked, func(i, j int) bool { - if ranked[i].hasStatus != ranked[j].hasStatus { - return ranked[i].hasStatus && !ranked[j].hasStatus - } - return ranked[i].availGB > ranked[j].availGB - }) - - // Rebuild the supernodes list in the sorted order - for i := range ranked { - supernodes[i] = ranked[i].sn - } - // Try supernodes sequentially, one by one (now sorted) var lastErr error for idx, sn := range supernodes { @@ -146,8 +100,8 @@ func (t *CascadeDownloadTask) downloadFromSupernodes(ctx context.Context, supern continue } - // Success; return to caller - return nil + // Success; return to caller + return nil } if lastErr != nil { @@ -176,15 +130,15 @@ func (t *CascadeDownloadTask) attemptDownload( t.LogEvent(ctx, evt, msg, data) } - resp, err := client.Download(ctx, req) - if err != nil { - return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err) - } - if !resp.Success { - return fmt.Errorf("download rejected by %s: %s", sn.CosmosAddress, resp.Message) - } + resp, err := client.Download(ctx, req) + if err != nil { + return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err) + } + if !resp.Success { + return fmt.Errorf("download rejected by 
%s: %s", sn.CosmosAddress, resp.Message) + } - return nil + return nil } // downloadResult holds the result of a successful download attempt diff --git a/supernode/node/action/server/cascade/cascade_action_server.go b/supernode/node/action/server/cascade/cascade_action_server.go index a99fbf0a..449f4c42 100644 --- a/supernode/node/action/server/cascade/cascade_action_server.go +++ b/supernode/node/action/server/cascade/cascade_action_server.go @@ -313,7 +313,14 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS "chunk_size": chunkSize, }) - // Announce: file is ready to be served to the client + // Pre-read first chunk to avoid any delay between SERVE_READY and first data + buf := make([]byte, chunkSize) + n, readErr := f.Read(buf) + if readErr != nil && readErr != io.EOF { + return fmt.Errorf("chunked read failed: %w", readErr) + } + + // Announce: file is ready to be served to the client (right before first data) if err := stream.Send(&pb.DownloadResponse{ ResponseType: &pb.DownloadResponse_Event{ Event: &pb.DownloadEvent{ @@ -326,10 +333,27 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS return err } - // Stream the file in fixed-size chunks - buf := make([]byte, chunkSize) + // Send pre-read first chunk if available + if n > 0 { + if err := stream.Send(&pb.DownloadResponse{ + ResponseType: &pb.DownloadResponse_Chunk{ + Chunk: &pb.DataChunk{Data: buf[:n]}, + }, + }); err != nil { + logtrace.Error(ctx, "failed to stream first chunk", logtrace.Fields{logtrace.FieldError: err.Error()}) + return err + } + } + + // If EOF after first read, we're done + if readErr == io.EOF { + logtrace.Info(ctx, "completed streaming all chunks", fields) + return nil + } + + // Continue streaming remaining chunks for { - n, readErr := f.Read(buf) + n, readErr = f.Read(buf) if n > 0 { if err := stream.Send(&pb.DownloadResponse{ ResponseType: &pb.DownloadResponse_Chunk{ From 1f524bf39752989de190659c25116ef129a52bd5 Mon 
Sep 17 00:00:00 2001 From: j-rafique Date: Wed, 24 Sep 2025 18:37:05 +0500 Subject: [PATCH 08/15] Add profile script --- profile_cascade.sh | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100755 profile_cascade.sh diff --git a/profile_cascade.sh b/profile_cascade.sh new file mode 100755 index 00000000..7fe0af5e --- /dev/null +++ b/profile_cascade.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +# Cascade Download Heap Profiling Script +# Samples heap every 30 seconds during cascade downloads + +# Configuration - modify these as needed +PROFILE_URL="http://localhost:6062/debug/pprof/heap" +INTERVAL=30 +TIMESTAMP=$(date +%Y%m%d_%H%M%S) +PROFILE_DIR="profiles_${TIMESTAMP}" + +# Allow override via command line +if [ "$1" != "" ]; then + PROFILE_URL="$1" +fi + +echo "=== Cascade Heap Profiling ===" +echo "Profile URL: $PROFILE_URL" +echo "Interval: ${INTERVAL}s" +echo "Output Dir: $PROFILE_DIR" +echo + +# Create profile directory +mkdir -p "$PROFILE_DIR" +cd "$PROFILE_DIR" + +# Test connection first +echo "Testing connection to profiling server..." +if ! curl -s --fail "$PROFILE_URL" > /dev/null; then + echo "ERROR: Cannot connect to profiling server at $PROFILE_URL" + echo "Make sure your supernode is running on testnet!" + exit 1 +fi + +echo "✓ Connected to profiling server" +echo + +# Take baseline +echo "Taking baseline heap snapshot..." +curl -s -o "heap_00s.prof" "$PROFILE_URL" +echo "✓ Baseline saved: heap_00s.prof" +echo + +echo "*** NOW START YOUR CASCADE DOWNLOAD ***" +echo "Press ENTER when download has started..." +read + +echo "Starting heap profiling every ${INTERVAL}s..." +echo "Press Ctrl+C to stop" +echo + +# Counter for snapshots +counter=1 + +# Function to handle cleanup on exit +cleanup() { + echo + echo "Profiling stopped. Taking final snapshot..." 
+ final_elapsed=$((counter * INTERVAL)) + curl -s -o "heap_${final_elapsed}s_final.prof" "$PROFILE_URL" + + echo + echo "=== Profiling Complete ===" + echo "Location: $(pwd)" + echo "Files created:" + ls -la *.prof + echo + echo "Analysis commands:" + echo "# Compare baseline to final:" + echo "go tool pprof -http=:8080 -base heap_00s.prof heap_${final_elapsed}s_final.prof" + exit 0 +} + +# Set up signal handler +trap cleanup INT TERM + +# Main profiling loop +while true; do + sleep $INTERVAL + + elapsed=$((counter * INTERVAL)) + minutes=$((elapsed / 60)) + seconds=$((elapsed % 60)) + + timestamp=$(date +%H:%M:%S) + filename="heap_${elapsed}s.prof" + + echo "[$timestamp] Taking snapshot $counter (${minutes}m ${seconds}s elapsed)..." + + if curl -s -o "$filename" "$PROFILE_URL"; then + size=$(ls -lh "$filename" | awk '{print $5}') + echo "✓ Saved: $filename ($size)" + else + echo "✗ Failed to get snapshot $counter" + fi + + ((counter++)) +done \ No newline at end of file From b123dcefab57927edb7e398fd870bbe75c6948bb Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Wed, 24 Sep 2025 19:04:29 +0500 Subject: [PATCH 09/15] Change test port --- supernode/cmd/start.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go index 9c186d1a..92ccc700 100644 --- a/supernode/cmd/start.go +++ b/supernode/cmd/start.go @@ -146,7 +146,7 @@ The supernode will connect to the Lumera network and begin participating in the isTestnet := strings.Contains(strings.ToLower(appConfig.LumeraClientConfig.ChainID), "testnet") if isTestnet { - profilingAddr := "0.0.0.0:6062" + profilingAddr := "0.0.0.0:8082" logtrace.Info(ctx, "Starting profiling server", logtrace.Fields{ "address": profilingAddr, From 3c66950c37aa46711219d0e86ee73efffc027eb1 Mon Sep 17 00:00:00 2001 From: j-rafique Date: Thu, 25 Sep 2025 16:27:39 +0500 Subject: [PATCH 10/15] Disable Metrics --- p2p/kademlia/dht.go | 78 +++++++++++-------- p2p/p2p.go | 61 
++++++++------- supernode/cmd/start.go | 42 +++++----- supernode/services/cascade/adaptors/p2p.go | 17 ++-- supernode/services/cascade/config.go | 6 +- supernode/services/cascade/download.go | 35 +++++---- supernode/services/cascade/register.go | 14 ++-- supernode/services/cascade/service.go | 14 ++-- tests/integration/p2p/p2p_integration_test.go | 5 +- 9 files changed, 154 insertions(+), 118 deletions(-) diff --git a/p2p/kademlia/dht.go b/p2p/kademlia/dht.go index 69c45023..b83fb34e 100644 --- a/p2p/kademlia/dht.go +++ b/p2p/kademlia/dht.go @@ -42,9 +42,10 @@ const ( delKeysCountThreshold = 10 lowSpaceThreshold = 50 // GB batchRetrieveSize = 1000 - storeSameSymbolsBatchConcurrency = 3 - fetchSymbolsBatchConcurrency = 6 - minimumDataStoreSuccessRate = 75.0 + + storeSameSymbolsBatchConcurrency = 3 + fetchSymbolsBatchConcurrency = 6 + minimumDataStoreSuccessRate = 75.0 maxIterations = 4 macConcurrentNetworkStoreCalls = 16 @@ -124,7 +125,7 @@ func (s *DHT) ConnPoolSnapshot() map[string]int64 { // Options contains configuration options for the queries node type Options struct { - ID []byte + ID []byte // The queries IPv4 or IPv6 address IP string @@ -139,8 +140,11 @@ type Options struct { // Lumera client for interacting with the blockchain LumeraClient lumera.Client - // Keyring for credentials - Keyring keyring.Keyring + // Keyring for credentials + Keyring keyring.Keyring + + // MetricsDisabled gates DHT-level metrics emission (p2pmetrics hooks and snapshots) + MetricsDisabled bool } // NewDHT returns a new DHT node @@ -739,7 +743,9 @@ func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32, return nil, fmt.Errorf("fetch and add local keys: %v", err) } // Report how many were found locally, for event metrics - p2pmetrics.ReportFoundLocal(p2pmetrics.TaskIDFromContext(ctx), int(foundLocalCount)) + if !s.options.MetricsDisabled { + p2pmetrics.ReportFoundLocal(p2pmetrics.TaskIDFromContext(ctx), int(foundLocalCount)) + } if foundLocalCount >= 
required { return result, nil } @@ -788,7 +794,9 @@ func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32, // Record batch retrieve stats for internal DHT snapshot window s.metrics.RecordBatchRetrieve(len(keys), int(required), int(foundLocalCount), netFound, time.Since(start)) // Also feed retrieve counts into the per-task collector for stream events - p2pmetrics.SetRetrieveBatchSummary(p2pmetrics.TaskIDFromContext(ctx), len(keys), int(required), int(foundLocalCount), netFound, time.Since(start).Milliseconds()) + if !s.options.MetricsDisabled { + p2pmetrics.SetRetrieveBatchSummary(p2pmetrics.TaskIDFromContext(ctx), len(keys), int(required), int(foundLocalCount), netFound, time.Since(start).Milliseconds()) + } return result, nil } @@ -946,14 +954,16 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node, } mu.Unlock() // record failed RPC per-node - p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ - IP: node.IP, - Address: node.String(), - Keys: 0, - Success: false, - Error: err.Error(), - DurationMS: time.Since(callStart).Milliseconds(), - }) + if !s.options.MetricsDisabled { + p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ + IP: node.IP, + Address: node.String(), + Keys: 0, + Success: false, + Error: err.Error(), + DurationMS: time.Since(callStart).Milliseconds(), + }) + } return } @@ -976,14 +986,16 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node, } // record successful RPC per-node (returned may be 0). Success is true when no error. 
- p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ - IP: node.IP, - Address: node.String(), - Keys: returned, - Success: true, - Error: "", - DurationMS: time.Since(callStart).Milliseconds(), - }) + if !s.options.MetricsDisabled { + p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ + IP: node.IP, + Address: node.String(), + Keys: returned, + Success: true, + Error: "", + DurationMS: time.Since(callStart).Milliseconds(), + }) + } }(node, nodeID) } @@ -1713,14 +1725,16 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i } // Emit per-node store RPC call via metrics bridge (no P2P API coupling) - p2pmetrics.RecordStore(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ - IP: nodeIP, - Address: nodeAddr, - Keys: response.KeysCount, - Success: errMsg == "" && response.Error == nil, - Error: errMsg, - DurationMS: response.DurationMS, - }) + if !s.options.MetricsDisabled { + p2pmetrics.RecordStore(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ + IP: nodeIP, + Address: nodeAddr, + Keys: response.KeysCount, + Success: errMsg == "" && response.Error == nil, + Error: errMsg, + DurationMS: response.DurationMS, + }) + } } diff --git a/p2p/p2p.go b/p2p/p2p.go index e3d6b40a..006c469a 100644 --- a/p2p/p2p.go +++ b/p2p/p2p.go @@ -40,14 +40,15 @@ type P2P interface { // p2p structure to implements interface type p2p struct { - store kademlia.Store // the store for kademlia network - metaStore kademlia.MetaStore - dht *kademlia.DHT // the kademlia network - config *Config // the service configuration - running bool // if the kademlia network is ready - lumeraClient lumera.Client - keyring keyring.Keyring // Add the keyring field - rqstore rqstore.Store + store kademlia.Store // the store for kademlia network + metaStore kademlia.MetaStore + dht *kademlia.DHT // the kademlia network + config *Config // the service configuration + running bool // if the kademlia network is ready + 
lumeraClient lumera.Client + keyring keyring.Keyring // Add the keyring field + rqstore rqstore.Store + metricsDisabled bool } // Run the kademlia network @@ -226,14 +227,15 @@ func (s *p2p) NClosestNodesWithIncludingNodeList(ctx context.Context, n int, key // configure the distributed hash table for p2p service func (s *p2p) configure(ctx context.Context) error { // new the queries storage - kadOpts := &kademlia.Options{ - LumeraClient: s.lumeraClient, - Keyring: s.keyring, // Pass the keyring - BootstrapNodes: []*kademlia.Node{}, - IP: s.config.ListenAddress, - Port: s.config.Port, - ID: []byte(s.config.ID), - } + kadOpts := &kademlia.Options{ + LumeraClient: s.lumeraClient, + Keyring: s.keyring, // Pass the keyring + BootstrapNodes: []*kademlia.Node{}, + IP: s.config.ListenAddress, + Port: s.config.Port, + ID: []byte(s.config.ID), + MetricsDisabled: s.metricsDisabled, + } if len(kadOpts.ID) == 0 { errors.Errorf("node id is empty") @@ -251,25 +253,26 @@ func (s *p2p) configure(ctx context.Context) error { } // New returns a new p2p instance. 
-func New(ctx context.Context, config *Config, lumeraClient lumera.Client, kr keyring.Keyring, rqstore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore) (P2P, error) { - store, err := sqlite.NewStore(ctx, config.DataDir, cloud, mst) - if err != nil { - return nil, errors.Errorf("new kademlia store: %w", err) - } +func New(ctx context.Context, config *Config, lumeraClient lumera.Client, kr keyring.Keyring, rqstore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore, metricsDisabled bool) (P2P, error) { + store, err := sqlite.NewStore(ctx, config.DataDir, cloud, mst) + if err != nil { + return nil, errors.Errorf("new kademlia store: %w", err) + } meta, err := meta.NewStore(ctx, config.DataDir) if err != nil { return nil, errors.Errorf("new kademlia meta store: %w", err) } - return &p2p{ - store: store, - metaStore: meta, - config: config, - lumeraClient: lumeraClient, - keyring: kr, // Store the keyring - rqstore: rqstore, - }, nil + return &p2p{ + store: store, + metaStore: meta, + config: config, + lumeraClient: lumeraClient, + keyring: kr, // Store the keyring + rqstore: rqstore, + metricsDisabled: metricsDisabled, + }, nil } // LocalStore store data into the kademlia network diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go index 92ccc700..eaf1339e 100644 --- a/supernode/cmd/start.go +++ b/supernode/cmd/start.go @@ -84,11 +84,14 @@ The supernode will connect to the Lumera network and begin participating in the logtrace.Fatal(ctx, "Failed to initialize RaptorQ store", logtrace.Fields{"error": err.Error()}) } - // Initialize P2P service - p2pService, err := initP2PService(ctx, appConfig, lumeraClient, kr, rqStore, nil, nil) - if err != nil { - logtrace.Fatal(ctx, "Failed to initialize P2P service", logtrace.Fields{"error": err.Error()}) - } + // Manually set the disable flag at the highest level + disableMetrics := true + + // Initialize P2P service with explicit disable flag + p2pService, err := initP2PService(ctx, 
appConfig, lumeraClient, kr, rqStore, nil, nil, disableMetrics) + if err != nil { + logtrace.Fatal(ctx, "Failed to initialize P2P service", logtrace.Fields{"error": err.Error()}) + } // Initialize the supernode supernodeInstance, err := NewSupernode(ctx, appConfig, kr, p2pService, rqStore, lumeraClient) @@ -97,18 +100,19 @@ The supernode will connect to the Lumera network and begin participating in the } // Configure cascade service - cService := cascadeService.NewCascadeService( - &cascadeService.Config{ - Config: common.Config{ - SupernodeAccountAddress: appConfig.SupernodeConfig.Identity, - }, - RqFilesDir: appConfig.GetRaptorQFilesDir(), - }, - lumeraClient, - *p2pService, - codec.NewRaptorQCodec(appConfig.GetRaptorQFilesDir()), - rqStore, - ) + cService := cascadeService.NewCascadeService( + &cascadeService.Config{ + Config: common.Config{ + SupernodeAccountAddress: appConfig.SupernodeConfig.Identity, + }, + RqFilesDir: appConfig.GetRaptorQFilesDir(), + MetricsDisabled: disableMetrics, + }, + lumeraClient, + *p2pService, + codec.NewRaptorQCodec(appConfig.GetRaptorQFilesDir()), + rqStore, + ) // Create cascade action server cascadeActionServer := cascade.NewCascadeActionServer(cService) @@ -190,7 +194,7 @@ func init() { } // initP2PService initializes the P2P service -func initP2PService(ctx context.Context, config *config.Config, lumeraClient lumera.Client, kr cKeyring.Keyring, rqStore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore) (*p2p.P2P, error) { +func initP2PService(ctx context.Context, config *config.Config, lumeraClient lumera.Client, kr cKeyring.Keyring, rqStore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore, metricsDisabled bool) (*p2p.P2P, error) { // Get the supernode address from the keyring keyInfo, err := kr.Key(config.SupernodeConfig.KeyName) if err != nil { @@ -206,7 +210,7 @@ func initP2PService(ctx context.Context, config *config.Config, lumeraClient lum logtrace.Info(ctx, "Initializing P2P service", 
logtrace.Fields{"address": p2pConfig.ListenAddress, "port": p2pConfig.Port, "data_dir": p2pConfig.DataDir, "supernode_id": address.String()}) - p2pService, err := p2p.New(ctx, p2pConfig, lumeraClient, kr, rqStore, cloud, mst) + p2pService, err := p2p.New(ctx, p2pConfig, lumeraClient, kr, rqStore, cloud, mst, metricsDisabled) if err != nil { return nil, fmt.Errorf("failed to initialize p2p service: %w", err) } diff --git a/supernode/services/cascade/adaptors/p2p.go b/supernode/services/cascade/adaptors/p2p.go index 116d6810..a29e2b99 100644 --- a/supernode/services/cascade/adaptors/p2p.go +++ b/supernode/services/cascade/adaptors/p2p.go @@ -39,13 +39,14 @@ type P2PService interface { // p2pImpl is the default implementation of the P2PService interface. type p2pImpl struct { - p2p p2p.Client - rqStore rqstore.Store + p2p p2p.Client + rqStore rqstore.Store + metricsDisabled bool } // NewP2PService returns a concrete implementation of P2PService. -func NewP2PService(client p2p.Client, store rqstore.Store) P2PService { - return &p2pImpl{p2p: client, rqStore: store} +func NewP2PService(client p2p.Client, store rqstore.Store, metricsDisabled bool) P2PService { + return &p2pImpl{p2p: client, rqStore: store, metricsDisabled: metricsDisabled} } type StoreArtefactsRequest struct { @@ -58,9 +59,11 @@ type StoreArtefactsRequest struct { func (p *p2pImpl) StoreArtefacts(ctx context.Context, req StoreArtefactsRequest, f logtrace.Fields) error { logtrace.Info(ctx, "About to store artefacts (metadata + symbols)", logtrace.Fields{"taskID": req.TaskID, "id_files": len(req.IDFiles)}) - // Enable per-node store RPC capture for this task - cm.StartStoreCapture(req.TaskID) - defer cm.StopStoreCapture(req.TaskID) + // Optionally enable per-node store RPC capture for this task + if !p.metricsDisabled { + cm.StartStoreCapture(req.TaskID) + defer cm.StopStoreCapture(req.TaskID) + } start := time.Now() firstPassSymbols, totalSymbols, err := p.storeCascadeSymbolsAndData(ctx, req.TaskID, 
req.ActionID, req.SymbolsDir, req.IDFiles) diff --git a/supernode/services/cascade/config.go b/supernode/services/cascade/config.go index 7a0f1ef2..01401d41 100644 --- a/supernode/services/cascade/config.go +++ b/supernode/services/cascade/config.go @@ -8,6 +8,8 @@ import ( type Config struct { common.Config `mapstructure:",squash" json:"-"` - RaptorQServiceAddress string `mapstructure:"-" json:"-"` - RqFilesDir string `mapstructure:"rq_files_dir" json:"rq_files_dir,omitempty"` + RaptorQServiceAddress string `mapstructure:"-" json:"-"` + RqFilesDir string `mapstructure:"rq_files_dir" json:"rq_files_dir,omitempty"` + // MetricsDisabled toggles upload/download metrics for cascade service + MetricsDisabled bool `mapstructure:"-" json:"-"` } diff --git a/supernode/services/cascade/download.go b/supernode/services/cascade/download.go index 363834bc..9da3dc1e 100644 --- a/supernode/services/cascade/download.go +++ b/supernode/services/cascade/download.go @@ -165,9 +165,11 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( } logtrace.Info(ctx, "Retrieving target-required symbols for decode", fields) - // Enable retrieve metrics capture for this action - cm.StartRetrieveCapture(actionID) - defer cm.StopRetrieveCapture(actionID) + + if !task.config.MetricsDisabled { + cm.StartRetrieveCapture(actionID) + defer cm.StopRetrieveCapture(actionID) + } // Measure symbols batch retrieve duration retrieveStart := time.Now() @@ -201,17 +203,22 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( } decodeMS := time.Since(decodeStart).Milliseconds() - // Set minimal retrieve summary and emit event strictly from internal collector - cm.SetRetrieveSummary(actionID, retrieveMS, decodeMS) - payload := cm.BuildDownloadEventPayloadFromCollector(actionID) - if retrieve, ok := payload["retrieve"].(map[string]any); ok { - retrieve["target_required_percent"] = targetRequiredPercent - retrieve["target_required_count"] = targetRequiredCount - retrieve["total_symbols"] = 
totalSymbols - } - if b, err := json.MarshalIndent(payload, "", " "); err == nil { - task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, string(b), "", "", send) - } + // Set minimal retrieve summary and emit event strictly from internal collector + if !task.config.MetricsDisabled { + cm.SetRetrieveSummary(actionID, retrieveMS, decodeMS) + payload := cm.BuildDownloadEventPayloadFromCollector(actionID) + if retrieve, ok := payload["retrieve"].(map[string]any); ok { + retrieve["target_required_percent"] = targetRequiredPercent + retrieve["target_required_count"] = targetRequiredCount + retrieve["total_symbols"] = totalSymbols + } + if b, err := json.MarshalIndent(payload, "", " "); err == nil { + task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, string(b), "", "", send) + } + } else { + // Send minimal hardcoded event when metrics disabled + task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, "Download completed (metrics disabled)", "", "", send) + } fileHash, err := crypto.HashFileIncrementally(decodeInfo.FilePath, 0) if err != nil { diff --git a/supernode/services/cascade/register.go b/supernode/services/cascade/register.go index dd6e1e77..4739e0d9 100644 --- a/supernode/services/cascade/register.go +++ b/supernode/services/cascade/register.go @@ -1,11 +1,11 @@ package cascade import ( - "context" - "os" + "context" + "os" - "github.com/LumeraProtocol/supernode/v2/pkg/logtrace" - "github.com/LumeraProtocol/supernode/v2/supernode/services/common" + "github.com/LumeraProtocol/supernode/v2/pkg/logtrace" + "github.com/LumeraProtocol/supernode/v2/supernode/services/common" ) // RegisterRequest contains parameters for upload request @@ -162,8 +162,10 @@ func (task *CascadeRegistrationTask) Register( if err := task.storeArtefacts(ctx, action.ActionID, rqidResp.RedundantMetadataFiles, encResp.SymbolsDir, fields); err != nil { return err } - // Emit compact analytics payload from centralized metrics collector - 
task.emitArtefactsStored(ctx, fields, encResp.Metadata, send) + // Emit compact analytics payload from centralized metrics collector (optional) + if !task.config.MetricsDisabled { + task.emitArtefactsStored(ctx, fields, encResp.Metadata, send) + } resp, err := task.LumeraClient.FinalizeAction(ctx, action.ActionID, rqidResp.RQIDs) if err != nil { diff --git a/supernode/services/cascade/service.go b/supernode/services/cascade/service.go index a1d9898b..b5b2870a 100644 --- a/supernode/services/cascade/service.go +++ b/supernode/services/cascade/service.go @@ -56,11 +56,11 @@ func (service *CascadeService) GetRunningTasks() []string { // NewCascadeService returns a new CascadeService instance func NewCascadeService(config *Config, lumera lumera.Client, p2pClient p2p.Client, codec codec.Codec, rqstore rqstore.Store) *CascadeService { - return &CascadeService{ - config: config, - SuperNodeService: base.NewSuperNodeService(p2pClient), - LumeraClient: adaptors.NewLumeraClient(lumera), - P2P: adaptors.NewP2PService(p2pClient, rqstore), - RQ: adaptors.NewCodecService(codec), - } + return &CascadeService{ + config: config, + SuperNodeService: base.NewSuperNodeService(p2pClient), + LumeraClient: adaptors.NewLumeraClient(lumera), + P2P: adaptors.NewP2PService(p2pClient, rqstore, config.MetricsDisabled), + RQ: adaptors.NewCodecService(codec), + } } diff --git a/tests/integration/p2p/p2p_integration_test.go b/tests/integration/p2p/p2p_integration_test.go index bce71f58..d5df6dc2 100644 --- a/tests/integration/p2p/p2p_integration_test.go +++ b/tests/integration/p2p/p2p_integration_test.go @@ -108,7 +108,7 @@ func TestP2PBasicIntegration(t *testing.T) { // Add debug logging log.Printf("Storing batch with keys: %v", expectedKeys) - err := services[0].StoreBatch(ctx, batchData, 0, taskID) + err := services[0].StoreBatch(ctx, batchData, 0, taskID) require.NoError(t, err) // Add immediate verification @@ -203,7 +203,8 @@ func SetupTestP2PNodes(t *testing.T, ctx context.Context) 
([]p2p.Client, []*rqst require.NoError(t, err, "failed to create rqstore for node %d: %v", i, err) rqStores = append(rqStores, rqStore) - service, err := p2p.New(ctx, p2pConfig, mockClient, kr, rqStore, nil, nil) + // Disable metrics in integration tests by default + service, err := p2p.New(ctx, p2pConfig, mockClient, kr, rqStore, nil, nil, true) require.NoError(t, err, "failed to create p2p service for node %d: %v", i, err) // Start P2P service From c53d7c32d71ff5c1b0f4607833104c77512353a2 Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Mon, 29 Sep 2025 19:28:29 +0500 Subject: [PATCH 11/15] Silence all logs --- p2p/kademlia/bootstrap.go | 2 +- p2p/kademlia/dht.go | 174 +++++++++--------- p2p/kademlia/fetch_and_store.go | 32 ++-- p2p/kademlia/network.go | 32 ++-- p2p/kademlia/node_activity.go | 4 +- p2p/kademlia/redundant_data.go | 14 +- p2p/kademlia/replication.go | 34 ++-- p2p/kademlia/rq_symbols.go | 8 +- p2p/kademlia/store/meta/meta.go | 10 +- p2p/kademlia/store/sqlite/meta_worker.go | 26 +-- p2p/kademlia/store/sqlite/sqlite.go | 16 +- p2p/p2p.go | 70 +++---- pkg/codec/decode.go | 104 +++++------ pkg/codec/raptorq.go | 6 +- pkg/common/task/task.go | 6 +- pkg/common/task/worker.go | 2 +- pkg/dd/image_rareness.go | 4 +- pkg/dd/status.go | 4 +- pkg/lumera/connection.go | 2 +- pkg/lumera/modules/auth/impl.go | 4 +- pkg/lumera/modules/tx/impl.go | 10 +- pkg/net/grpc/server/server.go | 4 +- pkg/net/interceptor.go | 4 +- pkg/storage/queries/health_check.go | 6 +- pkg/storage/queries/self_healing.go | 10 +- pkg/storage/queries/storage_challenge.go | 6 +- pkg/storage/queries/task_history.go | 2 +- sdk/task/task.go | 2 +- supernode/cmd/service.go | 2 +- supernode/cmd/start.go | 54 +++--- supernode/cmd/supernode.go | 8 +- .../server/cascade/cascade_action_server.go | 24 +-- supernode/node/supernode/gateway/server.go | 4 +- supernode/node/supernode/server/server.go | 6 +- supernode/services/cascade/adaptors/p2p.go | 34 ++-- supernode/services/cascade/download.go | 
63 ++++--- supernode/services/cascade/helper.go | 32 ++-- supernode/services/cascade/register.go | 50 ++--- .../services/common/base/supernode_service.go | 2 +- .../services/common/base/supernode_task.go | 2 +- supernode/services/common/storage/handler.go | 12 +- .../services/common/supernode/service.go | 2 +- supernode/services/verifier/verifier.go | 2 +- 43 files changed, 447 insertions(+), 448 deletions(-) diff --git a/p2p/kademlia/bootstrap.go b/p2p/kademlia/bootstrap.go index 5b29f44d..25dc3b54 100644 --- a/p2p/kademlia/bootstrap.go +++ b/p2p/kademlia/bootstrap.go @@ -102,7 +102,7 @@ func (s *DHT) setBootstrapNodesFromConfigVar(ctx context.Context, bootstrapNodes }) } s.options.BootstrapNodes = nodes - logtrace.Info(ctx, "Bootstrap nodes set from config var", logtrace.Fields{ + logtrace.Debug(ctx, "Bootstrap nodes set from config var", logtrace.Fields{ logtrace.FieldModule: "p2p", "bootstrap_nodes": nodes, }) diff --git a/p2p/kademlia/dht.go b/p2p/kademlia/dht.go index b83fb34e..df2be5c7 100644 --- a/p2p/kademlia/dht.go +++ b/p2p/kademlia/dht.go @@ -43,9 +43,9 @@ const ( lowSpaceThreshold = 50 // GB batchRetrieveSize = 1000 - storeSameSymbolsBatchConcurrency = 3 - fetchSymbolsBatchConcurrency = 6 - minimumDataStoreSuccessRate = 75.0 + storeSameSymbolsBatchConcurrency = 3 + fetchSymbolsBatchConcurrency = 6 + minimumDataStoreSuccessRate = 75.0 maxIterations = 4 macConcurrentNetworkStoreCalls = 16 @@ -104,7 +104,7 @@ func (s *DHT) bootstrapIgnoreList(ctx context.Context) error { } if added > 0 { - logtrace.Info(ctx, "Ignore list bootstrapped from replication info", logtrace.Fields{ + logtrace.Debug(ctx, "Ignore list bootstrapped from replication info", logtrace.Fields{ logtrace.FieldModule: "p2p", "ignored_count": added, }) @@ -125,7 +125,7 @@ func (s *DHT) ConnPoolSnapshot() map[string]int64 { // Options contains configuration options for the queries node type Options struct { - ID []byte + ID []byte // The queries IPv4 or IPv6 address IP string @@ -140,11 
+140,11 @@ type Options struct { // Lumera client for interacting with the blockchain LumeraClient lumera.Client - // Keyring for credentials - Keyring keyring.Keyring + // Keyring for credentials + Keyring keyring.Keyring - // MetricsDisabled gates DHT-level metrics emission (p2pmetrics hooks and snapshots) - MetricsDisabled bool + // MetricsDisabled gates DHT-level metrics emission (p2pmetrics hooks and snapshots) + MetricsDisabled bool } // NewDHT returns a new DHT node @@ -362,7 +362,7 @@ func (s *DHT) Store(ctx context.Context, data []byte, typ int) (string, error) { // measured success rate for node RPCs is below the configured minimum, an error // is returned. Metrics are not returned through the API. func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID string) error { - logtrace.Info(ctx, "Store DB batch begin", logtrace.Fields{ + logtrace.Debug(ctx, "Store DB batch begin", logtrace.Fields{ logtrace.FieldModule: "dht", logtrace.FieldTaskID: taskID, "records": len(values), @@ -370,7 +370,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s if err := s.store.StoreBatch(ctx, values, typ, true); err != nil { return fmt.Errorf("store batch: %v", err) } - logtrace.Info(ctx, "Store DB batch done, store network batch begin", logtrace.Fields{ + logtrace.Debug(ctx, "Store DB batch done, store network batch begin", logtrace.Fields{ logtrace.FieldModule: "dht", logtrace.FieldTaskID: taskID, }) @@ -380,7 +380,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s return fmt.Errorf("iterate batch store: %v", err) } - logtrace.Info(ctx, "Store network batch workers done", logtrace.Fields{ + logtrace.Debug(ctx, "Store network batch workers done", logtrace.Fields{ logtrace.FieldModule: "dht", logtrace.FieldTaskID: taskID, }) @@ -426,13 +426,13 @@ func (s *DHT) Retrieve(ctx context.Context, key string, localOnly ...bool) ([]by return nil, errors.Errorf("retrieve from peer: %w", err) } 
if len(peerValue) > 0 { - logtrace.Info(ctx, "Not found locally, retrieved from other nodes", logtrace.Fields{ + logtrace.Debug(ctx, "Not found locally, retrieved from other nodes", logtrace.Fields{ logtrace.FieldModule: "dht", "key": dbKey, "data_len": len(peerValue), }) } else { - logtrace.Info(ctx, "Not found locally, not found in other nodes", logtrace.Fields{ + logtrace.Debug(ctx, "Not found locally, not found in other nodes", logtrace.Fields{ logtrace.FieldModule: "dht", "key": dbKey, }) @@ -531,7 +531,7 @@ func (s *DHT) GetValueFromNode(ctx context.Context, target []byte, n *Node) ([]b response, err := s.network.Call(cctx, request, false) if err != nil { - logtrace.Info(ctx, "Network call request failed", logtrace.Fields{ + logtrace.Debug(ctx, "Network call request failed", logtrace.Fields{ logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "request": request.String(), @@ -573,7 +573,7 @@ func (s *DHT) doMultiWorkers(ctx context.Context, iterativeType int, target []by // update the running goroutines number++ - logtrace.Info(ctx, "Start work for node", logtrace.Fields{ + logtrace.Debug(ctx, "Start work for node", logtrace.Fields{ logtrace.FieldModule: "p2p", "iterate_type": iterativeType, "node": node.String(), @@ -600,7 +600,7 @@ func (s *DHT) doMultiWorkers(ctx context.Context, iterativeType int, target []by // send the request and receive the response response, err := s.network.Call(ctx, request, false) if err != nil { - logtrace.Info(ctx, "Network call request failed", logtrace.Fields{ + logtrace.Debug(ctx, "Network call request failed", logtrace.Fields{ logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "request": request.String(), @@ -637,7 +637,7 @@ func (s *DHT) fetchAndAddLocalKeys(ctx context.Context, hexKeys []string, result batchHexKeys := hexKeys[start:end] - logtrace.Info(ctx, "Processing batch of local keys", logtrace.Fields{ + logtrace.Debug(ctx, "Processing batch of local keys", logtrace.Fields{ logtrace.FieldModule: 
"dht", "batch_size": len(batchHexKeys), "total_keys": len(hexKeys), @@ -743,9 +743,9 @@ func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32, return nil, fmt.Errorf("fetch and add local keys: %v", err) } // Report how many were found locally, for event metrics - if !s.options.MetricsDisabled { - p2pmetrics.ReportFoundLocal(p2pmetrics.TaskIDFromContext(ctx), int(foundLocalCount)) - } + if !s.options.MetricsDisabled { + p2pmetrics.ReportFoundLocal(p2pmetrics.TaskIDFromContext(ctx), int(foundLocalCount)) + } if foundLocalCount >= required { return result, nil } @@ -794,9 +794,9 @@ func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32, // Record batch retrieve stats for internal DHT snapshot window s.metrics.RecordBatchRetrieve(len(keys), int(required), int(foundLocalCount), netFound, time.Since(start)) // Also feed retrieve counts into the per-task collector for stream events - if !s.options.MetricsDisabled { - p2pmetrics.SetRetrieveBatchSummary(p2pmetrics.TaskIDFromContext(ctx), len(keys), int(required), int(foundLocalCount), netFound, time.Since(start).Milliseconds()) - } + if !s.options.MetricsDisabled { + p2pmetrics.SetRetrieveBatchSummary(p2pmetrics.TaskIDFromContext(ctx), len(keys), int(required), int(foundLocalCount), netFound, time.Since(start).Milliseconds()) + } return result, nil } @@ -907,7 +907,7 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node, defer cancel() for nodeID, node := range nodes { if s.ignorelist.Banned(node) { - logtrace.Info(ctx, "Ignore banned node in iterate batch get values", logtrace.Fields{ + logtrace.Debug(ctx, "Ignore banned node in iterate batch get values", logtrace.Fields{ logtrace.FieldModule: "dht", "node": node.String(), }) @@ -954,16 +954,16 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node, } mu.Unlock() // record failed RPC per-node - if !s.options.MetricsDisabled { - 
p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ - IP: node.IP, - Address: node.String(), - Keys: 0, - Success: false, - Error: err.Error(), - DurationMS: time.Since(callStart).Milliseconds(), - }) - } + if !s.options.MetricsDisabled { + p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ + IP: node.IP, + Address: node.String(), + Keys: 0, + Success: false, + Error: err.Error(), + DurationMS: time.Since(callStart).Milliseconds(), + }) + } return } @@ -986,22 +986,22 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node, } // record successful RPC per-node (returned may be 0). Success is true when no error. - if !s.options.MetricsDisabled { - p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ - IP: node.IP, - Address: node.String(), - Keys: returned, - Success: true, - Error: "", - DurationMS: time.Since(callStart).Milliseconds(), - }) - } + if !s.options.MetricsDisabled { + p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ + IP: node.IP, + Address: node.String(), + Keys: returned, + Success: true, + Error: "", + DurationMS: time.Since(callStart).Milliseconds(), + }) + } }(node, nodeID) } wg.Wait() - logtrace.Info(ctx, "Iterate batch get values done", logtrace.Fields{ + logtrace.Debug(ctx, "Iterate batch get values done", logtrace.Fields{ logtrace.FieldModule: "dht", "found_count": atomic.LoadInt32(&foundCount), }) @@ -1083,7 +1083,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat // find the closest contacts for the target node from queries route tables nl, _ := s.ht.closestContacts(Alpha, target, igList) if len(igList) > 0 { - logtrace.Info(ctx, "Closest contacts", logtrace.Fields{ + logtrace.Debug(ctx, "Closest contacts", logtrace.Fields{ logtrace.FieldModule: "p2p", "nodes": nl.String(), "ignored": s.ignorelist.String(), @@ -1093,7 +1093,7 @@ func (s *DHT) iterate(ctx context.Context, 
iterativeType int, target []byte, dat if nl.Len() == 0 { return nil, nil } - logtrace.Info(ctx, "Iterate start", logtrace.Fields{ + logtrace.Debug(ctx, "Iterate start", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "type": iterativeType, @@ -1107,7 +1107,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat if iterativeType == IterateFindNode { hashedTargetID, _ := utils.Blake3Hash(target) bucket := s.ht.bucketIndex(s.ht.self.HashedID, hashedTargetID) - logtrace.Info(ctx, "Bucket for target", logtrace.Fields{ + logtrace.Debug(ctx, "Bucket for target", logtrace.Fields{ logtrace.FieldModule: "p2p", "target": sKey, }) @@ -1131,7 +1131,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat // Set a maximum number of iterations to prevent indefinite looping maxIterations := 5 // Adjust the maximum iterations as needed - logtrace.Info(ctx, "Begin iteration", logtrace.Fields{ + logtrace.Debug(ctx, "Begin iteration", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1142,7 +1142,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat case <-ctx.Done(): return nil, fmt.Errorf("iterate cancelled: %w", ctx.Err()) case <-timeout: - logtrace.Info(ctx, "Iteration timed out", logtrace.Fields{ + logtrace.Debug(ctx, "Iteration timed out", logtrace.Fields{ logtrace.FieldModule: "p2p", }) return nil, nil @@ -1174,7 +1174,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat // Stop search if no more nodes to contact if !searchRest && len(nl.Nodes) == 0 { - logtrace.Info(ctx, "Search stopped", logtrace.Fields{ + logtrace.Debug(ctx, "Search stopped", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1186,7 +1186,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat nl.Comparator = target nl.Sort() - logtrace.Info(ctx, "Iterate sorted nodes", 
logtrace.Fields{ + logtrace.Debug(ctx, "Iterate sorted nodes", logtrace.Fields{ logtrace.FieldModule: "p2p", "id": base58.Encode(s.ht.self.ID), "iterate": iterativeType, @@ -1223,7 +1223,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat } } - logtrace.Info(ctx, "Finish iteration without results", logtrace.Fields{ + logtrace.Debug(ctx, "Finish iteration without results", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1244,7 +1244,7 @@ func (s *DHT) handleResponses(ctx context.Context, responses <-chan *Message, nl v, ok := response.Data.(*FindValueResponse) if ok { if v.Status.Result == ResultOk && len(v.Value) > 0 { - logtrace.Info(ctx, "Iterate found value from network", logtrace.Fields{ + logtrace.Debug(ctx, "Iterate found value from network", logtrace.Fields{ logtrace.FieldModule: "p2p", }) return nl, v.Value @@ -1274,7 +1274,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] // nl will have the closest nodes to the target value, it will ignore the nodes in igList nl, _ := s.ht.closestContacts(Alpha, target, igList) if len(igList) > 0 { - logtrace.Info(ctx, "Closest contacts", logtrace.Fields{ + logtrace.Debug(ctx, "Closest contacts", logtrace.Fields{ logtrace.FieldModule: "p2p", "nodes": nl.String(), "ignored": s.ignorelist.String(), @@ -1289,7 +1289,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] searchRest := false // keep track of contacted nodes so that we don't hit them again contacted := make(map[string]bool) - logtrace.Info(ctx, "Begin iteration", logtrace.Fields{ + logtrace.Debug(ctx, "Begin iteration", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1298,7 +1298,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] var closestNode *Node var iterationCount int for iterationCount = 0; iterationCount < maxIterations; iterationCount++ { - 
logtrace.Info(ctx, "Begin find value", logtrace.Fields{ + logtrace.Debug(ctx, "Begin find value", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "nl": nl.Len(), @@ -1318,7 +1318,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] // if the closest node is the same as the last iteration and we don't want to search rest of nodes, we are done if !searchRest && (closestNode != nil && bytes.Equal(nl.Nodes[0].ID, closestNode.ID)) { - logtrace.Info(ctx, "Closest node is the same as the last iteration", logtrace.Fields{ + logtrace.Debug(ctx, "Closest node is the same as the last iteration", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1337,7 +1337,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] nl.Sort() - logtrace.Info(ctx, "Iteration progress", logtrace.Fields{ + logtrace.Debug(ctx, "Iteration progress", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1346,7 +1346,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] }) } - logtrace.Info(ctx, "Finished iterations without results", logtrace.Fields{ + logtrace.Debug(ctx, "Finished iterations without results", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1526,7 +1526,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte, for i := Alpha; i < nl.Len() && finalStoreCount < int32(Alpha); i++ { n := nl.Nodes[i] if s.ignorelist.Banned(n) { - logtrace.Info(ctx, "Ignore banned node during sequential store", logtrace.Fields{ + logtrace.Debug(ctx, "Ignore banned node during sequential store", logtrace.Fields{ logtrace.FieldModule: "p2p", "node": n.String(), "task_id": taskID, @@ -1559,7 +1559,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte, skey, _ := utils.Blake3Hash(data) if finalStoreCount >= int32(Alpha) { - logtrace.Info(ctx, "Store data 
to alpha nodes success", logtrace.Fields{ + logtrace.Debug(ctx, "Store data to alpha nodes success", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": taskID, "len_total_nodes": nl.Len(), @@ -1569,7 +1569,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte, return nil } - logtrace.Info(ctx, "Store data to alpha nodes failed", logtrace.Fields{ + logtrace.Debug(ctx, "Store data to alpha nodes failed", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": taskID, "store_count": finalStoreCount, @@ -1582,7 +1582,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte, func (s *DHT) removeNode(ctx context.Context, node *Node) { // ensure this is not itself address if bytes.Equal(node.ID, s.ht.self.ID) { - logtrace.Info(ctx, "Trying to remove itself", logtrace.Fields{ + logtrace.Debug(ctx, "Trying to remove itself", logtrace.Fields{ logtrace.FieldModule: "p2p", }) return @@ -1598,7 +1598,7 @@ func (s *DHT) removeNode(ctx context.Context, node *Node) { "bucket": index, }) } else { - logtrace.Info(ctx, "Removed node from bucket success", logtrace.Fields{ + logtrace.Debug(ctx, "Removed node from bucket success", logtrace.Fields{ logtrace.FieldModule: "p2p", "node": node.String(), "bucket": index, @@ -1656,7 +1656,7 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i knownNodes := make(map[string]*Node) hashes := make([][]byte, len(values)) - logtrace.Info(ctx, "Iterate batch store begin", logtrace.Fields{ + logtrace.Debug(ctx, "Iterate batch store begin", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": id, "keys": len(values), @@ -1725,16 +1725,16 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i } // Emit per-node store RPC call via metrics bridge (no P2P API coupling) - if !s.options.MetricsDisabled { - p2pmetrics.RecordStore(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ - IP: nodeIP, - Address: nodeAddr, - Keys: 
response.KeysCount, - Success: errMsg == "" && response.Error == nil, - Error: errMsg, - DurationMS: response.DurationMS, - }) - } + if !s.options.MetricsDisabled { + p2pmetrics.RecordStore(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{ + IP: nodeIP, + Address: nodeAddr, + Keys: response.KeysCount, + Success: errMsg == "" && response.Error == nil, + Error: errMsg, + DurationMS: response.DurationMS, + }) + } } @@ -1743,14 +1743,14 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i successRate := float64(successful) / float64(requests) * 100 if successRate >= minimumDataStoreSuccessRate { - logtrace.Info(ctx, "Successful store operations", logtrace.Fields{ + logtrace.Debug(ctx, "Successful store operations", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": id, "success_rate": fmt.Sprintf("%.2f%%", successRate), }) return nil } else { - logtrace.Info(ctx, "Failed to achieve desired success rate", logtrace.Fields{ + logtrace.Debug(ctx, "Failed to achieve desired success rate", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": id, "success_rate": fmt.Sprintf("%.2f%%", successRate), @@ -1777,12 +1777,12 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ var wg sync.WaitGroup for key, node := range nodes { - logtrace.Info(ctx, "Node", logtrace.Fields{ + logtrace.Debug(ctx, "Node", logtrace.Fields{ logtrace.FieldModule: "dht", "port": node.String(), }) if s.ignorelist.Banned(node) { - logtrace.Info(ctx, "Ignoring banned node in batch store network call", logtrace.Fields{ + logtrace.Debug(ctx, "Ignoring banned node in batch store network call", logtrace.Fields{ logtrace.FieldModule: "dht", "node": node.String(), }) @@ -1810,7 +1810,7 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ totalBytes += len(values[idx]) } - logtrace.Info(ctx, "Batch store to node", logtrace.Fields{ + logtrace.Debug(ctx, "Batch store to node", logtrace.Fields{ 
logtrace.FieldModule: "dht", "keys": len(toStore), "size_before_compress": utils.BytesIntToMB(totalBytes), @@ -1818,7 +1818,7 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ // Skip empty payloads: avoid sending empty store RPCs and do not record no-op metrics. if len(toStore) == 0 { - logtrace.Info(ctx, "Skipping store RPC with empty payload", logtrace.Fields{ + logtrace.Debug(ctx, "Skipping store RPC with empty payload", logtrace.Fields{ logtrace.FieldModule: "dht", "node": receiver.String(), }) @@ -1835,7 +1835,7 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ s.metrics.IncHotPathBanIncr() } - logtrace.Info(ctx, "Network call batch store request failed", logtrace.Fields{ + logtrace.Debug(ctx, "Network call batch store request failed", logtrace.Fields{ logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "request": request.String(), @@ -1856,7 +1856,7 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ } func (s *DHT) batchFindNode(ctx context.Context, payload [][]byte, nodes map[string]*Node, contacted map[string]bool, txid string) (chan *MessageWithError, bool) { - logtrace.Info(ctx, "Batch find node begin", logtrace.Fields{ + logtrace.Debug(ctx, "Batch find node begin", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": txid, "nodes_count": len(nodes), @@ -1879,7 +1879,7 @@ func (s *DHT) batchFindNode(ctx context.Context, payload [][]byte, nodes map[str continue } if s.ignorelist.Banned(node) { - logtrace.Info(ctx, "Ignoring banned node in batch find call", logtrace.Fields{ + logtrace.Debug(ctx, "Ignoring banned node in batch find call", logtrace.Fields{ logtrace.FieldModule: "dht", "node": node.String(), "txid": txid, @@ -1927,7 +1927,7 @@ func (s *DHT) batchFindNode(ctx context.Context, payload [][]byte, nodes map[str } wg.Wait() close(responses) - logtrace.Info(ctx, "Batch find node done", logtrace.Fields{ + logtrace.Debug(ctx, "Batch find 
node done", logtrace.Fields{ logtrace.FieldModule: "dht", "nodes_count": len(nodes), "len_resp": len(responses), diff --git a/p2p/kademlia/fetch_and_store.go b/p2p/kademlia/fetch_and_store.go index 9803bf3d..d7bc0f28 100644 --- a/p2p/kademlia/fetch_and_store.go +++ b/p2p/kademlia/fetch_and_store.go @@ -26,12 +26,12 @@ const ( // FetchAndStore fetches all keys from the queries TODO replicate list, fetches value from respective nodes and stores them in the queries store func (s *DHT) FetchAndStore(ctx context.Context) error { - logtrace.Info(ctx, "Getting fetch and store keys", logtrace.Fields{}) + logtrace.Debug(ctx, "Getting fetch and store keys", logtrace.Fields{}) keys, err := s.store.GetAllToDoRepKeys(failedKeysClosestContactsLookupCount+maxBatchAttempts+1, totalMaxAttempts) if err != nil { return fmt.Errorf("get all keys error: %w", err) } - logtrace.Info(ctx, "got keys from queries store", logtrace.Fields{"count": len(keys)}) + logtrace.Debug(ctx, "got keys from queries store", logtrace.Fields{"count": len(keys)}) if len(keys) == 0 { return nil @@ -79,7 +79,7 @@ func (s *DHT) FetchAndStore(ctx context.Context) error { return } - logtrace.Info(cctx, "iterate fetch for replication success", logtrace.Fields{"key": info.Key, "ip": info.IP}) + logtrace.Debug(cctx, "iterate fetch for replication success", logtrace.Fields{"key": info.Key, "ip": info.IP}) } if err := s.store.Store(cctx, sKey, value, 0, false); err != nil { @@ -94,7 +94,7 @@ func (s *DHT) FetchAndStore(ctx context.Context) error { atomic.AddInt32(&successCounter, 1) // Increment the counter atomically - logtrace.Info(cctx, "fetch & store key success", logtrace.Fields{"key": info.Key, "ip": info.IP}) + logtrace.Debug(cctx, "fetch & store key success", logtrace.Fields{"key": info.Key, "ip": info.IP}) }(key) time.Sleep(100 * time.Millisecond) @@ -102,7 +102,7 @@ func (s *DHT) FetchAndStore(ctx context.Context) error { //wg.Wait() - logtrace.Info(ctx, "Successfully fetched & stored keys", 
logtrace.Fields{"todo-keys": len(keys), "successfully-added-keys": atomic.LoadInt32(&successCounter)}) // Log the final count + logtrace.Debug(ctx, "Successfully fetched & stored keys", logtrace.Fields{"todo-keys": len(keys), "successfully-added-keys": atomic.LoadInt32(&successCounter)}) // Log the final count return nil } @@ -114,7 +114,7 @@ func (s *DHT) BatchFetchAndStoreFailedKeys(ctx context.Context) error { if err != nil { return fmt.Errorf("get all keys error: %w", err) } - logtrace.Info(ctx, "read failed keys from store", logtrace.Fields{"count": len(keys)}) + logtrace.Debug(ctx, "read failed keys from store", logtrace.Fields{"count": len(keys)}) if len(keys) == 0 { return nil @@ -143,7 +143,7 @@ func (s *DHT) BatchFetchAndStoreFailedKeys(ctx context.Context) error { repKeys = append(repKeys, repKey) } } - logtrace.Info(ctx, "got 2nd tier replication keys from queries store", logtrace.Fields{"count": len(repKeys)}) + logtrace.Debug(ctx, "got 2nd tier replication keys from queries store", logtrace.Fields{"count": len(repKeys)}) if err := s.GroupAndBatchFetch(ctx, repKeys, 0, false); err != nil { logtrace.Error(ctx, "group and batch fetch failed-keys error", logtrace.Fields{logtrace.FieldError: err}) @@ -160,7 +160,7 @@ func (s *DHT) BatchFetchAndStore(ctx context.Context) error { if err != nil { return fmt.Errorf("get all keys error: %w", err) } - logtrace.Info(ctx, "got batch todo rep-keys from queries store", logtrace.Fields{"count": len(keys)}) + logtrace.Debug(ctx, "got batch todo rep-keys from queries store", logtrace.Fields{"count": len(keys)}) if len(keys) == 0 { return nil @@ -213,12 +213,12 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey, totalKeysFound := 0 for len(stringKeys) > 0 && iterations < maxSingleBatchIterations { iterations++ - logtrace.Info(ctx, "fetching batch values from node", logtrace.Fields{"node-ip": node.IP, "count": len(stringKeys), "keys[0]": stringKeys[0], "keys[len()]": 
stringKeys[len(stringKeys)-1]}) + logtrace.Debug(ctx, "fetching batch values from node", logtrace.Fields{"node-ip": node.IP, "count": len(stringKeys), "keys[0]": stringKeys[0], "keys[len()]": stringKeys[len(stringKeys)-1]}) isDone, retMap, failedKeys, err := s.GetBatchValuesFromNode(ctx, stringKeys, node) if err != nil { // Log the error but don't stop the process, continue to the next node - logtrace.Info(ctx, "failed to get batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) + logtrace.Debug(ctx, "failed to get batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) continue } @@ -238,7 +238,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey, err = s.store.StoreBatch(ctx, response, datatype, isOriginal) if err != nil { // Log the error but don't stop the process, continue to the next node - logtrace.Info(ctx, "failed to store batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) + logtrace.Debug(ctx, "failed to store batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) continue } @@ -246,7 +246,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey, err = s.store.BatchDeleteRepKeys(stringDelKeys) if err != nil { // Log the error but don't stop the process, continue to the next node - logtrace.Info(ctx, "failed to delete rep keys", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) + logtrace.Debug(ctx, "failed to delete rep keys", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) continue } } else { @@ -255,7 +255,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey, if isDone && len(failedKeys) > 0 { if err := s.store.IncrementAttempts(failedKeys); err != nil { - logtrace.Info(ctx, "failed to increment attempts", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) + logtrace.Debug(ctx, "failed to increment attempts", 
logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err}) // not adding 'continue' here because we want to delete the keys from the todo list } } else if isDone { @@ -265,7 +265,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey, } } - logtrace.Info(ctx, "fetch batch values from node successfully", logtrace.Fields{"node-ip": node.IP, "count": totalKeysFound, "iterations": iterations}) + logtrace.Debug(ctx, "fetch batch values from node successfully", logtrace.Fields{"node-ip": node.IP, "count": totalKeysFound, "iterations": iterations}) } } @@ -274,7 +274,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey, // GetBatchValuesFromNode get values from node in bateches func (s *DHT) GetBatchValuesFromNode(ctx context.Context, keys []string, n *Node) (bool, map[string][]byte, []string, error) { - logtrace.Info(ctx, "sending batch fetch request", logtrace.Fields{"node-ip": n.IP, "keys": len(keys)}) + logtrace.Debug(ctx, "sending batch fetch request", logtrace.Fields{"node-ip": n.IP, "keys": len(keys)}) messageType := BatchFindValues @@ -347,7 +347,7 @@ func (s *DHT) GetBatchValuesFromNode(ctx context.Context, keys []string, n *Node if err != nil { return isDone, nil, nil, fmt.Errorf("failed to verify and filter data: %w", err) } - logtrace.Info(ctx, "batch fetch response rcvd and keys verified", logtrace.Fields{"node-ip": n.IP, "received-keys": len(decompressedMap), "verified-keys": len(retMap), "failed-keys": len(failedKeys)}) + logtrace.Debug(ctx, "batch fetch response rcvd and keys verified", logtrace.Fields{"node-ip": n.IP, "received-keys": len(decompressedMap), "verified-keys": len(retMap), "failed-keys": len(failedKeys)}) return v.Done, retMap, failedKeys, nil } diff --git a/p2p/kademlia/network.go b/p2p/kademlia/network.go index 935d1583..c887eab1 100644 --- a/p2p/kademlia/network.go +++ b/p2p/kademlia/network.go @@ -336,7 +336,7 @@ func (s *Network) handleReplicateRequest(ctx context.Context, 
req *ReplicateData return fmt.Errorf("unable to store batch replication keys: %w", err) } - logtrace.Info(ctx, "Store batch replication keys stored", logtrace.Fields{ + logtrace.Debug(ctx, "Store batch replication keys stored", logtrace.Fields{ logtrace.FieldModule: "p2p", "to-store-keys": len(keysToStore), "rcvd-keys": len(req.Keys), @@ -649,7 +649,7 @@ func (s *Network) rpcOnceWrapper(ctx context.Context, cw *connWrapper, remoteAdd if _, e := cw.secureConn.Write(data); e != nil { cw.mtx.Unlock() if isStaleConnError(e) && !retried { - logtrace.Info(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{ + logtrace.Debug(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{ logtrace.FieldModule: "p2p", "remote": remoteAddr, "message_type": msgType, @@ -690,7 +690,7 @@ func (s *Network) rpcOnceWrapper(ctx context.Context, cw *connWrapper, remoteAdd cw.mtx.Unlock() if e != nil { if isStaleConnError(e) && !retried { - logtrace.Info(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{ + logtrace.Debug(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{ logtrace.FieldModule: "p2p", "remote": remoteAddr, "message_type": msgType, @@ -743,7 +743,7 @@ Retry: } if _, err := conn.Write(data); err != nil { if isStaleConnError(err) && !retried { - logtrace.Info(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{ + logtrace.Debug(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{ logtrace.FieldModule: "p2p", "remote": remoteAddr, "message_type": msgType, @@ -777,7 +777,7 @@ Retry: _ = conn.SetDeadline(time.Time{}) if err != nil { if isStaleConnError(err) && !retried { - logtrace.Info(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{ + logtrace.Debug(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{ logtrace.FieldModule: "p2p", "remote": remoteAddr, "message_type": msgType, @@ -841,7 +841,7 @@ func (s *Network) handleBatchFindValues(ctx 
context.Context, message *Message, r // Try to acquire the semaphore, wait up to 1 minute logtrace.Debug(ctx, "Attempting to acquire semaphore immediately", logtrace.Fields{logtrace.FieldModule: "p2p"}) if !s.sem.TryAcquire(1) { - logtrace.Info(ctx, "Immediate acquisition failed. Waiting up to 1 minute", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "Immediate acquisition failed. Waiting up to 1 minute", logtrace.Fields{logtrace.FieldModule: "p2p"}) ctxWithTimeout, cancel := context.WithTimeout(ctx, 1*time.Minute) defer cancel() @@ -850,7 +850,7 @@ func (s *Network) handleBatchFindValues(ctx context.Context, message *Message, r // failed to acquire semaphore within 1 minute return s.generateResponseMessage(BatchFindValues, message.Sender, ResultFailed, errorBusy) } - logtrace.Info(ctx, "Semaphore acquired after waiting", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "Semaphore acquired after waiting", logtrace.Fields{logtrace.FieldModule: "p2p"}) } // Add a defer function to recover from panic @@ -936,7 +936,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, return s.generateResponseMessage(BatchGetValues, message.Sender, ResultFailed, err.Error()) } - logtrace.Info(ctx, "Batch get values request received", logtrace.Fields{ + logtrace.Debug(ctx, "Batch get values request received", logtrace.Fields{ logtrace.FieldModule: "p2p", "from": message.Sender.String(), }) @@ -966,7 +966,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, return s.generateResponseMessage(BatchGetValues, message.Sender, ResultFailed, err.Error()) } - logtrace.Info(ctx, "Batch get values request processed", logtrace.Fields{ + logtrace.Debug(ctx, "Batch get values request processed", logtrace.Fields{ logtrace.FieldModule: "p2p", "requested-keys": len(keys), "found": count, @@ -1006,7 +1006,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, func (s 
*Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFindValuesRequest, ip string, reqID string) (isDone bool, compressedData []byte, err error) { // log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("keys", len(req.Keys)).WithField("from-ip", ip).Info("batch find values request received") - logtrace.Info(ctx, "Batch find values request received", logtrace.Fields{ + logtrace.Debug(ctx, "Batch find values request received", logtrace.Fields{ logtrace.FieldModule: "p2p", "from": ip, "keys": len(req.Keys), @@ -1029,7 +1029,7 @@ func (s *Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFi return false, nil, fmt.Errorf("failed to retrieve batch values: %w", err) } // log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("values-len", len(values)).WithField("found", count).WithField("from-ip", ip).Info("batch find values request processed") - logtrace.Info(ctx, "Batch find values request processed", logtrace.Fields{ + logtrace.Debug(ctx, "Batch find values request processed", logtrace.Fields{ logtrace.FieldModule: "p2p", "p2p-req-id": reqID, "values-len": len(values), @@ -1044,7 +1044,7 @@ func (s *Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFi // log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("compressed-data-len", utils.BytesToMB(uint64(len(compressedData)))).WithField("found", count). 
// WithField("from-ip", ip).Info("batch find values response sent") - logtrace.Info(ctx, "Batch find values response sent", logtrace.Fields{ + logtrace.Debug(ctx, "Batch find values response sent", logtrace.Fields{ logtrace.FieldModule: "p2p", "p2p-req-id": reqID, "compressed-data-len": utils.BytesToMB(uint64(len(compressedData))), @@ -1208,7 +1208,7 @@ func (s *Network) handleBatchStoreData(ctx context.Context, message *Message) (r } // log.P2P().WithContext(ctx).Info("handle batch store data request received") - logtrace.Info(ctx, "Handle batch store data request received", logtrace.Fields{ + logtrace.Debug(ctx, "Handle batch store data request received", logtrace.Fields{ logtrace.FieldModule: "p2p", "sender": message.Sender.String(), "keys": len(request.Data), @@ -1238,7 +1238,7 @@ func (s *Network) handleBatchStoreData(ctx context.Context, message *Message) (r }, } // log.P2P().WithContext(ctx).Info("handle batch store data request processed") - logtrace.Info(ctx, "Handle batch store data request processed", logtrace.Fields{ + logtrace.Debug(ctx, "Handle batch store data request processed", logtrace.Fields{ logtrace.FieldModule: "p2p", "sender": message.Sender.String(), "keys": len(request.Data), @@ -1283,7 +1283,7 @@ func (s *Network) handleBatchFindNode(ctx context.Context, message *Message) (re closestMap := make(map[string][]*Node) // log.WithContext(ctx).WithField("sender", message.Sender.String()).Info("Batch Find Nodes Request Received") - logtrace.Info(ctx, "Batch Find Nodes Request Received", logtrace.Fields{ + logtrace.Debug(ctx, "Batch Find Nodes Request Received", logtrace.Fields{ logtrace.FieldModule: "p2p", "sender": message.Sender.String(), "hashed-targets": len(request.HashedTarget), @@ -1294,7 +1294,7 @@ func (s *Network) handleBatchFindNode(ctx context.Context, message *Message) (re } response.ClosestNodes = closestMap // log.WithContext(ctx).WithField("sender", message.Sender.String()).Info("Batch Find Nodes Request Processed") - 
logtrace.Info(ctx, "Batch Find Nodes Request Processed", logtrace.Fields{ + logtrace.Debug(ctx, "Batch Find Nodes Request Processed", logtrace.Fields{ logtrace.FieldModule: "p2p", "sender": message.Sender.String(), }) diff --git a/p2p/kademlia/node_activity.go b/p2p/kademlia/node_activity.go index cc7089d6..88e09f7a 100644 --- a/p2p/kademlia/node_activity.go +++ b/p2p/kademlia/node_activity.go @@ -25,7 +25,7 @@ func (s *DHT) checkNodeActivity(ctx context.Context) { return case <-ticker.C: if !utils.CheckInternetConnectivity() { - logtrace.Info(ctx, "no internet connectivity, not checking node activity", logtrace.Fields{}) + logtrace.Debug(ctx, "no internet connectivity, not checking node activity", logtrace.Fields{}) continue } @@ -115,7 +115,7 @@ func (s *DHT) handlePingSuccess(ctx context.Context, wasActive bool, n *Node) { s.ignorelist.Delete(n) if !wasActive { - logtrace.Info(ctx, "node found to be active again", logtrace.Fields{ + logtrace.Debug(ctx, "node found to be active again", logtrace.Fields{ logtrace.FieldModule: "p2p", "ip": n.IP, "node_id": string(n.ID), diff --git a/p2p/kademlia/redundant_data.go b/p2p/kademlia/redundant_data.go index bfe6947d..151269d1 100644 --- a/p2p/kademlia/redundant_data.go +++ b/p2p/kademlia/redundant_data.go @@ -13,7 +13,7 @@ import ( ) func (s *DHT) startDisabledKeysCleanupWorker(ctx context.Context) error { - logtrace.Info(ctx, "disabled keys cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "disabled keys cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) for { select { @@ -50,7 +50,7 @@ func (s *DHT) cleanupDisabledKeys(ctx context.Context) error { } func (s *DHT) startCleanupRedundantDataWorker(ctx context.Context) { - logtrace.Info(ctx, "redundant data cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "redundant data cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) for { select { @@ -66,7 
+66,7 @@ func (s *DHT) startCleanupRedundantDataWorker(ctx context.Context) { func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) { from := time.Now().AddDate(-5, 0, 0) // 5 years ago - logtrace.Info(ctx, "getting all possible replication keys past five years", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from}) + logtrace.Debug(ctx, "getting all possible replication keys past five years", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from}) to := time.Now().UTC() replicationKeys := s.store.GetKeysForReplication(ctx, from, to) @@ -88,7 +88,7 @@ func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) { removeKeys := make([]domain.DelKey, 0) for key, closestContacts := range closestContactsMap { if len(closestContacts) < Alpha { - logtrace.Info(ctx, "not enough contacts to replicate", logtrace.Fields{logtrace.FieldModule: "p2p", "key": key, "closest contacts": closestContacts}) + logtrace.Debug(ctx, "not enough contacts to replicate", logtrace.Fields{logtrace.FieldModule: "p2p", "key": key, "closest contacts": closestContacts}) continue } @@ -118,9 +118,9 @@ func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) { return } - logtrace.Info(ctx, "insert del keys success", logtrace.Fields{logtrace.FieldModule: "p2p", "count-del-keys": len(insertKeys)}) + logtrace.Debug(ctx, "insert del keys success", logtrace.Fields{logtrace.FieldModule: "p2p", "count-del-keys": len(insertKeys)}) } else { - logtrace.Info(ctx, "No redundant key found to be stored in the storage", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "No redundant key found to be stored in the storage", logtrace.Fields{logtrace.FieldModule: "p2p"}) } if len(removeKeys) > 0 { @@ -133,7 +133,7 @@ func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) { } func (s *DHT) startDeleteDataWorker(ctx context.Context) { - logtrace.Info(ctx, "start delete data worker", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "start delete data 
worker", logtrace.Fields{logtrace.FieldModule: "p2p"}) for { select { diff --git a/p2p/kademlia/replication.go b/p2p/kademlia/replication.go index 5163fd0b..4a36c422 100644 --- a/p2p/kademlia/replication.go +++ b/p2p/kademlia/replication.go @@ -34,7 +34,7 @@ var ( // StartReplicationWorker starts replication func (s *DHT) StartReplicationWorker(ctx context.Context) error { - logtrace.Info(ctx, "replication worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "replication worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) go s.checkNodeActivity(ctx) go s.StartBatchFetchAndStoreWorker(ctx) @@ -54,7 +54,7 @@ func (s *DHT) StartReplicationWorker(ctx context.Context) error { // StartBatchFetchAndStoreWorker starts replication func (s *DHT) StartBatchFetchAndStoreWorker(ctx context.Context) error { - logtrace.Info(ctx, "batch fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "batch fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) for { select { @@ -69,7 +69,7 @@ func (s *DHT) StartBatchFetchAndStoreWorker(ctx context.Context) error { // StartFailedFetchAndStoreWorker starts replication func (s *DHT) StartFailedFetchAndStoreWorker(ctx context.Context) error { - logtrace.Info(ctx, "fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) for { select { @@ -131,7 +131,7 @@ func (s *DHT) Replicate(ctx context.Context) { historicStart = time.Now().UTC().Add(-24 * time.Hour * 180) } - logtrace.Info(ctx, "replicating data", logtrace.Fields{logtrace.FieldModule: "p2p", "historic-start": historicStart}) + logtrace.Debug(ctx, "replicating data", logtrace.Fields{logtrace.FieldModule: "p2p", "historic-start": historicStart}) for i := 0; i < B; i++ { if time.Since(s.ht.refreshTime(i)) > defaultRefreshTime { @@ -150,7 +150,7 @@ func (s 
*DHT) Replicate(ctx context.Context) { } if len(repInfo) == 0 { - logtrace.Info(ctx, "no replication info found", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "no replication info found", logtrace.Fields{logtrace.FieldModule: "p2p"}) return } @@ -159,7 +159,7 @@ func (s *DHT) Replicate(ctx context.Context) { from = *repInfo[0].LastReplicatedAt } - logtrace.Info(ctx, "getting all possible replication keys", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from}) + logtrace.Debug(ctx, "getting all possible replication keys", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from}) to := time.Now().UTC() replicationKeys := s.store.GetKeysForReplication(ctx, from, to) @@ -199,7 +199,7 @@ func (s *DHT) Replicate(ctx context.Context) { continue } countToSendKeys := len(replicationKeys) - idx - logtrace.Info(ctx, "count of replication keys to be checked", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": countToSendKeys}) + logtrace.Debug(ctx, "count of replication keys to be checked", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": countToSendKeys}) // Preallocate a slice with a capacity equal to the number of keys. 
closestContactKeys := make([]string, 0, countToSendKeys) @@ -212,13 +212,13 @@ func (s *DHT) Replicate(ctx context.Context) { } } - logtrace.Info(ctx, "closest contact keys count", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": len(closestContactKeys)}) + logtrace.Debug(ctx, "closest contact keys count", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": len(closestContactKeys)}) if len(closestContactKeys) == 0 { if err := s.updateLastReplicated(ctx, info.ID, to); err != nil { logtrace.Error(ctx, "replicate update lastReplicated failed", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)}) } else { - logtrace.Info(ctx, "no closest keys found - replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "closest-contact-keys": 0}) + logtrace.Debug(ctx, "no closest keys found - replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "closest-contact-keys": 0}) } continue @@ -258,17 +258,17 @@ func (s *DHT) Replicate(ctx context.Context) { if err := s.updateLastReplicated(ctx, info.ID, to); err != nil { logtrace.Error(ctx, "replicate update lastReplicated failed", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)}) } else { - logtrace.Info(ctx, "replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "expected-rep-keys": len(closestContactKeys)}) + logtrace.Debug(ctx, "replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "expected-rep-keys": len(closestContactKeys)}) } } - logtrace.Info(ctx, "Replication done", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "Replication done", 
logtrace.Fields{logtrace.FieldModule: "p2p"}) } func (s *DHT) adjustNodeKeys(ctx context.Context, from time.Time, info domain.NodeReplicationInfo) error { replicationKeys := s.store.GetKeysForReplication(ctx, from, time.Now().UTC()) - logtrace.Info(ctx, "begin adjusting node keys process for offline node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "total-rep-keys": len(replicationKeys), "from": from.String()}) + logtrace.Debug(ctx, "begin adjusting node keys process for offline node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "total-rep-keys": len(replicationKeys), "from": from.String()}) // prepare ignored nodes list but remove the node we are adjusting // because we want to find if this node was supposed to hold this key @@ -315,7 +315,7 @@ func (s *DHT) adjustNodeKeys(ctx context.Context, from time.Time, info domain.No failureCount := 0 for nodeInfoKey, keys := range nodeKeysMap { - logtrace.Info(ctx, "sending adjusted replication keys to node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "adjust-to-node": nodeInfoKey, "to-adjust-keys-len": len(keys)}) + logtrace.Debug(ctx, "sending adjusted replication keys to node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "adjust-to-node": nodeInfoKey, "to-adjust-keys-len": len(keys)}) // Retrieve the node object from the key node, err := getNodeFromKey(nodeInfoKey) if err != nil { @@ -370,14 +370,14 @@ func (s *DHT) adjustNodeKeys(ctx context.Context, from time.Time, info domain.No return fmt.Errorf("replicate update isAdjusted failed: %v", err) } - logtrace.Info(ctx, "offline node was successfully adjusted", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID)}) + logtrace.Debug(ctx, "offline node was 
successfully adjusted", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID)}) return nil } func isNodeGoneAndShouldBeAdjusted(lastSeen *time.Time, isAlreadyAdjusted bool) bool { if lastSeen == nil { - logtrace.Info(context.Background(), "lastSeen is nil - aborting node adjustment", logtrace.Fields{}) + logtrace.Debug(context.Background(), "lastSeen is nil - aborting node adjustment", logtrace.Fields{}) return false } @@ -396,10 +396,10 @@ func (s *DHT) checkAndAdjustNode(ctx context.Context, info domain.NodeReplicatio if err := s.store.UpdateIsAdjusted(ctx, string(info.ID), true); err != nil { logtrace.Error(ctx, "failed to update replication info, set isAdjusted to true", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "rep-ip": info.IP, "rep-id": string(info.ID)}) } else { - logtrace.Info(ctx, "set isAdjusted to true", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)}) + logtrace.Debug(ctx, "set isAdjusted to true", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)}) } } } - logtrace.Info(ctx, "replication node not active, skipping over it.", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)}) + logtrace.Debug(ctx, "replication node not active, skipping over it.", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)}) } diff --git a/p2p/kademlia/rq_symbols.go b/p2p/kademlia/rq_symbols.go index fbf6563d..c8ad2000 100644 --- a/p2p/kademlia/rq_symbols.go +++ b/p2p/kademlia/rq_symbols.go @@ -16,7 +16,7 @@ const ( ) func (s *DHT) startStoreSymbolsWorker(ctx context.Context) { - logtrace.Info(ctx, "start delete data worker", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "start delete data worker", logtrace.Fields{logtrace.FieldModule: "p2p"}) for { select { @@ -38,12 +38,12 @@ func (s *DHT) storeSymbols(ctx 
context.Context) error { } for _, dir := range dirs { - logtrace.Info(ctx, "rq_symbols worker: start scanning dir & storing raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID}) + logtrace.Debug(ctx, "rq_symbols worker: start scanning dir & storing raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID}) if err := s.scanDirAndStoreSymbols(ctx, dir.Dir, dir.TXID); err != nil { logtrace.Error(ctx, "scan and store symbols", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err}) } - logtrace.Info(ctx, "rq_symbols worker: scanned dir & stored raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID}) + logtrace.Debug(ctx, "rq_symbols worker: scanned dir & stored raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID}) } return nil @@ -66,7 +66,7 @@ func (s *DHT) scanDirAndStoreSymbols(ctx context.Context, dir, txid string) erro } sort.Strings(keys) - logtrace.Info(ctx, "p2p-worker: storing ALL RaptorQ symbols", logtrace.Fields{"txid": txid, "dir": dir, "total": len(keys)}) + logtrace.Debug(ctx, "p2p-worker: storing ALL RaptorQ symbols", logtrace.Fields{"txid": txid, "dir": dir, "total": len(keys)}) // Batch-flush at loadSymbolsBatchSize for start := 0; start < len(keys); { diff --git a/p2p/kademlia/store/meta/meta.go b/p2p/kademlia/store/meta/meta.go index fa75dc81..c57d05a4 100644 --- a/p2p/kademlia/store/meta/meta.go +++ b/p2p/kademlia/store/meta/meta.go @@ -67,7 +67,7 @@ func NewStore(ctx context.Context, dataDir string) (*Store, error) { quit: make(chan bool), } - logtrace.Info(ctx, fmt.Sprintf("p2p data dir: %v", dataDir), logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, fmt.Sprintf("p2p data dir: %v", dataDir), logtrace.Fields{logtrace.FieldModule: "p2p"}) if _, err := os.Stat(dataDir); os.IsNotExist(err) { if err := os.MkdirAll(dataDir, 0750); err != nil { return nil, fmt.Errorf("mkdir %q: %w", dataDir, err) @@ -185,10 +185,10 @@ func (s *Store) startCheckpointWorker(ctx context.Context) { 
select { case <-ctx.Done(): - logtrace.Info(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{}) + logtrace.Debug(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{}) return case <-s.worker.quit: - logtrace.Info(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{}) + logtrace.Debug(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{}) return default: } @@ -204,10 +204,10 @@ func (s *Store) start(ctx context.Context) { logtrace.Error(ctx, "Failed to perform job", logtrace.Fields{logtrace.FieldError: err}) } case <-s.worker.quit: - logtrace.Info(ctx, "exit sqlite meta db worker - quit signal received", logtrace.Fields{}) + logtrace.Debug(ctx, "exit sqlite meta db worker - quit signal received", logtrace.Fields{}) return case <-ctx.Done(): - logtrace.Info(ctx, "exit sqlite meta db worker- ctx done signal received", logtrace.Fields{}) + logtrace.Debug(ctx, "exit sqlite meta db worker- ctx done signal received", logtrace.Fields{}) return } } diff --git a/p2p/kademlia/store/sqlite/meta_worker.go b/p2p/kademlia/store/sqlite/meta_worker.go index eb7a968f..6d1207df 100644 --- a/p2p/kademlia/store/sqlite/meta_worker.go +++ b/p2p/kademlia/store/sqlite/meta_worker.go @@ -124,7 +124,7 @@ func NewMigrationMetaStore(ctx context.Context, dataDir string, cloud cloud.Stor go handler.startLastAccessedUpdateWorker(ctx) go handler.startInsertWorker(ctx) go handler.startMigrationExecutionWorker(ctx) - logtrace.Info(ctx, "MigrationMetaStore workers started", logtrace.Fields{}) + logtrace.Debug(ctx, "MigrationMetaStore workers started", logtrace.Fields{}) return handler, nil } @@ -348,7 +348,7 @@ func (d *MigrationMetaStore) startLastAccessedUpdateWorker(ctx context.Context) case <-d.updateTicker.C: d.commitLastAccessedUpdates(ctx) case <-ctx.Done(): - logtrace.Info(ctx, "Shutting down last accessed update worker", logtrace.Fields{}) + logtrace.Debug(ctx, "Shutting down last accessed update 
worker", logtrace.Fields{}) return } } @@ -414,7 +414,7 @@ func (d *MigrationMetaStore) commitLastAccessedUpdates(ctx context.Context) { d.updates.Delete(k) } - logtrace.Info(ctx, "Committed last accessed updates", logtrace.Fields{"count": len(keysToUpdate)}) + logtrace.Debug(ctx, "Committed last accessed updates", logtrace.Fields{"count": len(keysToUpdate)}) } func PostKeysInsert(updates []UpdateMessage) { @@ -437,7 +437,7 @@ func (d *MigrationMetaStore) startInsertWorker(ctx context.Context) { case <-d.insertTicker.C: d.commitInserts(ctx) case <-ctx.Done(): - logtrace.Info(ctx, "Shutting down insert meta keys worker", logtrace.Fields{}) + logtrace.Debug(ctx, "Shutting down insert meta keys worker", logtrace.Fields{}) d.commitInserts(ctx) return } @@ -501,7 +501,7 @@ func (d *MigrationMetaStore) commitInserts(ctx context.Context) { d.inserts.Delete(k) } - logtrace.Info(ctx, "Committed inserts", logtrace.Fields{"count": len(keysToUpdate)}) + logtrace.Debug(ctx, "Committed inserts", logtrace.Fields{"count": len(keysToUpdate)}) } // startMigrationExecutionWorker starts the worker that executes a migration @@ -511,7 +511,7 @@ func (d *MigrationMetaStore) startMigrationExecutionWorker(ctx context.Context) case <-d.migrationExecutionTicker.C: d.checkAndExecuteMigration(ctx) case <-ctx.Done(): - logtrace.Info(ctx, "Shutting down data migration worker", logtrace.Fields{}) + logtrace.Debug(ctx, "Shutting down data migration worker", logtrace.Fields{}) return } } @@ -544,7 +544,7 @@ func (d *MigrationMetaStore) checkAndExecuteMigration(ctx context.Context) { //return //} - logtrace.Info(ctx, "Starting data migration", logtrace.Fields{"islow": isLow}) + logtrace.Debug(ctx, "Starting data migration", logtrace.Fields{"islow": isLow}) // Step 1: Fetch pending migrations var migrations Migrations @@ -553,11 +553,11 @@ func (d *MigrationMetaStore) checkAndExecuteMigration(ctx context.Context) { logtrace.Error(ctx, "Failed to fetch pending migrations", 
logtrace.Fields{logtrace.FieldError: err}) return } - logtrace.Info(ctx, "Fetched pending migrations", logtrace.Fields{"count": len(migrations)}) + logtrace.Debug(ctx, "Fetched pending migrations", logtrace.Fields{"count": len(migrations)}) // Iterate over each migration for _, migration := range migrations { - logtrace.Info(ctx, "Processing migration", logtrace.Fields{"migration_id": migration.ID}) + logtrace.Debug(ctx, "Processing migration", logtrace.Fields{"migration_id": migration.ID}) if err := d.ProcessMigrationInBatches(ctx, migration); err != nil { logtrace.Error(ctx, "Failed to process migration", logtrace.Fields{logtrace.FieldError: err, "migration_id": migration.ID}) @@ -579,7 +579,7 @@ func (d *MigrationMetaStore) ProcessMigrationInBatches(ctx context.Context, migr } if totalKeys < minKeysToMigrate { - logtrace.Info(ctx, "Skipping migration due to insufficient keys", logtrace.Fields{"migration_id": migration.ID, "keys-count": totalKeys}) + logtrace.Debug(ctx, "Skipping migration due to insufficient keys", logtrace.Fields{"migration_id": migration.ID, "keys-count": totalKeys}) return nil } @@ -630,7 +630,7 @@ func (d *MigrationMetaStore) ProcessMigrationInBatches(ctx context.Context, migr } } - logtrace.Info(ctx, "Migration processed successfully", logtrace.Fields{"migration_id": migration.ID, "tota-keys-count": totalKeys, "migrated_in_current_iteration": nonMigratedKeys}) + logtrace.Debug(ctx, "Migration processed successfully", logtrace.Fields{"migration_id": migration.ID, "tota-keys-count": totalKeys, "migrated_in_current_iteration": nonMigratedKeys}) return nil } @@ -683,7 +683,7 @@ func (d *MigrationMetaStore) uploadInBatches(ctx context.Context, keys []string, continue } - logtrace.Info(ctx, "Successfully uploaded and deleted records for batch", logtrace.Fields{"batch": i + 1, "total_batches": batches}) + logtrace.Debug(ctx, "Successfully uploaded and deleted records for batch", logtrace.Fields{"batch": i + 1, "total_batches": batches}) } return 
lastError @@ -823,7 +823,7 @@ func (d *MigrationMetaStore) InsertMetaMigrationData(ctx context.Context, migrat func (d *MigrationMetaStore) batchSetMigrated(keys []string) error { if len(keys) == 0 { // log.P2P().Info("no keys provided for batch update (is_migrated)") - logtrace.Info(context.Background(), "No keys provided for batch update (is_migrated)", logtrace.Fields{}) + logtrace.Debug(context.Background(), "No keys provided for batch update (is_migrated)", logtrace.Fields{}) return nil } diff --git a/p2p/kademlia/store/sqlite/sqlite.go b/p2p/kademlia/store/sqlite/sqlite.go index 71224a57..d38661d1 100644 --- a/p2p/kademlia/store/sqlite/sqlite.go +++ b/p2p/kademlia/store/sqlite/sqlite.go @@ -293,10 +293,10 @@ func (s *Store) startCheckpointWorker(ctx context.Context) { select { case <-ctx.Done(): - logtrace.Info(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{}) + logtrace.Debug(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{}) return case <-s.worker.quit: - logtrace.Info(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{}) + logtrace.Debug(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{}) return default: } @@ -312,10 +312,10 @@ func (s *Store) start(ctx context.Context) { logtrace.Error(ctx, "Failed to perform job", logtrace.Fields{logtrace.FieldError: err.Error()}) } case <-s.worker.quit: - logtrace.Info(ctx, "exit sqlite db worker - quit signal received", logtrace.Fields{}) + logtrace.Debug(ctx, "exit sqlite db worker - quit signal received", logtrace.Fields{}) return case <-ctx.Done(): - logtrace.Info(ctx, "exit sqlite db worker- ctx done signal received", logtrace.Fields{}) + logtrace.Debug(ctx, "exit sqlite db worker- ctx done signal received", logtrace.Fields{}) return } } @@ -737,11 +737,11 @@ func (s *Store) GetOwnCreatedAt(ctx context.Context) (time.Time, error) { func (s *Store) GetLocalKeys(from time.Time, to time.Time) ([]string, error) { 
var keys []string ctx := context.Background() - logtrace.Info(ctx, "getting all keys for SC", logtrace.Fields{}) + logtrace.Debug(ctx, "getting all keys for SC", logtrace.Fields{}) if err := s.db.SelectContext(ctx, &keys, `SELECT key FROM data WHERE createdAt > ? and createdAt < ?`, from, to); err != nil { return keys, fmt.Errorf("error reading all keys from database: %w", err) } - logtrace.Info(ctx, "got all keys for SC", logtrace.Fields{}) + logtrace.Debug(ctx, "got all keys for SC", logtrace.Fields{}) return keys, nil } @@ -762,7 +762,7 @@ func stringArgsToInterface(args []string) []interface{} { func batchDeleteRecords(db *sqlx.DB, keys []string) error { if len(keys) == 0 { - logtrace.Info(context.Background(), "no keys provided for batch delete", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(context.Background(), "no keys provided for batch delete", logtrace.Fields{logtrace.FieldModule: "p2p"}) return nil } total := int64(0) @@ -784,7 +784,7 @@ func batchDeleteRecords(db *sqlx.DB, keys []string) error { func batchSetMigratedRecords(db *sqlx.DB, keys []string) error { if len(keys) == 0 { - logtrace.Info(context.Background(), "no keys provided for batch update (migrated)", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(context.Background(), "no keys provided for batch update (migrated)", logtrace.Fields{logtrace.FieldModule: "p2p"}) return nil } total := int64(0) diff --git a/p2p/p2p.go b/p2p/p2p.go index 006c469a..2e416111 100644 --- a/p2p/p2p.go +++ b/p2p/p2p.go @@ -40,15 +40,15 @@ type P2P interface { // p2p structure to implements interface type p2p struct { - store kademlia.Store // the store for kademlia network - metaStore kademlia.MetaStore - dht *kademlia.DHT // the kademlia network - config *Config // the service configuration - running bool // if the kademlia network is ready - lumeraClient lumera.Client - keyring keyring.Keyring // Add the keyring field - rqstore rqstore.Store - metricsDisabled bool + store 
kademlia.Store // the store for kademlia network + metaStore kademlia.MetaStore + dht *kademlia.DHT // the kademlia network + config *Config // the service configuration + running bool // if the kademlia network is ready + lumeraClient lumera.Client + keyring keyring.Keyring // Add the keyring field + rqstore rqstore.Store + metricsDisabled bool } // Run the kademlia network @@ -65,7 +65,7 @@ func (s *p2p) Run(ctx context.Context) error { logtrace.Error(ctx, "failed to run kadmelia, retrying.", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err}) } else { - logtrace.Info(ctx, "kadmelia started successfully", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "kadmelia started successfully", logtrace.Fields{logtrace.FieldModule: "p2p"}) return nil } } @@ -75,7 +75,7 @@ func (s *p2p) Run(ctx context.Context) error { // run the kademlia network func (s *p2p) run(ctx context.Context) error { - logtrace.Info(ctx, "Running kademlia network", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Debug(ctx, "Running kademlia network", logtrace.Fields{logtrace.FieldModule: "p2p"}) // configure the kademlia dht for p2p service if err := s.configure(ctx); err != nil { return errors.Errorf("configure kademlia dht: %w", err) @@ -96,7 +96,7 @@ func (s *p2p) run(ctx context.Context) error { } s.running = true - logtrace.Info(ctx, "p2p service is started", logtrace.Fields{}) + logtrace.Debug(ctx, "p2p service is started", logtrace.Fields{}) // block until context is done <-ctx.Done() @@ -104,7 +104,7 @@ func (s *p2p) run(ctx context.Context) error { // stop the node for kademlia network s.dht.Stop(ctx) - logtrace.Info(ctx, "p2p service is stopped", logtrace.Fields{}) + logtrace.Debug(ctx, "p2p service is stopped", logtrace.Fields{}) return nil } @@ -227,15 +227,15 @@ func (s *p2p) NClosestNodesWithIncludingNodeList(ctx context.Context, n int, key // configure the distributed hash table for p2p service func (s *p2p) configure(ctx 
context.Context) error { // new the queries storage - kadOpts := &kademlia.Options{ - LumeraClient: s.lumeraClient, - Keyring: s.keyring, // Pass the keyring - BootstrapNodes: []*kademlia.Node{}, - IP: s.config.ListenAddress, - Port: s.config.Port, - ID: []byte(s.config.ID), - MetricsDisabled: s.metricsDisabled, - } + kadOpts := &kademlia.Options{ + LumeraClient: s.lumeraClient, + Keyring: s.keyring, // Pass the keyring + BootstrapNodes: []*kademlia.Node{}, + IP: s.config.ListenAddress, + Port: s.config.Port, + ID: []byte(s.config.ID), + MetricsDisabled: s.metricsDisabled, + } if len(kadOpts.ID) == 0 { errors.Errorf("node id is empty") @@ -254,25 +254,25 @@ func (s *p2p) configure(ctx context.Context) error { // New returns a new p2p instance. func New(ctx context.Context, config *Config, lumeraClient lumera.Client, kr keyring.Keyring, rqstore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore, metricsDisabled bool) (P2P, error) { - store, err := sqlite.NewStore(ctx, config.DataDir, cloud, mst) - if err != nil { - return nil, errors.Errorf("new kademlia store: %w", err) - } + store, err := sqlite.NewStore(ctx, config.DataDir, cloud, mst) + if err != nil { + return nil, errors.Errorf("new kademlia store: %w", err) + } meta, err := meta.NewStore(ctx, config.DataDir) if err != nil { return nil, errors.Errorf("new kademlia meta store: %w", err) } - return &p2p{ - store: store, - metaStore: meta, - config: config, - lumeraClient: lumeraClient, - keyring: kr, // Store the keyring - rqstore: rqstore, - metricsDisabled: metricsDisabled, - }, nil + return &p2p{ + store: store, + metaStore: meta, + config: config, + lumeraClient: lumeraClient, + keyring: kr, // Store the keyring + rqstore: rqstore, + metricsDisabled: metricsDisabled, + }, nil } // LocalStore store data into the kademlia network diff --git a/pkg/codec/decode.go b/pkg/codec/decode.go index bd3b0231..348894e4 100644 --- a/pkg/codec/decode.go +++ b/pkg/codec/decode.go @@ -145,7 +145,7 @@ func (rq 
*raptorQ) PrepareDecode( return os.RemoveAll(symbolsDir) } - logtrace.Info(ctx, "prepare decode workspace created", logtrace.Fields{ + logtrace.Debug(ctx, "prepare decode workspace created", logtrace.Fields{ "symbols_dir": symbolsDir, "blocks": len(blockDirs), }) @@ -164,7 +164,7 @@ func (rq *raptorQ) DecodeFromPrepared( logtrace.FieldModule: "rq", logtrace.FieldActionID: ws.ActionID, } - logtrace.Info(ctx, "RaptorQ decode (prepared) requested", fields) + logtrace.Debug(ctx, "RaptorQ decode (prepared) requested", fields) processor, err := raptorq.NewRaptorQProcessor(rqSymbolSize, rqRedundancyFactor, rqMaxMemoryMB, rqConcurrency) if err != nil { @@ -184,7 +184,7 @@ func (rq *raptorQ) DecodeFromPrepared( fields[logtrace.FieldError] = err.Error() return DecodeResponse{}, fmt.Errorf("write layout file: %w", err) } - logtrace.Info(ctx, "layout.json written (prepared)", fields) + logtrace.Debug(ctx, "layout.json written (prepared)", fields) // Decode to output (idempotent-safe: overwrite on success) outputPath := filepath.Join(ws.SymbolsDir, "output") @@ -194,64 +194,64 @@ func (rq *raptorQ) DecodeFromPrepared( return DecodeResponse{}, fmt.Errorf("raptorq decode: %w", err) } - logtrace.Info(ctx, "RaptorQ decoding completed successfully (prepared)", logtrace.Fields{ + logtrace.Debug(ctx, "RaptorQ decoding completed successfully (prepared)", logtrace.Fields{ "output_path": outputPath, }) return DecodeResponse{FilePath: outputPath, DecodeTmpDir: ws.SymbolsDir}, nil } func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeResponse, error) { - fields := logtrace.Fields{ - logtrace.FieldMethod: "Decode", - logtrace.FieldModule: "rq", - logtrace.FieldActionID: req.ActionID, - } - logtrace.Info(ctx, "RaptorQ decode request received", fields) + fields := logtrace.Fields{ + logtrace.FieldMethod: "Decode", + logtrace.FieldModule: "rq", + logtrace.FieldActionID: req.ActionID, + } + logtrace.Debug(ctx, "RaptorQ decode request received", fields) - // 1) Validate 
layout (the check) - if len(req.Layout.Blocks) == 0 { - fields[logtrace.FieldError] = "empty layout" - return DecodeResponse{}, fmt.Errorf("invalid layout: no blocks present") - } - for _, blk := range req.Layout.Blocks { - if len(blk.Symbols) == 0 { - fields[logtrace.FieldError] = fmt.Sprintf("block_%d has no symbols", blk.BlockID) - return DecodeResponse{}, fmt.Errorf("invalid layout: block %d has no symbols", blk.BlockID) - } - } + // 1) Validate layout (the check) + if len(req.Layout.Blocks) == 0 { + fields[logtrace.FieldError] = "empty layout" + return DecodeResponse{}, fmt.Errorf("invalid layout: no blocks present") + } + for _, blk := range req.Layout.Blocks { + if len(blk.Symbols) == 0 { + fields[logtrace.FieldError] = fmt.Sprintf("block_%d has no symbols", blk.BlockID) + return DecodeResponse{}, fmt.Errorf("invalid layout: block %d has no symbols", blk.BlockID) + } + } - // 2) Prepare workspace (functionality) - _, Write, Cleanup, ws, err := rq.PrepareDecode(ctx, req.ActionID, req.Layout) - if err != nil { - fields[logtrace.FieldError] = err.Error() - return DecodeResponse{}, fmt.Errorf("prepare decode workspace: %w", err) - } + // 2) Prepare workspace (functionality) + _, Write, Cleanup, ws, err := rq.PrepareDecode(ctx, req.ActionID, req.Layout) + if err != nil { + fields[logtrace.FieldError] = err.Error() + return DecodeResponse{}, fmt.Errorf("prepare decode workspace: %w", err) + } - // Ensure workspace cleanup on failure. On success, caller cleans up via returned path. - success := false - defer func() { - if !success && Cleanup != nil { - _ = Cleanup() - } - }() + // Ensure workspace cleanup on failure. On success, caller cleans up via returned path. 
+ success := false + defer func() { + if !success && Cleanup != nil { + _ = Cleanup() + } + }() - // 3) Persist provided in-memory symbols via Write (functionality) - if len(req.Symbols) > 0 { - for id, data := range req.Symbols { - if _, werr := Write(-1, id, data); werr != nil { - fields[logtrace.FieldError] = werr.Error() - return DecodeResponse{}, werr - } - } - logtrace.Info(ctx, "symbols persisted via Write()", fields) - } + // 3) Persist provided in-memory symbols via Write (functionality) + if len(req.Symbols) > 0 { + for id, data := range req.Symbols { + if _, werr := Write(-1, id, data); werr != nil { + fields[logtrace.FieldError] = werr.Error() + return DecodeResponse{}, werr + } + } + logtrace.Debug(ctx, "symbols persisted via Write()", fields) + } - // 4) Decode using the prepared workspace (functionality) - resp, derr := rq.DecodeFromPrepared(ctx, ws, req.Layout) - if derr != nil { - fields[logtrace.FieldError] = derr.Error() - return DecodeResponse{}, derr - } - success = true - return resp, nil + // 4) Decode using the prepared workspace (functionality) + resp, derr := rq.DecodeFromPrepared(ctx, ws, req.Layout) + if derr != nil { + fields[logtrace.FieldError] = derr.Error() + return DecodeResponse{}, derr + } + success = true + return resp, nil } diff --git a/pkg/codec/raptorq.go b/pkg/codec/raptorq.go index 4564bc1b..7c28d0c5 100644 --- a/pkg/codec/raptorq.go +++ b/pkg/codec/raptorq.go @@ -48,7 +48,7 @@ func (rq *raptorQ) Encode(ctx context.Context, req EncodeRequest) (EncodeRespons return EncodeResponse{}, fmt.Errorf("create RaptorQ processor: %w", err) } defer processor.Free() - logtrace.Info(ctx, "RaptorQ processor created", fields) + logtrace.Debug(ctx, "RaptorQ processor created", fields) /* ---------- 1. 
run the encoder ---------- */ // Deterministic: force single block @@ -60,7 +60,7 @@ func (rq *raptorQ) Encode(ctx context.Context, req EncodeRequest) (EncodeRespons os.Remove(req.Path) return EncodeResponse{}, fmt.Errorf("mkdir %s: %w", symbolsDir, err) } - logtrace.Info(ctx, "RaptorQ processor encoding", fields) + logtrace.Debug(ctx, "RaptorQ processor encoding", fields) resp, err := processor.EncodeFile(req.Path, symbolsDir, blockSize) if err != nil { @@ -74,7 +74,7 @@ func (rq *raptorQ) Encode(ctx context.Context, req EncodeRequest) (EncodeRespons /* ---------- 2. read the layout JSON ---------- */ layoutData, err := os.ReadFile(resp.LayoutFilePath) - logtrace.Info(ctx, "RaptorQ processor layout file", logtrace.Fields{ + logtrace.Debug(ctx, "RaptorQ processor layout file", logtrace.Fields{ "layout-file": resp.LayoutFilePath}) if err != nil { fields[logtrace.FieldError] = err.Error() diff --git a/pkg/common/task/task.go b/pkg/common/task/task.go index e4bb062a..adf173e4 100644 --- a/pkg/common/task/task.go +++ b/pkg/common/task/task.go @@ -92,13 +92,13 @@ func (task *task) RunAction(ctx context.Context) error { for { select { case <-ctx.Done(): - logtrace.Info(ctx, "context done", logtrace.Fields{"task_id": task.ID()}) + logtrace.Debug(ctx, "context done", logtrace.Fields{"task_id": task.ID()}) case <-task.Done(): - logtrace.Info(ctx, "task done", logtrace.Fields{"task_id": task.ID()}) + logtrace.Debug(ctx, "task done", logtrace.Fields{"task_id": task.ID()}) cancel() case action, ok := <-task.actionCh: if !ok { - logtrace.Info(ctx, "action channel closed", logtrace.Fields{"task_id": task.ID()}) + logtrace.Debug(ctx, "action channel closed", logtrace.Fields{"task_id": task.ID()}) return group.Wait() } diff --git a/pkg/common/task/worker.go b/pkg/common/task/worker.go index 280b5fb8..14043079 100644 --- a/pkg/common/task/worker.go +++ b/pkg/common/task/worker.go @@ -91,7 +91,7 @@ func (worker *Worker) Run(ctx context.Context) error { logtrace.Error(ctx, "Recovered 
from panic in common task's worker run", logtrace.Fields{"task": currentTask.ID(), "error": r}) } - logtrace.Info(ctx, "Task Removed", logtrace.Fields{"task": currentTask.ID()}) + logtrace.Debug(ctx, "Task Removed", logtrace.Fields{"task": currentTask.ID()}) // Remove the task from the worker's task list worker.RemoveTask(currentTask) }() diff --git a/pkg/dd/image_rareness.go b/pkg/dd/image_rareness.go index d021da1b..74fec800 100644 --- a/pkg/dd/image_rareness.go +++ b/pkg/dd/image_rareness.go @@ -56,7 +56,7 @@ func (c *ddServerClientImpl) ImageRarenessScore(ctx context.Context, req Rarenes logtrace.FieldMethod: "ImageRarenessScore", logtrace.FieldRequest: req, } - logtrace.Info(ctx, "getting image rareness score", fields) + logtrace.Debug(ctx, "getting image rareness score", fields) res, err := c.ddService.ImageRarenessScore(ctx, &ddService.RarenessScoreRequest{ImageFilepath: req.Filepath}) if err != nil { @@ -65,7 +65,7 @@ func (c *ddServerClientImpl) ImageRarenessScore(ctx context.Context, req Rarenes return ImageRarenessScoreResponse{}, fmt.Errorf("dd image rareness score error: %w", err) } - logtrace.Info(ctx, "successfully got image rareness score", fields) + logtrace.Debug(ctx, "successfully got image rareness score", fields) return toImageRarenessScoreResponse(res), nil } diff --git a/pkg/dd/status.go b/pkg/dd/status.go index fc7f4d30..812b62d6 100644 --- a/pkg/dd/status.go +++ b/pkg/dd/status.go @@ -26,7 +26,7 @@ func (c *ddServerClientImpl) GetStatus(ctx context.Context, req GetStatusRequest logtrace.FieldMethod: "GetStatus", logtrace.FieldRequest: req, } - logtrace.Info(ctx, "getting status", fields) + logtrace.Debug(ctx, "getting status", fields) res, err := c.ddService.GetStatus(ctx, &ddService.GetStatusRequest{}) if err != nil { @@ -35,7 +35,7 @@ func (c *ddServerClientImpl) GetStatus(ctx context.Context, req GetStatusRequest return GetStatusResponse{}, fmt.Errorf("dd get status error: %w", err) } - logtrace.Info(ctx, "successfully got status", 
fields) + logtrace.Debug(ctx, "successfully got status", fields) return GetStatusResponse{ Version: res.GetVersion(), TaskCount: res.GetTaskCount(), diff --git a/pkg/lumera/connection.go b/pkg/lumera/connection.go index ab28702c..06c39748 100644 --- a/pkg/lumera/connection.go +++ b/pkg/lumera/connection.go @@ -127,7 +127,7 @@ func newGRPCConnection(ctx context.Context, rawAddr string) (Connection, error) if firstCand.useTLS { scheme = "tls" } - logtrace.Info(ctx, "gRPC connection established", logtrace.Fields{ + logtrace.Debug(ctx, "gRPC connection established", logtrace.Fields{ "target": firstCand.target, "scheme": scheme, }) diff --git a/pkg/lumera/modules/auth/impl.go b/pkg/lumera/modules/auth/impl.go index 3597d7a9..a3ad3bca 100644 --- a/pkg/lumera/modules/auth/impl.go +++ b/pkg/lumera/modules/auth/impl.go @@ -46,7 +46,7 @@ func (m *module) Verify(ctx context.Context, accAddress string, data, signature return fmt.Errorf("invalid address: %w", err) } - logtrace.Info(ctx, "Verifying signature", logtrace.Fields{"address": addr.String()}) + logtrace.Debug(ctx, "Verifying signature", logtrace.Fields{"address": addr.String()}) // Use Account RPC instead of AccountInfo to get the full account with public key accResp, err := m.client.Account(ctx, &authtypes.QueryAccountRequest{ @@ -66,7 +66,7 @@ func (m *module) Verify(ctx context.Context, accAddress string, data, signature if pubKey == nil { return fmt.Errorf("public key is nil") } - logtrace.Info(ctx, "Public key retrieved", logtrace.Fields{"pubKey": pubKey.String()}) + logtrace.Debug(ctx, "Public key retrieved", logtrace.Fields{"pubKey": pubKey.String()}) if !pubKey.VerifySignature(data, signature) { return fmt.Errorf("invalid signature") } diff --git a/pkg/lumera/modules/tx/impl.go b/pkg/lumera/modules/tx/impl.go index d342601b..bcdb694d 100644 --- a/pkg/lumera/modules/tx/impl.go +++ b/pkg/lumera/modules/tx/impl.go @@ -103,7 +103,7 @@ func (m *module) SimulateTransaction(ctx context.Context, msgs []types.Msg, acco 
return nil, fmt.Errorf("simulation error: %w", err) } - logtrace.Info(ctx, fmt.Sprintf("simulation complete | gasUsed=%d", simRes.GasInfo.GasUsed), nil) + logtrace.Debug(ctx, fmt.Sprintf("simulation complete | gasUsed=%d", simRes.GasInfo.GasUsed), nil) return simRes, nil } @@ -143,7 +143,7 @@ func (m *module) BuildAndSignTransaction(ctx context.Context, msgs []types.Msg, return nil, fmt.Errorf("failed to sign transaction: %w", err) } - logtrace.Info(ctx, "transaction signed successfully", nil) + logtrace.Debug(ctx, "transaction signed successfully", nil) // Encode signed transaction txBytes, err := clientCtx.TxConfig.TxEncoder()(txBuilder.GetTx()) @@ -273,7 +273,7 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou // Step 3: Calculate fee based on adjusted gas fee := m.CalculateFee(gasToUse, config) - logtrace.Info(ctx, fmt.Sprintf("using simulated gas and calculated fee | simulatedGas=%d adjustedGas=%d fee=%s", simulatedGasUsed, gasToUse, fee), nil) + logtrace.Debug(ctx, fmt.Sprintf("using simulated gas and calculated fee | simulatedGas=%d adjustedGas=%d fee=%s", simulatedGasUsed, gasToUse, fee), nil) // Step 4: Build and sign transaction txBytes, err := m.BuildAndSignTransaction(ctx, msgs, accountInfo, gasToUse, fee, config) @@ -288,7 +288,7 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou } if result != nil && result.TxResponse != nil && result.TxResponse.Code == 0 && len(result.TxResponse.Events) == 0 { - logtrace.Info(ctx, "Transaction broadcast successful, waiting for inclusion to get events...", nil) + logtrace.Debug(ctx, "Transaction broadcast successful, waiting for inclusion to get events...", nil) // Retry 5 times with 1 second intervals var txResp *sdktx.GetTxResponse @@ -298,7 +298,7 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou txResp, err = m.GetTransaction(ctx, result.TxResponse.TxHash) if err == nil && txResp != nil && txResp.TxResponse != nil 
{ // Successfully got the transaction with events - logtrace.Info(ctx, fmt.Sprintf("Retrieved transaction with %d events", len(txResp.TxResponse.Events)), nil) + logtrace.Debug(ctx, fmt.Sprintf("Retrieved transaction with %d events", len(txResp.TxResponse.Events)), nil) result.TxResponse = txResp.TxResponse break } diff --git a/pkg/net/grpc/server/server.go b/pkg/net/grpc/server/server.go index 64dfe0f2..ae1a3524 100644 --- a/pkg/net/grpc/server/server.go +++ b/pkg/net/grpc/server/server.go @@ -203,7 +203,7 @@ func (s *Server) createListener(ctx context.Context, address string) (net.Listen if err != nil { return nil, errors.Errorf("failed to create listener: %w", err).WithField("address", address) } - logtrace.Info(ctx, "gRPC server listening", logtrace.Fields{"address": address}) + logtrace.Debug(ctx, "gRPC server listening", logtrace.Fields{"address": address}) return lis, nil } @@ -256,7 +256,7 @@ func (s *Server) Serve(ctx context.Context, address string, opts *ServerOptions) // Wait for context cancellation or error select { case <-ctx.Done(): - logtrace.Info(ctx, "Shutting down gRPC server", logtrace.Fields{"address": address}) + logtrace.Debug(ctx, "Shutting down gRPC server", logtrace.Fields{"address": address}) return s.Stop(opts.GracefulShutdownTime) case err := <-serveErr: return err diff --git a/pkg/net/interceptor.go b/pkg/net/interceptor.go index f29d88a1..b33aadcf 100644 --- a/pkg/net/interceptor.go +++ b/pkg/net/interceptor.go @@ -34,7 +34,7 @@ func UnaryServerInterceptor() grpc.UnaryServerInterceptor { logtrace.FieldMethod: info.FullMethod, logtrace.FieldCorrelationID: correlationID, } - logtrace.Info(ctx, "received gRPC request", fields) + logtrace.Debug(ctx, "received gRPC request", fields) resp, err := handler(ctx, req) @@ -42,7 +42,7 @@ func UnaryServerInterceptor() grpc.UnaryServerInterceptor { fields[logtrace.FieldError] = err.Error() logtrace.Error(ctx, "gRPC request failed", fields) } else { - logtrace.Info(ctx, "gRPC request processed 
successfully", fields) + logtrace.Debug(ctx, "gRPC request processed successfully", fields) } return resp, err diff --git a/pkg/storage/queries/health_check.go b/pkg/storage/queries/health_check.go index e76799da..96802dd8 100644 --- a/pkg/storage/queries/health_check.go +++ b/pkg/storage/queries/health_check.go @@ -98,10 +98,10 @@ func (s *SQLiteStore) GetHCSummaryStats(from time.Time) (hcMetrics metrics.HCMet if err != nil { return hcMetrics, err } - logtrace.Info(context.Background(), "observer evaluations retrieved", logtrace.Fields{"observer_evaluations": len(hcObserversEvaluations), "from": from}) + logtrace.Debug(context.Background(), "observer evaluations retrieved", logtrace.Fields{"observer_evaluations": len(hcObserversEvaluations), "from": from}) observerEvaluationMetrics := processHCObserverEvaluations(hcObserversEvaluations) - logtrace.Info(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{"observer_evaluation_metrics": len(observerEvaluationMetrics), "from": from}) + logtrace.Debug(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{"observer_evaluation_metrics": len(observerEvaluationMetrics), "from": from}) for _, obMetrics := range observerEvaluationMetrics { if obMetrics.ChallengesVerified >= 3 { @@ -154,7 +154,7 @@ func (s *SQLiteStore) GetMetricsDataByHealthCheckChallengeID(ctx context.Context if err != nil { return healthCheckChallengeMessages, err } - logtrace.Info(ctx, "health-check-challenge metrics row count", logtrace.Fields{"rows": len(hcMetrics), "challenge_id": challengeID}) + logtrace.Debug(ctx, "health-check-challenge metrics row count", logtrace.Fields{"rows": len(hcMetrics), "challenge_id": challengeID}) for _, hcMetric := range hcMetrics { msg := types.HealthCheckMessageData{} diff --git a/pkg/storage/queries/self_healing.go b/pkg/storage/queries/self_healing.go index 47145a0b..61e7c63c 100644 --- a/pkg/storage/queries/self_healing.go +++ b/pkg/storage/queries/self_healing.go 
@@ -257,7 +257,7 @@ func (s *SQLiteStore) GetSHExecutionMetrics(ctx context.Context, from time.Time) if err != nil { return m, err } - logtrace.Info(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)}) + logtrace.Debug(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)}) challenges := make(map[string]SHChallengeMetric) for _, row := range rows { @@ -361,11 +361,11 @@ func (s *SQLiteStore) GetSHExecutionMetrics(ctx context.Context, from time.Time) } } - logtrace.Info(ctx, "self-healing execution metrics challenges count", logtrace.Fields{"challenges": len(challenges)}) + logtrace.Debug(ctx, "self-healing execution metrics challenges count", logtrace.Fields{"challenges": len(challenges)}) for _, challenge := range challenges { - logtrace.Info(ctx, "self-healing challenge metric", logtrace.Fields{ + logtrace.Debug(ctx, "self-healing challenge metric", logtrace.Fields{ "challenge-id": challenge.ChallengeID, "is-accepted": challenge.IsAccepted, "is-verified": challenge.IsVerified, @@ -475,7 +475,7 @@ func (s *SQLiteStore) GetLastNSHChallenges(ctx context.Context, n int) (types.Se if err != nil { return challenges, err } - logtrace.Info(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)}) + logtrace.Debug(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)}) challengesInserted := 0 for _, row := range rows { @@ -507,7 +507,7 @@ func (s *SQLiteStore) GetSHChallengeReport(ctx context.Context, challengeID stri if err != nil { return challenges, err } - logtrace.Info(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)}) + logtrace.Debug(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)}) for _, row := range rows { if row.ChallengeID == challengeID { diff --git a/pkg/storage/queries/storage_challenge.go b/pkg/storage/queries/storage_challenge.go index 574e7f4f..164ed2be 100644 --- 
a/pkg/storage/queries/storage_challenge.go +++ b/pkg/storage/queries/storage_challenge.go @@ -97,7 +97,7 @@ func (s *SQLiteStore) GetMetricsDataByStorageChallengeID(ctx context.Context, ch return storageChallengeMessages, err } // log.WithContext(ctx).WithField("rows", len(scMetrics)).Info("storage-challenge metrics row count") - logtrace.Info(ctx, "storage-challenge metrics row count", logtrace.Fields{ + logtrace.Debug(ctx, "storage-challenge metrics row count", logtrace.Fields{ "rows": len(scMetrics), }) @@ -210,13 +210,13 @@ func (s *SQLiteStore) GetSCSummaryStats(from time.Time) (scMetrics metrics.SCMet return scMetrics, err } // log.WithField("observer_evaluations", len(observersEvaluations)).Info("observer evaluations retrieved") - logtrace.Info(context.Background(), "observer evaluations retrieved", logtrace.Fields{ + logtrace.Debug(context.Background(), "observer evaluations retrieved", logtrace.Fields{ "observer_evaluations": len(observersEvaluations), }) observerEvaluationMetrics := processObserverEvaluations(observersEvaluations) // log.WithField("observer_evaluation_metrics", len(observerEvaluationMetrics)).Info("observer evaluation metrics retrieved") - logtrace.Info(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{ + logtrace.Debug(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{ "observer_evaluation_metrics": len(observerEvaluationMetrics), }) diff --git a/pkg/storage/queries/task_history.go b/pkg/storage/queries/task_history.go index 73a55ef8..29539a49 100644 --- a/pkg/storage/queries/task_history.go +++ b/pkg/storage/queries/task_history.go @@ -59,7 +59,7 @@ func (s *SQLiteStore) QueryTaskHistory(taskID string) (history []types.TaskHisto err = json.Unmarshal([]byte(details), &i.Details) if err != nil { - logtrace.Info(context.Background(), "Detals", logtrace.Fields{"details": details}) + logtrace.Debug(context.Background(), "Detals", logtrace.Fields{"details": details}) 
logtrace.Error(context.Background(), fmt.Sprintf("cannot unmarshal task history details: %s", details), logtrace.Fields{"error": err}) i.Details = nil } diff --git a/sdk/task/task.go b/sdk/task/task.go index e359c907..976725a0 100644 --- a/sdk/task/task.go +++ b/sdk/task/task.go @@ -126,7 +126,7 @@ func (t *BaseTask) isServing(parent context.Context, sn lumera.Supernode) bool { PeerType: t.config.Account.PeerType, }).CreateClient(ctx, sn) if err != nil { - logtrace.Info(ctx, "Failed to create client for supernode", logtrace.Fields{logtrace.FieldMethod: "isServing"}) + logtrace.Debug(ctx, "Failed to create client for supernode", logtrace.Fields{logtrace.FieldMethod: "isServing"}) return false } defer client.Close(ctx) diff --git a/supernode/cmd/service.go b/supernode/cmd/service.go index d4af1269..8cd8708f 100644 --- a/supernode/cmd/service.go +++ b/supernode/cmd/service.go @@ -23,7 +23,7 @@ func RunServices(ctx context.Context, services ...service) error { if err != nil { logtrace.Error(ctx, "service stopped with an error", logtrace.Fields{"service": reflect.TypeOf(service).String(), "error": err}) } else { - logtrace.Info(ctx, "service stopped", logtrace.Fields{"service": reflect.TypeOf(service).String()}) + logtrace.Debug(ctx, "service stopped", logtrace.Fields{"service": reflect.TypeOf(service).String()}) } return err }) diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go index eaf1339e..952113d3 100644 --- a/supernode/cmd/start.go +++ b/supernode/cmd/start.go @@ -46,7 +46,7 @@ The supernode will connect to the Lumera network and begin participating in the // Log configuration info cfgFile := filepath.Join(baseDir, DefaultConfigFile) - logtrace.Info(ctx, "Starting supernode with configuration", logtrace.Fields{"config_file": cfgFile, "keyring_dir": appConfig.GetKeyringDir(), "key_name": appConfig.SupernodeConfig.KeyName}) + logtrace.Debug(ctx, "Starting supernode with configuration", logtrace.Fields{"config_file": cfgFile, "keyring_dir": 
appConfig.GetKeyringDir(), "key_name": appConfig.SupernodeConfig.KeyName}) // Initialize keyring kr, err := initKeyringFromConfig(appConfig) @@ -61,7 +61,7 @@ The supernode will connect to the Lumera network and begin participating in the } // Verify config matches chain registration before starting services - logtrace.Info(ctx, "Verifying configuration against chain registration", logtrace.Fields{}) + logtrace.Debug(ctx, "Verifying configuration against chain registration", logtrace.Fields{}) configVerifier := verifier.NewConfigVerifier(appConfig, lumeraClient, kr) verificationResult, err := configVerifier.VerifyConfig(ctx) if err != nil { @@ -76,7 +76,7 @@ The supernode will connect to the Lumera network and begin participating in the logtrace.Warn(ctx, "Config verification warnings", logtrace.Fields{"summary": verificationResult.Summary()}) } - logtrace.Info(ctx, "Configuration verification successful", logtrace.Fields{}) + logtrace.Debug(ctx, "Configuration verification successful", logtrace.Fields{}) // Initialize RaptorQ store for Cascade processing rqStore, err := initRQStore(ctx, appConfig) @@ -84,14 +84,14 @@ The supernode will connect to the Lumera network and begin participating in the logtrace.Fatal(ctx, "Failed to initialize RaptorQ store", logtrace.Fields{"error": err.Error()}) } - // Manually set the disable flag at the highest level - disableMetrics := true + // Manually set the disable flag at the highest level + disableMetrics := true - // Initialize P2P service with explicit disable flag - p2pService, err := initP2PService(ctx, appConfig, lumeraClient, kr, rqStore, nil, nil, disableMetrics) - if err != nil { - logtrace.Fatal(ctx, "Failed to initialize P2P service", logtrace.Fields{"error": err.Error()}) - } + // Initialize P2P service with explicit disable flag + p2pService, err := initP2PService(ctx, appConfig, lumeraClient, kr, rqStore, nil, nil, disableMetrics) + if err != nil { + logtrace.Fatal(ctx, "Failed to initialize P2P service", 
logtrace.Fields{"error": err.Error()}) + } // Initialize the supernode supernodeInstance, err := NewSupernode(ctx, appConfig, kr, p2pService, rqStore, lumeraClient) @@ -100,19 +100,19 @@ The supernode will connect to the Lumera network and begin participating in the } // Configure cascade service - cService := cascadeService.NewCascadeService( - &cascadeService.Config{ - Config: common.Config{ - SupernodeAccountAddress: appConfig.SupernodeConfig.Identity, - }, - RqFilesDir: appConfig.GetRaptorQFilesDir(), - MetricsDisabled: disableMetrics, - }, - lumeraClient, - *p2pService, - codec.NewRaptorQCodec(appConfig.GetRaptorQFilesDir()), - rqStore, - ) + cService := cascadeService.NewCascadeService( + &cascadeService.Config{ + Config: common.Config{ + SupernodeAccountAddress: appConfig.SupernodeConfig.Identity, + }, + RqFilesDir: appConfig.GetRaptorQFilesDir(), + MetricsDisabled: disableMetrics, + }, + lumeraClient, + *p2pService, + codec.NewRaptorQCodec(appConfig.GetRaptorQFilesDir()), + rqStore, + ) // Create cascade action server cascadeActionServer := cascade.NewCascadeActionServer(cService) @@ -152,7 +152,7 @@ The supernode will connect to the Lumera network and begin participating in the if isTestnet { profilingAddr := "0.0.0.0:8082" - logtrace.Info(ctx, "Starting profiling server", logtrace.Fields{ + logtrace.Debug(ctx, "Starting profiling server", logtrace.Fields{ "address": profilingAddr, "chain_id": appConfig.LumeraClientConfig.ChainID, "is_testnet": isTestnet, @@ -178,7 +178,7 @@ The supernode will connect to the Lumera network and begin participating in the // Wait for termination signal sig := <-sigCh - logtrace.Info(ctx, "Received signal, shutting down", logtrace.Fields{"signal": sig.String()}) + logtrace.Debug(ctx, "Received signal, shutting down", logtrace.Fields{"signal": sig.String()}) // Graceful shutdown if err := supernodeInstance.Stop(ctx); err != nil { @@ -208,9 +208,9 @@ func initP2PService(ctx context.Context, config *config.Config, lumeraClient 
lum // Create P2P config using helper function p2pConfig := createP2PConfig(config, address.String()) - logtrace.Info(ctx, "Initializing P2P service", logtrace.Fields{"address": p2pConfig.ListenAddress, "port": p2pConfig.Port, "data_dir": p2pConfig.DataDir, "supernode_id": address.String()}) + logtrace.Debug(ctx, "Initializing P2P service", logtrace.Fields{"address": p2pConfig.ListenAddress, "port": p2pConfig.Port, "data_dir": p2pConfig.DataDir, "supernode_id": address.String()}) - p2pService, err := p2p.New(ctx, p2pConfig, lumeraClient, kr, rqStore, cloud, mst, metricsDisabled) + p2pService, err := p2p.New(ctx, p2pConfig, lumeraClient, kr, rqStore, cloud, mst, metricsDisabled) if err != nil { return nil, fmt.Errorf("failed to initialize p2p service: %w", err) } diff --git a/supernode/cmd/supernode.go b/supernode/cmd/supernode.go index 19a65718..c0740fd0 100644 --- a/supernode/cmd/supernode.go +++ b/supernode/cmd/supernode.go @@ -71,13 +71,13 @@ func (s *Supernode) Start(ctx context.Context) error { return err } - logtrace.Info(ctx, "Found valid key in keyring", logtrace.Fields{ + logtrace.Debug(ctx, "Found valid key in keyring", logtrace.Fields{ "key_name": s.config.SupernodeConfig.KeyName, "address": address.String(), }) // Use the P2P service that was passed in via constructor - logtrace.Info(ctx, "Starting P2P service", logtrace.Fields{}) + logtrace.Debug(ctx, "Starting P2P service", logtrace.Fields{}) if err := s.p2pService.Run(ctx); err != nil { return fmt.Errorf("p2p service error: %w", err) } @@ -89,7 +89,7 @@ func (s *Supernode) Start(ctx context.Context) error { func (s *Supernode) Stop(ctx context.Context) error { // Close the Lumera client connection if s.lumeraClient != nil { - logtrace.Info(ctx, "Closing Lumera client", logtrace.Fields{}) + logtrace.Debug(ctx, "Closing Lumera client", logtrace.Fields{}) if err := s.lumeraClient.Close(); err != nil { logtrace.Error(ctx, "Error closing Lumera client", logtrace.Fields{ "error": err.Error(), @@ -131,7 
+131,7 @@ func initRQStore(ctx context.Context, config *config.Config) (rqstore.Store, err // Create the SQLite file path rqStoreFile := rqDir + "/rqstore.db" - logtrace.Info(ctx, "Initializing RaptorQ store", logtrace.Fields{ + logtrace.Debug(ctx, "Initializing RaptorQ store", logtrace.Fields{ "file_path": rqStoreFile, }) diff --git a/supernode/node/action/server/cascade/cascade_action_server.go b/supernode/node/action/server/cascade/cascade_action_server.go index 449f4c42..180ebd3d 100644 --- a/supernode/node/action/server/cascade/cascade_action_server.go +++ b/supernode/node/action/server/cascade/cascade_action_server.go @@ -74,7 +74,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er } ctx := stream.Context() - logtrace.Info(ctx, "client streaming request to upload cascade input data received", fields) + logtrace.Debug(ctx, "client streaming request to upload cascade input data received", fields) const maxFileSize = 1 * 1024 * 1024 * 1024 // 1GB limit @@ -140,7 +140,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er return fmt.Errorf("file size %d exceeds maximum allowed size of 1GB", totalSize) } - logtrace.Info(ctx, "received data chunk", logtrace.Fields{ + logtrace.Debug(ctx, "received data chunk", logtrace.Fields{ "chunk_size": len(x.Chunk.Data), "total_size_so_far": totalSize, }) @@ -148,7 +148,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er case *pb.RegisterRequest_Metadata: // Store metadata - this should be the final message metadata = x.Metadata - logtrace.Info(ctx, "received metadata", logtrace.Fields{ + logtrace.Debug(ctx, "received metadata", logtrace.Fields{ "task_id": metadata.TaskId, "action_id": metadata.ActionId, }) @@ -162,7 +162,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er } fields[logtrace.FieldTaskID] = metadata.GetTaskId() fields[logtrace.FieldActionID] = metadata.GetActionId() - 
logtrace.Info(ctx, "metadata received from action-sdk", fields) + logtrace.Debug(ctx, "metadata received from action-sdk", fields) // Ensure all data is written to disk before calculating hash if err := tempFile.Sync(); err != nil { @@ -174,7 +174,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er hash := hasher.Sum(nil) hashHex := hex.EncodeToString(hash) fields[logtrace.FieldHashHex] = hashHex - logtrace.Info(ctx, "final BLAKE3 hash generated", fields) + logtrace.Debug(ctx, "final BLAKE3 hash generated", fields) targetPath, err := replaceTempDirWithTaskDir(metadata.GetTaskId(), tempFilePath, tempFile) if err != nil { @@ -213,7 +213,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er return fmt.Errorf("registration failed: %w", err) } - logtrace.Info(ctx, "cascade registration completed successfully", fields) + logtrace.Debug(ctx, "cascade registration completed successfully", fields) return nil } @@ -225,7 +225,7 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS } ctx := stream.Context() - logtrace.Info(ctx, "download request received from client", fields) + logtrace.Debug(ctx, "download request received from client", fields) task := server.factory.NewCascadeRegistrationTask() @@ -254,7 +254,7 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS if err := task.CleanupDownload(ctx, tmpDir); err != nil { logtrace.Error(ctx, "error cleaning up the tmp dir", logtrace.Fields{logtrace.FieldError: err.Error()}) } else { - logtrace.Info(ctx, "tmp dir has been cleaned up", logtrace.Fields{"tmp_dir": tmpDir}) + logtrace.Debug(ctx, "tmp dir has been cleaned up", logtrace.Fields{"tmp_dir": tmpDir}) } } }() @@ -290,7 +290,7 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS logtrace.Error(ctx, "no artefact file retrieved", fields) return fmt.Errorf("no artefact to stream") } - logtrace.Info(ctx, "streaming 
artefact file in chunks", fields) + logtrace.Debug(ctx, "streaming artefact file in chunks", fields) // Open the restored file and stream directly from disk to avoid buffering entire file in memory f, err := os.Open(restoredFilePath) @@ -308,7 +308,7 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS // Calculate optimal chunk size based on file size chunkSize := calculateOptimalChunkSize(fi.Size()) - logtrace.Info(ctx, "calculated optimal chunk size for download", logtrace.Fields{ + logtrace.Debug(ctx, "calculated optimal chunk size for download", logtrace.Fields{ "file_size": fi.Size(), "chunk_size": chunkSize, }) @@ -347,7 +347,7 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS // If EOF after first read, we're done if readErr == io.EOF { - logtrace.Info(ctx, "completed streaming all chunks", fields) + logtrace.Debug(ctx, "completed streaming all chunks", fields) return nil } @@ -374,6 +374,6 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS // Cleanup is handled in deferred block above - logtrace.Info(ctx, "completed streaming all chunks", fields) + logtrace.Debug(ctx, "completed streaming all chunks", fields) return nil } diff --git a/supernode/node/supernode/gateway/server.go b/supernode/node/supernode/gateway/server.go index 5440a7f4..7e17e238 100644 --- a/supernode/node/supernode/gateway/server.go +++ b/supernode/node/supernode/gateway/server.go @@ -86,7 +86,7 @@ func (s *Server) Run(ctx context.Context) error { IdleTimeout: 60 * time.Second, } - logtrace.Info(ctx, "Starting HTTP gateway server", logtrace.Fields{ + logtrace.Debug(ctx, "Starting HTTP gateway server", logtrace.Fields{ "address": s.ipAddress, "port": s.port, }) @@ -105,7 +105,7 @@ func (s *Server) Stop(ctx context.Context) error { return nil } - logtrace.Info(ctx, "Shutting down HTTP gateway server", nil) + logtrace.Debug(ctx, "Shutting down HTTP gateway server", nil) return 
s.server.Shutdown(ctx) } diff --git a/supernode/node/supernode/server/server.go b/supernode/node/supernode/server/server.go index 37e8f4dd..774be094 100644 --- a/supernode/node/supernode/server/server.go +++ b/supernode/node/supernode/server/server.go @@ -48,8 +48,8 @@ func (server *Server) Run(ctx context.Context) error { // Set up gRPC logging logtrace.SetGRPCLogger() - logtrace.Info(ctx, "Server identity configured", logtrace.Fields{logtrace.FieldModule: "server", "identity": server.config.Identity}) - logtrace.Info(ctx, "Server listening", logtrace.Fields{logtrace.FieldModule: "server", "addresses": server.config.ListenAddresses}) + logtrace.Debug(ctx, "Server identity configured", logtrace.Fields{logtrace.FieldModule: "server", "identity": server.config.Identity}) + logtrace.Debug(ctx, "Server listening", logtrace.Fields{logtrace.FieldModule: "server", "addresses": server.config.ListenAddresses}) group, ctx := errgroup.WithContext(ctx) @@ -74,7 +74,7 @@ func (server *Server) Run(ctx context.Context) error { address := addr // Create a new variable to avoid closure issues group.Go(func() error { - logtrace.Info(ctx, "Starting gRPC server", logtrace.Fields{logtrace.FieldModule: "server", "address": address}) + logtrace.Debug(ctx, "Starting gRPC server", logtrace.Fields{logtrace.FieldModule: "server", "address": address}) return server.grpcServer.Serve(ctx, address, opts) }) } diff --git a/supernode/services/cascade/adaptors/p2p.go b/supernode/services/cascade/adaptors/p2p.go index a29e2b99..3621167e 100644 --- a/supernode/services/cascade/adaptors/p2p.go +++ b/supernode/services/cascade/adaptors/p2p.go @@ -39,14 +39,14 @@ type P2PService interface { // p2pImpl is the default implementation of the P2PService interface. type p2pImpl struct { - p2p p2p.Client - rqStore rqstore.Store - metricsDisabled bool + p2p p2p.Client + rqStore rqstore.Store + metricsDisabled bool } // NewP2PService returns a concrete implementation of P2PService. 
func NewP2PService(client p2p.Client, store rqstore.Store, metricsDisabled bool) P2PService { - return &p2pImpl{p2p: client, rqStore: store, metricsDisabled: metricsDisabled} + return &p2pImpl{p2p: client, rqStore: store, metricsDisabled: metricsDisabled} } type StoreArtefactsRequest struct { @@ -57,13 +57,13 @@ type StoreArtefactsRequest struct { } func (p *p2pImpl) StoreArtefacts(ctx context.Context, req StoreArtefactsRequest, f logtrace.Fields) error { - logtrace.Info(ctx, "About to store artefacts (metadata + symbols)", logtrace.Fields{"taskID": req.TaskID, "id_files": len(req.IDFiles)}) + logtrace.Debug(ctx, "About to store artefacts (metadata + symbols)", logtrace.Fields{"taskID": req.TaskID, "id_files": len(req.IDFiles)}) - // Optionally enable per-node store RPC capture for this task - if !p.metricsDisabled { - cm.StartStoreCapture(req.TaskID) - defer cm.StopStoreCapture(req.TaskID) - } + // Optionally enable per-node store RPC capture for this task + if !p.metricsDisabled { + cm.StartStoreCapture(req.TaskID) + defer cm.StopStoreCapture(req.TaskID) + } start := time.Now() firstPassSymbols, totalSymbols, err := p.storeCascadeSymbolsAndData(ctx, req.TaskID, req.ActionID, req.SymbolsDir, req.IDFiles) @@ -71,7 +71,7 @@ func (p *p2pImpl) StoreArtefacts(ctx context.Context, req StoreArtefactsRequest, return errors.Wrap(err, "error storing artefacts") } dur := time.Since(start).Milliseconds() - logtrace.Info(ctx, "artefacts have been stored", logtrace.Fields{"taskID": req.TaskID, "symbols_first_pass": firstPassSymbols, "symbols_total": totalSymbols, "id_files_count": len(req.IDFiles)}) + logtrace.Debug(ctx, "artefacts have been stored", logtrace.Fields{"taskID": req.TaskID, "symbols_first_pass": firstPassSymbols, "symbols_total": totalSymbols, "id_files_count": len(req.IDFiles)}) // Record store summary for later event emission cm.SetStoreSummary(req.TaskID, firstPassSymbols, totalSymbols, len(req.IDFiles), dur) return nil @@ -101,7 +101,7 @@ func (p *p2pImpl) 
storeCascadeSymbolsAndData(ctx context.Context, taskID, action if targetCount < 1 && totalAvailable > 0 { targetCount = 1 } - logtrace.Info(ctx, "first-pass target coverage (symbols)", logtrace.Fields{ + logtrace.Debug(ctx, "first-pass target coverage (symbols)", logtrace.Fields{ "total_symbols": totalAvailable, "target_percent": storeSymbolsPercent, "target_count": targetCount, @@ -117,7 +117,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action sort.Strings(keys) // deterministic order inside the sample } - logtrace.Info(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)}) + logtrace.Debug(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)}) /* stream in fixed-size batches -------------------------------------- */ @@ -188,7 +188,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action if totalAvailable > 0 { achievedPct = (float64(totalSymbols) / float64(totalAvailable)) * 100.0 } - logtrace.Info(ctx, "first-pass achieved coverage (symbols)", + logtrace.Debug(ctx, "first-pass achieved coverage (symbols)", logtrace.Fields{"achieved_symbols": totalSymbols, "achieved_percent": achievedPct}) if err := p.rqStore.UpdateIsFirstBatchStored(actionID); err != nil { @@ -228,7 +228,7 @@ func walkSymbolTree(root string) ([]string, error) { // storeSymbolsInP2P loads a batch of symbols and stores them via P2P. // Returns (ratePct, requests, count, error) where `count` is the number of symbols in this batch. 
func (c *p2pImpl) storeSymbolsInP2P(ctx context.Context, taskID, root string, fileKeys []string) (int, error) { - logtrace.Info(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)}) + logtrace.Debug(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)}) symbols, err := utils.LoadSymbols(root, fileKeys) if err != nil { @@ -242,12 +242,12 @@ func (c *p2pImpl) storeSymbolsInP2P(ctx context.Context, taskID, root string, fi if err := c.p2p.StoreBatch(symCtx, symbols, storage.P2PDataRaptorQSymbol, taskID); err != nil { return len(symbols), fmt.Errorf("p2p store batch: %w", err) } - logtrace.Info(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)}) + logtrace.Debug(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)}) if err := utils.DeleteSymbols(ctx, root, fileKeys); err != nil { return len(symbols), fmt.Errorf("delete symbols: %w", err) } - logtrace.Info(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)}) + logtrace.Debug(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)}) // No per-RPC metrics propagated from p2p return len(symbols), nil diff --git a/supernode/services/cascade/download.go b/supernode/services/cascade/download.go index 9da3dc1e..8271fc51 100644 --- a/supernode/services/cascade/download.go +++ b/supernode/services/cascade/download.go @@ -39,7 +39,7 @@ func (task *CascadeRegistrationTask) Download( send func(resp *DownloadResponse) error, ) (err error) { fields := logtrace.Fields{logtrace.FieldMethod: "Download", logtrace.FieldRequest: req} - logtrace.Info(ctx, "Cascade download request received", fields) + logtrace.Debug(ctx, "Cascade download request received", fields) // Ensure task status is finalized regardless of outcome defer func() { @@ -56,7 +56,7 @@ func (task *CascadeRegistrationTask) Download( fields[logtrace.FieldError] = err return task.wrapErr(ctx, "failed to get action", err, fields) } - logtrace.Info(ctx, "Action retrieved", fields) + 
logtrace.Debug(ctx, "Action retrieved", fields) task.streamDownloadEvent(SupernodeEventTypeActionRetrieved, "Action retrieved", "", "", send) if actionDetails.GetAction().State != actiontypes.ActionStateDone { @@ -65,14 +65,14 @@ func (task *CascadeRegistrationTask) Download( fields[logtrace.FieldActionState] = actionDetails.GetAction().State return task.wrapErr(ctx, "action not found", err, fields) } - logtrace.Info(ctx, "Action state validated", fields) + logtrace.Debug(ctx, "Action state validated", fields) metadata, err := task.decodeCascadeMetadata(ctx, actionDetails.GetAction().Metadata, fields) if err != nil { fields[logtrace.FieldError] = err.Error() return task.wrapErr(ctx, "error decoding cascade metadata", err, fields) } - logtrace.Info(ctx, "Cascade metadata decoded", fields) + logtrace.Debug(ctx, "Cascade metadata decoded", fields) task.streamDownloadEvent(SupernodeEventTypeMetadataDecoded, "Cascade metadata decoded", "", "", send) // Notify: network retrieval phase begins @@ -83,7 +83,7 @@ func (task *CascadeRegistrationTask) Download( fields[logtrace.FieldError] = err.Error() return task.wrapErr(ctx, "failed to download artifacts", err, fields) } - logtrace.Info(ctx, "File reconstructed and hash verified", fields) + logtrace.Debug(ctx, "File reconstructed and hash verified", fields) // Notify: decode completed, file ready on disk task.streamDownloadEvent(SupernodeEventTypeDecodeCompleted, "Decode completed", filePath, tmpDir, send) @@ -91,7 +91,7 @@ func (task *CascadeRegistrationTask) Download( } func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, actionID string, metadata actiontypes.CascadeMetadata, fields logtrace.Fields, send func(resp *DownloadResponse) error) (string, string, error) { - logtrace.Info(ctx, "started downloading the artifacts", fields) + logtrace.Debug(ctx, "started downloading the artifacts", fields) var ( layout codec.Layout @@ -109,7 +109,7 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx 
context.Context, acti // Parse index file to get layout IDs indexData, err := task.parseIndexFile(indexFile) if err != nil { - logtrace.Info(ctx, "failed to parse index file", fields) + logtrace.Debug(ctx, "failed to parse index file", fields) continue } @@ -117,14 +117,14 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, acti var netMS, decMS int64 layout, netMS, decMS, layoutAttempts, err = task.retrieveLayoutFromIndex(ctx, indexData, fields) if err != nil { - logtrace.Info(ctx, "failed to retrieve layout from index", fields) + logtrace.Debug(ctx, "failed to retrieve layout from index", fields) continue } layoutFetchMS = netMS layoutDecodeMS = decMS if len(layout.Blocks) > 0 { - logtrace.Info(ctx, "layout file retrieved via index", fields) + logtrace.Debug(ctx, "layout file retrieved via index", fields) break } } @@ -163,13 +163,12 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( if targetRequiredCount < 1 && totalSymbols > 0 { targetRequiredCount = 1 } - logtrace.Info(ctx, "Retrieving target-required symbols for decode", fields) + logtrace.Debug(ctx, "Retrieving target-required symbols for decode", fields) - - if !task.config.MetricsDisabled { - cm.StartRetrieveCapture(actionID) - defer cm.StopRetrieveCapture(actionID) - } + if !task.config.MetricsDisabled { + cm.StartRetrieveCapture(actionID) + defer cm.StopRetrieveCapture(actionID) + } // Measure symbols batch retrieve duration retrieveStart := time.Now() @@ -203,22 +202,22 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( } decodeMS := time.Since(decodeStart).Milliseconds() - // Set minimal retrieve summary and emit event strictly from internal collector - if !task.config.MetricsDisabled { - cm.SetRetrieveSummary(actionID, retrieveMS, decodeMS) - payload := cm.BuildDownloadEventPayloadFromCollector(actionID) - if retrieve, ok := payload["retrieve"].(map[string]any); ok { - retrieve["target_required_percent"] = targetRequiredPercent - 
retrieve["target_required_count"] = targetRequiredCount - retrieve["total_symbols"] = totalSymbols - } - if b, err := json.MarshalIndent(payload, "", " "); err == nil { - task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, string(b), "", "", send) - } - } else { - // Send minimal hardcoded event when metrics disabled - task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, "Download completed (metrics disabled)", "", "", send) - } + // Set minimal retrieve summary and emit event strictly from internal collector + if !task.config.MetricsDisabled { + cm.SetRetrieveSummary(actionID, retrieveMS, decodeMS) + payload := cm.BuildDownloadEventPayloadFromCollector(actionID) + if retrieve, ok := payload["retrieve"].(map[string]any); ok { + retrieve["target_required_percent"] = targetRequiredPercent + retrieve["target_required_count"] = targetRequiredCount + retrieve["total_symbols"] = totalSymbols + } + if b, err := json.MarshalIndent(payload, "", " "); err == nil { + task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, string(b), "", "", send) + } + } else { + // Send minimal hardcoded event when metrics disabled + task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, "Download completed (metrics disabled)", "", "", send) + } fileHash, err := crypto.HashFileIncrementally(decodeInfo.FilePath, 0) if err != nil { @@ -238,7 +237,7 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( fields[logtrace.FieldError] = err.Error() return "", decodeInfo.DecodeTmpDir, err } - logtrace.Info(ctx, "File successfully restored and hash verified", fields) + logtrace.Debug(ctx, "File successfully restored and hash verified", fields) return decodeInfo.FilePath, decodeInfo.DecodeTmpDir, nil } diff --git a/supernode/services/cascade/helper.go b/supernode/services/cascade/helper.go index fb8c7ef5..b22ec14a 100644 --- a/supernode/services/cascade/helper.go +++ b/supernode/services/cascade/helper.go @@ -14,9 +14,9 @@ import ( 
"github.com/LumeraProtocol/supernode/v2/pkg/errors" "github.com/LumeraProtocol/supernode/v2/pkg/logtrace" "github.com/LumeraProtocol/supernode/v2/pkg/lumera/modules/supernode" + cm "github.com/LumeraProtocol/supernode/v2/pkg/p2pmetrics" "github.com/LumeraProtocol/supernode/v2/pkg/utils" "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors" - cm "github.com/LumeraProtocol/supernode/v2/pkg/p2pmetrics" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/golang/protobuf/proto" @@ -36,7 +36,7 @@ func (task *CascadeRegistrationTask) fetchAction(ctx context.Context, actionID s if res.GetAction().ActionID == "" { return nil, task.wrapErr(ctx, "action not found", errors.New(""), f) } - logtrace.Info(ctx, "action has been retrieved", f) + logtrace.Debug(ctx, "action has been retrieved", f) return res.GetAction(), nil } @@ -46,7 +46,7 @@ func (task *CascadeRegistrationTask) ensureIsTopSupernode(ctx context.Context, b if err != nil { return task.wrapErr(ctx, "failed to get top SNs", err, f) } - logtrace.Info(ctx, "Fetched Top Supernodes", f) + logtrace.Debug(ctx, "Fetched Top Supernodes", f) if !supernode.Exists(top.Supernodes, task.config.SupernodeAccountAddress) { // Build information about supernodes for better error context @@ -54,7 +54,7 @@ func (task *CascadeRegistrationTask) ensureIsTopSupernode(ctx context.Context, b for i, sn := range top.Supernodes { addresses[i] = sn.SupernodeAccount } - logtrace.Info(ctx, "Supernode not in top list", logtrace.Fields{ + logtrace.Debug(ctx, "Supernode not in top list", logtrace.Fields{ "currentAddress": task.config.SupernodeAccountAddress, "topSupernodes": addresses, }) @@ -78,7 +78,7 @@ func (task *CascadeRegistrationTask) verifyDataHash(ctx context.Context, dh []by if string(b64) != expected { return task.wrapErr(ctx, "data hash doesn't match", errors.New(""), f) } - logtrace.Info(ctx, "request data-hash has been matched with the action data-hash", f) + logtrace.Debug(ctx, "request data-hash has been matched 
with the action data-hash", f) return nil } @@ -110,7 +110,7 @@ func (task *CascadeRegistrationTask) verifySignatureAndDecodeLayout(ctx context. if err := task.LumeraClient.Verify(ctx, creator, []byte(indexFileB64), creatorSigBytes); err != nil { return codec.Layout{}, "", task.wrapErr(ctx, "failed to verify creator signature", err, f) } - logtrace.Info(ctx, "creator signature successfully verified", f) + logtrace.Debug(ctx, "creator signature successfully verified", f) // Decode index file to get the layout signature indexFile, err := decodeIndexFile(indexFileB64) @@ -132,7 +132,7 @@ func (task *CascadeRegistrationTask) verifySignatureAndDecodeLayout(ctx context. if err := task.LumeraClient.Verify(ctx, creator, layoutB64, layoutSigBytes); err != nil { return codec.Layout{}, "", task.wrapErr(ctx, "failed to verify layout signature", err, f) } - logtrace.Info(ctx, "layout signature successfully verified", f) + logtrace.Debug(ctx, "layout signature successfully verified", f) return encodedMeta, indexFile.LayoutSignature, nil } @@ -199,22 +199,22 @@ func (task *CascadeRegistrationTask) wrapErr(ctx context.Context, msg string, er // emitArtefactsStored builds a single-line metrics summary and emits the // SupernodeEventTypeArtefactsStored event while logging the metrics line. 
func (task *CascadeRegistrationTask) emitArtefactsStored( - ctx context.Context, - fields logtrace.Fields, - _ codec.Layout, - send func(resp *RegisterResponse) error, + ctx context.Context, + fields logtrace.Fields, + _ codec.Layout, + send func(resp *RegisterResponse) error, ) { if fields == nil { fields = logtrace.Fields{} } - // Build payload strictly from internal collector (no P2P snapshots) - payload := cm.BuildStoreEventPayloadFromCollector(task.ID()) + // Build payload strictly from internal collector (no P2P snapshots) + payload := cm.BuildStoreEventPayloadFromCollector(task.ID()) b, _ := json.MarshalIndent(payload, "", " ") msg := string(b) fields["metrics_json"] = msg - logtrace.Info(ctx, "artefacts have been stored", fields) + logtrace.Debug(ctx, "artefacts have been stored", fields) task.streamEvent(SupernodeEventTypeArtefactsStored, msg, "", send) // No central state to clear; adaptor returns calls inline } @@ -279,7 +279,7 @@ func (task *CascadeRegistrationTask) verifyActionFee(ctx context.Context, action requiredFee := sdk.NewCoin("ulume", math.NewInt(amount)) // Log the calculated fee - logtrace.Info(ctx, "calculated required fee", logtrace.Fields{ + logtrace.Debug(ctx, "calculated required fee", logtrace.Fields{ "fee": requiredFee.String(), "dataBytes": dataSize, }) @@ -377,6 +377,6 @@ func (task *CascadeRegistrationTask) VerifyDownloadSignature(ctx context.Context return task.wrapErr(ctx, "failed to verify download signature", err, fields) } - logtrace.Info(ctx, "download signature successfully verified", fields) + logtrace.Debug(ctx, "download signature successfully verified", fields) return nil } diff --git a/supernode/services/cascade/register.go b/supernode/services/cascade/register.go index 4739e0d9..4b456734 100644 --- a/supernode/services/cascade/register.go +++ b/supernode/services/cascade/register.go @@ -1,11 +1,11 @@ package cascade import ( - "context" - "os" + "context" + "os" - "github.com/LumeraProtocol/supernode/v2/pkg/logtrace" - 
"github.com/LumeraProtocol/supernode/v2/supernode/services/common" + "github.com/LumeraProtocol/supernode/v2/pkg/logtrace" + "github.com/LumeraProtocol/supernode/v2/supernode/services/common" ) // RegisterRequest contains parameters for upload request @@ -46,7 +46,7 @@ func (task *CascadeRegistrationTask) Register( ) (err error) { fields := logtrace.Fields{logtrace.FieldMethod: "Register", logtrace.FieldRequest: req} - logtrace.Info(ctx, "Cascade registration request received", fields) + logtrace.Debug(ctx, "Cascade registration request received", fields) // Ensure task status and resources are finalized regardless of outcome defer func() { @@ -64,7 +64,7 @@ func (task *CascadeRegistrationTask) Register( if remErr := os.RemoveAll(req.FilePath); remErr != nil { logtrace.Warn(ctx, "Failed to remove uploaded file", fields) } else { - logtrace.Info(ctx, "Uploaded file cleaned up", fields) + logtrace.Debug(ctx, "Uploaded file cleaned up", fields) } } }() @@ -78,14 +78,14 @@ func (task *CascadeRegistrationTask) Register( fields[logtrace.FieldCreator] = action.Creator fields[logtrace.FieldStatus] = action.State fields[logtrace.FieldPrice] = action.Price - logtrace.Info(ctx, "Action retrieved", fields) + logtrace.Debug(ctx, "Action retrieved", fields) task.streamEvent(SupernodeEventTypeActionRetrieved, "Action retrieved", "", send) /* 2. Verify action fee -------------------------------------------------------- */ if err := task.verifyActionFee(ctx, action, req.DataSize, fields); err != nil { return err } - logtrace.Info(ctx, "Action fee verified", fields) + logtrace.Debug(ctx, "Action fee verified", fields) task.streamEvent(SupernodeEventTypeActionFeeVerified, "Action fee verified", "", send) /* 3. 
Ensure this super-node is eligible -------------------------------------- */ @@ -93,7 +93,7 @@ func (task *CascadeRegistrationTask) Register( if err := task.ensureIsTopSupernode(ctx, uint64(action.BlockHeight), fields); err != nil { return err } - logtrace.Info(ctx, "Top supernode eligibility confirmed", fields) + logtrace.Debug(ctx, "Top supernode eligibility confirmed", fields) task.streamEvent(SupernodeEventTypeTopSupernodeCheckPassed, "Top supernode eligibility confirmed", "", send) /* 4. Decode cascade metadata -------------------------------------------------- */ @@ -101,14 +101,14 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Info(ctx, "Cascade metadata decoded", fields) + logtrace.Debug(ctx, "Cascade metadata decoded", fields) task.streamEvent(SupernodeEventTypeMetadataDecoded, "Cascade metadata decoded", "", send) /* 5. Verify data hash --------------------------------------------------------- */ if err := task.verifyDataHash(ctx, req.DataHash, cascadeMeta.DataHash, fields); err != nil { return err } - logtrace.Info(ctx, "Data hash verified", fields) + logtrace.Debug(ctx, "Data hash verified", fields) task.streamEvent(SupernodeEventTypeDataHashVerified, "Data hash verified", "", send) /* 6. Encode the raw data ------------------------------------------------------ */ @@ -116,7 +116,7 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Info(ctx, "Input encoded", fields) + logtrace.Debug(ctx, "Input encoded", fields) task.streamEvent(SupernodeEventTypeInputEncoded, "Input encoded", "", send) /* 7. Signature verification + layout decode ---------------------------------- */ @@ -126,7 +126,7 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Info(ctx, "Signature verified", fields) + logtrace.Debug(ctx, "Signature verified", fields) task.streamEvent(SupernodeEventTypeSignatureVerified, "Signature verified", "", send) /* 8. 
Generate RQ-ID files ----------------------------------------------------- */ @@ -134,48 +134,48 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Info(ctx, "RQID files generated", fields) + logtrace.Debug(ctx, "RQID files generated", fields) task.streamEvent(SupernodeEventTypeRQIDsGenerated, "RQID files generated", "", send) /* 9. Consistency checks ------------------------------------------------------- */ if err := verifyIDs(layout, encResp.Metadata); err != nil { return task.wrapErr(ctx, "failed to verify IDs", err, fields) } - logtrace.Info(ctx, "RQIDs verified", fields) + logtrace.Debug(ctx, "RQIDs verified", fields) task.streamEvent(SupernodeEventTypeRqIDsVerified, "RQIDs verified", "", send) /* 10. Simulate finalize to avoid storing artefacts if it would fail ---------- */ if _, err := task.LumeraClient.SimulateFinalizeAction(ctx, action.ActionID, rqidResp.RQIDs); err != nil { fields[logtrace.FieldError] = err.Error() - logtrace.Info(ctx, "Finalize simulation failed", fields) + logtrace.Debug(ctx, "Finalize simulation failed", fields) // Emit explicit simulation failure event for client visibility task.streamEvent(SupernodeEventTypeFinalizeSimulationFailed, "Finalize simulation failed", "", send) return task.wrapErr(ctx, "finalize action simulation failed", err, fields) } - logtrace.Info(ctx, "Finalize simulation passed", fields) + logtrace.Debug(ctx, "Finalize simulation passed", fields) // Transmit as a standard event so SDK can propagate it (dedicated type) task.streamEvent(SupernodeEventTypeFinalizeSimulated, "Finalize simulation passed", "", send) /* 11. Persist artefacts -------------------------------------------------------- */ - // Persist artefacts to the P2P network. P2P interfaces return error only; - // metrics are summarized at the cascade layer and emitted via event. + // Persist artefacts to the P2P network. 
P2P interfaces return error only; + // metrics are summarized at the cascade layer and emitted via event. if err := task.storeArtefacts(ctx, action.ActionID, rqidResp.RedundantMetadataFiles, encResp.SymbolsDir, fields); err != nil { return err } - // Emit compact analytics payload from centralized metrics collector (optional) - if !task.config.MetricsDisabled { - task.emitArtefactsStored(ctx, fields, encResp.Metadata, send) - } + // Emit compact analytics payload from centralized metrics collector (optional) + if !task.config.MetricsDisabled { + task.emitArtefactsStored(ctx, fields, encResp.Metadata, send) + } resp, err := task.LumeraClient.FinalizeAction(ctx, action.ActionID, rqidResp.RQIDs) if err != nil { fields[logtrace.FieldError] = err.Error() - logtrace.Info(ctx, "Finalize action error", fields) + logtrace.Debug(ctx, "Finalize action error", fields) return task.wrapErr(ctx, "failed to finalize action", err, fields) } txHash := resp.TxResponse.TxHash fields[logtrace.FieldTxHash] = txHash - logtrace.Info(ctx, "Action finalized", fields) + logtrace.Debug(ctx, "Action finalized", fields) task.streamEvent(SupernodeEventTypeActionFinalized, "Action finalized", txHash, send) return nil diff --git a/supernode/services/common/base/supernode_service.go b/supernode/services/common/base/supernode_service.go index 1d41715b..424556b0 100644 --- a/supernode/services/common/base/supernode_service.go +++ b/supernode/services/common/base/supernode_service.go @@ -52,7 +52,7 @@ func (service *SuperNodeService) RunHelper(ctx context.Context, nodeID string, p service.Worker = task.NewWorker() logtrace.Error(ctx, "Service run failed, retrying", logtrace.Fields{logtrace.FieldModule: "supernode", logtrace.FieldError: err.Error()}) } else { - logtrace.Info(ctx, "Service run completed successfully - closing sn services", logtrace.Fields{logtrace.FieldModule: "supernode"}) + logtrace.Debug(ctx, "Service run completed successfully - closing sn services", 
logtrace.Fields{logtrace.FieldModule: "supernode"}) return nil } } diff --git a/supernode/services/common/base/supernode_task.go b/supernode/services/common/base/supernode_task.go index 937e6013..2908558d 100644 --- a/supernode/services/common/base/supernode_task.go +++ b/supernode/services/common/base/supernode_task.go @@ -25,7 +25,7 @@ type SuperNodeTask struct { func (task *SuperNodeTask) RunHelper(ctx context.Context, clean TaskCleanerFunc) error { ctx = task.context(ctx) logtrace.Debug(ctx, "Start task", logtrace.Fields{}) - defer logtrace.Info(ctx, "Task canceled", logtrace.Fields{}) + defer logtrace.Debug(ctx, "Task canceled", logtrace.Fields{}) defer task.Cancel() task.SetStatusNotifyFunc(func(status *state.Status) { diff --git a/supernode/services/common/storage/handler.go b/supernode/services/common/storage/handler.go index 210dab0f..3967fe2d 100644 --- a/supernode/services/common/storage/handler.go +++ b/supernode/services/common/storage/handler.go @@ -74,7 +74,7 @@ func (h *StorageHandler) StoreBatch(ctx context.Context, list [][]byte, typ int) taskID = fmt.Sprintf("%v", val) } - logtrace.Info(ctx, "task_id in storeList", logtrace.Fields{logtrace.FieldTaskID: taskID}) + logtrace.Debug(ctx, "task_id in storeList", logtrace.Fields{logtrace.FieldTaskID: taskID}) // Add taskID to context for metrics ctx = p2pmetrics.WithTaskID(ctx, taskID) return h.P2PClient.StoreBatch(ctx, list, typ, taskID) @@ -110,7 +110,7 @@ func (h *StorageHandler) StoreRaptorQSymbolsIntoP2P(ctx context.Context, taskID, sort.Strings(keys) // deterministic order inside the sample } - logtrace.Info(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)}) + logtrace.Debug(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)}) /* stream in fixed-size batches -------------------------------------- */ for start := 0; start < len(keys); { @@ -128,7 +128,7 @@ func (h *StorageHandler) StoreRaptorQSymbolsIntoP2P(ctx context.Context, taskID, return fmt.Errorf("update 
first-batch flag: %w", err) } - logtrace.Info(ctx, "finished storing RaptorQ symbols", logtrace.Fields{"curr-time": time.Now().UTC(), "count": len(keys)}) + logtrace.Debug(ctx, "finished storing RaptorQ symbols", logtrace.Fields{"curr-time": time.Now().UTC(), "count": len(keys)}) return nil } @@ -160,7 +160,7 @@ func walkSymbolTree(root string) ([]string, error) { } func (h *StorageHandler) storeSymbolsInP2P(ctx context.Context, taskID, root string, fileKeys []string) error { - logtrace.Info(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)}) + logtrace.Debug(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)}) symbols, err := utils.LoadSymbols(root, fileKeys) if err != nil { @@ -173,13 +173,13 @@ func (h *StorageHandler) storeSymbolsInP2P(ctx context.Context, taskID, root str return fmt.Errorf("p2p store batch: %w", err) } - logtrace.Info(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)}) + logtrace.Debug(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)}) if err := utils.DeleteSymbols(ctx, root, fileKeys); err != nil { return fmt.Errorf("delete symbols: %w", err) } - logtrace.Info(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)}) + logtrace.Debug(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)}) return nil } diff --git a/supernode/services/common/supernode/service.go b/supernode/services/common/supernode/service.go index 13d5efe4..81bac456 100644 --- a/supernode/services/common/supernode/service.go +++ b/supernode/services/common/supernode/service.go @@ -54,7 +54,7 @@ func (s *SupernodeStatusService) GetStatus(ctx context.Context, includeP2PMetric logtrace.FieldMethod: "GetStatus", logtrace.FieldModule: "SupernodeStatusService", } - logtrace.Info(ctx, "status request received", fields) + logtrace.Debug(ctx, "status request received", fields) var resp StatusResponse resp.Version = Version diff --git a/supernode/services/verifier/verifier.go 
b/supernode/services/verifier/verifier.go index 867bd966..68a2ae77 100644 --- a/supernode/services/verifier/verifier.go +++ b/supernode/services/verifier/verifier.go @@ -75,7 +75,7 @@ func (cv *ConfigVerifier) VerifyConfig(ctx context.Context) (*VerificationResult // Check 5: Verify all required ports are available cv.checkPortsAvailable(result) - logtrace.Info(ctx, "Config verification completed", logtrace.Fields{ + logtrace.Debug(ctx, "Config verification completed", logtrace.Fields{ "valid": result.IsValid(), "errors": len(result.Errors), "warnings": len(result.Warnings), From 6bd2b9c381241772d857d9fed7baad9f4063eedb Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Tue, 30 Sep 2025 08:57:51 +0500 Subject: [PATCH 12/15] high-signal logs --- p2p/kademlia/dht.go | 114 ++++++++++++--------- p2p/kademlia/fetch_and_store.go | 3 + p2p/kademlia/network.go | 12 +-- p2p/kademlia/rq_symbols.go | 27 ++++- supernode/services/cascade/adaptors/p2p.go | 53 ++++++++-- supernode/services/cascade/download.go | 56 +++++++--- supernode/services/cascade/helper.go | 32 ++++-- supernode/services/cascade/register.go | 24 ++--- 8 files changed, 220 insertions(+), 101 deletions(-) diff --git a/p2p/kademlia/dht.go b/p2p/kademlia/dht.go index df2be5c7..d0022274 100644 --- a/p2p/kademlia/dht.go +++ b/p2p/kademlia/dht.go @@ -362,7 +362,7 @@ func (s *DHT) Store(ctx context.Context, data []byte, typ int) (string, error) { // measured success rate for node RPCs is below the configured minimum, an error // is returned. Metrics are not returned through the API. 
func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID string) error { - logtrace.Debug(ctx, "Store DB batch begin", logtrace.Fields{ + logtrace.Info(ctx, "DHT StoreBatch begin", logtrace.Fields{ logtrace.FieldModule: "dht", logtrace.FieldTaskID: taskID, "records": len(values), @@ -370,7 +370,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s if err := s.store.StoreBatch(ctx, values, typ, true); err != nil { return fmt.Errorf("store batch: %v", err) } - logtrace.Debug(ctx, "Store DB batch done, store network batch begin", logtrace.Fields{ + logtrace.Info(ctx, "DHT StoreBatch: local stored; network begin", logtrace.Fields{ logtrace.FieldModule: "dht", logtrace.FieldTaskID: taskID, }) @@ -380,7 +380,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s return fmt.Errorf("iterate batch store: %v", err) } - logtrace.Debug(ctx, "Store network batch workers done", logtrace.Fields{ + logtrace.Info(ctx, "DHT StoreBatch: network done", logtrace.Fields{ logtrace.FieldModule: "dht", logtrace.FieldTaskID: taskID, }) @@ -391,6 +391,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s // Retrieve data from the networking using key. Key is the base58 encoded // identifier of the data. 
func (s *DHT) Retrieve(ctx context.Context, key string, localOnly ...bool) ([]byte, error) { + start := time.Now() decoded := base58.Decode(key) if len(decoded) != B/8 { return nil, fmt.Errorf("invalid key: %v", key) @@ -406,6 +407,7 @@ func (s *DHT) Retrieve(ctx context.Context, key string, localOnly ...bool) ([]by // retrieve the key/value from queries storage value, err := s.store.Retrieve(ctx, decoded) if err == nil && len(value) > 0 { + logtrace.Info(ctx, "DHT Retrieve local hit", logtrace.Fields{"key": hex.EncodeToString(decoded), "ms": time.Since(start).Milliseconds()}) return value, nil } else if err != nil { logtrace.Error(ctx, "Error retrieving key from local storage", logtrace.Fields{ @@ -421,20 +423,23 @@ func (s *DHT) Retrieve(ctx context.Context, key string, localOnly ...bool) ([]by } // if not found locally, iterative find value from kademlia network + logtrace.Info(ctx, "DHT Retrieve network lookup", logtrace.Fields{"key": dbKey}) peerValue, err := s.iterate(ctx, IterateFindValue, decoded, nil, 0) if err != nil { return nil, errors.Errorf("retrieve from peer: %w", err) } if len(peerValue) > 0 { - logtrace.Debug(ctx, "Not found locally, retrieved from other nodes", logtrace.Fields{ + logtrace.Info(ctx, "DHT Retrieve network hit", logtrace.Fields{ logtrace.FieldModule: "dht", "key": dbKey, "data_len": len(peerValue), + "ms": time.Since(start).Milliseconds(), }) } else { - logtrace.Debug(ctx, "Not found locally, not found in other nodes", logtrace.Fields{ + logtrace.Info(ctx, "DHT Retrieve miss", logtrace.Fields{ logtrace.FieldModule: "dht", "key": dbKey, + "ms": time.Since(start).Milliseconds(), }) } @@ -529,6 +534,8 @@ func (s *DHT) GetValueFromNode(ctx context.Context, target []byte, n *Node) ([]b cctx, ccancel := context.WithTimeout(ctx, time.Second*5) defer ccancel() + // Minimal per-RPC visibility + logtrace.Info(ctx, "RPC FindValue send", logtrace.Fields{"node": n.String(), "key": hex.EncodeToString(target)}) response, err := 
s.network.Call(cctx, request, false) if err != nil { logtrace.Debug(ctx, "Network call request failed", logtrace.Fields{ @@ -538,6 +545,7 @@ func (s *DHT) GetValueFromNode(ctx context.Context, target []byte, n *Node) ([]b }) return nil, fmt.Errorf("network call request %s failed: %w", request.String(), err) } + logtrace.Info(ctx, "RPC FindValue completed", logtrace.Fields{"node": n.String()}) v, ok := response.Data.(*FindValueResponse) if ok && v.Status.Result == ResultOk && len(v.Value) > 0 { @@ -573,7 +581,7 @@ func (s *DHT) doMultiWorkers(ctx context.Context, iterativeType int, target []by // update the running goroutines number++ - logtrace.Debug(ctx, "Start work for node", logtrace.Fields{ + logtrace.Info(ctx, "Start work for node", logtrace.Fields{ logtrace.FieldModule: "p2p", "iterate_type": iterativeType, "node": node.String(), @@ -597,18 +605,35 @@ func (s *DHT) doMultiWorkers(ctx context.Context, iterativeType int, target []by // new a request message request := s.newMessage(messageType, receiver, data) + // Minimal per-RPC visibility + op := "" + switch messageType { + case FindNode: + op = "FindNode" + case FindValue: + op = "FindValue" + default: + op = "RPC" + } + fields := logtrace.Fields{"node": receiver.String()} + if messageType == FindValue { + fields["key"] = hex.EncodeToString(target) + } + logtrace.Info(ctx, "RPC "+op+" send", fields) // send the request and receive the response response, err := s.network.Call(ctx, request, false) if err != nil { - logtrace.Debug(ctx, "Network call request failed", logtrace.Fields{ + logtrace.Info(ctx, "Iterate worker RPC failed", logtrace.Fields{ logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "request": request.String(), + "node": receiver.String(), }) // node is unreachable, remove the node //removedNodes = append(removedNodes, receiver) return } + logtrace.Info(ctx, "RPC "+op+" completed", logtrace.Fields{"node": receiver.String()}) // send the response to message channel responses <- 
response @@ -637,7 +662,7 @@ func (s *DHT) fetchAndAddLocalKeys(ctx context.Context, hexKeys []string, result batchHexKeys := hexKeys[start:end] - logtrace.Debug(ctx, "Processing batch of local keys", logtrace.Fields{ + logtrace.Info(ctx, "Processing batch of local keys", logtrace.Fields{ logtrace.FieldModule: "dht", "batch_size": len(batchHexKeys), "total_keys": len(hexKeys), @@ -671,6 +696,7 @@ func (s *DHT) fetchAndAddLocalKeys(ctx context.Context, hexKeys []string, result func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32, txID string, localOnly ...bool) (result map[string][]byte, err error) { start := time.Now() + logtrace.Info(ctx, "DHT BatchRetrieve begin", logtrace.Fields{"txid": txID, "keys": len(keys), "required": required}) result = make(map[string][]byte) var resMap sync.Map var foundLocalCount int32 @@ -791,6 +817,7 @@ func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32, wg.Wait() netFound := int(atomic.LoadInt32(&networkFound)) + logtrace.Info(ctx, "DHT BatchRetrieve complete", logtrace.Fields{"txid": txID, "found_local": foundLocalCount, "found_network": netFound, "required": required, "ms": time.Since(start).Milliseconds()}) // Record batch retrieve stats for internal DHT snapshot window s.metrics.RecordBatchRetrieve(len(keys), int(required), int(foundLocalCount), netFound, time.Since(start)) // Also feed retrieve counts into the per-task collector for stream events @@ -1001,7 +1028,7 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node, wg.Wait() - logtrace.Debug(ctx, "Iterate batch get values done", logtrace.Fields{ + logtrace.Info(ctx, "Iterate batch get values done", logtrace.Fields{ logtrace.FieldModule: "dht", "found_count": atomic.LoadInt32(&foundCount), }) @@ -1045,10 +1072,12 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node, func (s *DHT) doBatchGetValuesCall(ctx context.Context, node *Node, requestKeys 
map[string]KeyValWithClosest) (map[string]KeyValWithClosest, error) { request := s.newMessage(BatchGetValues, node, &BatchGetValuesRequest{Data: requestKeys}) + logtrace.Info(ctx, "RPC BatchGetValues send", logtrace.Fields{"node": node.String(), "keys": len(requestKeys)}) response, err := s.network.Call(ctx, request, false) if err != nil { return nil, fmt.Errorf("network call request %s failed: %w", request.String(), err) } + logtrace.Info(ctx, "RPC BatchGetValues completed", logtrace.Fields{"node": node.String()}) resp, ok := response.Data.(*BatchGetValuesResponse) if !ok { @@ -1083,7 +1112,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat // find the closest contacts for the target node from queries route tables nl, _ := s.ht.closestContacts(Alpha, target, igList) if len(igList) > 0 { - logtrace.Debug(ctx, "Closest contacts", logtrace.Fields{ + logtrace.Info(ctx, "Closest contacts", logtrace.Fields{ logtrace.FieldModule: "p2p", "nodes": nl.String(), "ignored": s.ignorelist.String(), @@ -1093,7 +1122,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat if nl.Len() == 0 { return nil, nil } - logtrace.Debug(ctx, "Iterate start", logtrace.Fields{ + logtrace.Info(ctx, "Iterate start", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "type": iterativeType, @@ -1107,7 +1136,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat if iterativeType == IterateFindNode { hashedTargetID, _ := utils.Blake3Hash(target) bucket := s.ht.bucketIndex(s.ht.self.HashedID, hashedTargetID) - logtrace.Debug(ctx, "Bucket for target", logtrace.Fields{ + logtrace.Info(ctx, "Bucket for target", logtrace.Fields{ logtrace.FieldModule: "p2p", "target": sKey, }) @@ -1131,7 +1160,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat // Set a maximum number of iterations to prevent indefinite looping maxIterations := 5 // Adjust the maximum iterations 
as needed - logtrace.Debug(ctx, "Begin iteration", logtrace.Fields{ + logtrace.Info(ctx, "Begin iteration", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1142,7 +1171,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat case <-ctx.Done(): return nil, fmt.Errorf("iterate cancelled: %w", ctx.Err()) case <-timeout: - logtrace.Debug(ctx, "Iteration timed out", logtrace.Fields{ + logtrace.Info(ctx, "Iteration timed out", logtrace.Fields{ logtrace.FieldModule: "p2p", }) return nil, nil @@ -1165,7 +1194,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat } default: - logtrace.Error(ctx, "Unknown message type", logtrace.Fields{ + logtrace.Info(ctx, "Unknown message type", logtrace.Fields{ logtrace.FieldModule: "dht", "type": response.MessageType, }) @@ -1174,7 +1203,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat // Stop search if no more nodes to contact if !searchRest && len(nl.Nodes) == 0 { - logtrace.Debug(ctx, "Search stopped", logtrace.Fields{ + logtrace.Info(ctx, "Search stopped", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1186,7 +1215,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat nl.Comparator = target nl.Sort() - logtrace.Debug(ctx, "Iterate sorted nodes", logtrace.Fields{ + logtrace.Info(ctx, "Iterate sorted nodes", logtrace.Fields{ logtrace.FieldModule: "p2p", "id": base58.Encode(s.ht.self.ID), "iterate": iterativeType, @@ -1223,7 +1252,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat } } - logtrace.Debug(ctx, "Finish iteration without results", logtrace.Fields{ + logtrace.Info(ctx, "Finish iteration without results", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1244,7 +1273,7 @@ func (s *DHT) handleResponses(ctx context.Context, responses <-chan *Message, nl v, ok := 
response.Data.(*FindValueResponse) if ok { if v.Status.Result == ResultOk && len(v.Value) > 0 { - logtrace.Debug(ctx, "Iterate found value from network", logtrace.Fields{ + logtrace.Info(ctx, "Iterate found value from network", logtrace.Fields{ logtrace.FieldModule: "p2p", }) return nl, v.Value @@ -1274,7 +1303,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] // nl will have the closest nodes to the target value, it will ignore the nodes in igList nl, _ := s.ht.closestContacts(Alpha, target, igList) if len(igList) > 0 { - logtrace.Debug(ctx, "Closest contacts", logtrace.Fields{ + logtrace.Info(ctx, "Closest contacts", logtrace.Fields{ logtrace.FieldModule: "p2p", "nodes": nl.String(), "ignored": s.ignorelist.String(), @@ -1289,7 +1318,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] searchRest := false // keep track of contacted nodes so that we don't hit them again contacted := make(map[string]bool) - logtrace.Debug(ctx, "Begin iteration", logtrace.Fields{ + logtrace.Info(ctx, "Begin iteration", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1298,7 +1327,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] var closestNode *Node var iterationCount int for iterationCount = 0; iterationCount < maxIterations; iterationCount++ { - logtrace.Debug(ctx, "Begin find value", logtrace.Fields{ + logtrace.Info(ctx, "Begin find value", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "nl": nl.Len(), @@ -1307,7 +1336,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] }) if nl.Len() == 0 { - logtrace.Error(ctx, "Nodes list length is 0", logtrace.Fields{ + logtrace.Info(ctx, "Nodes list length is 0", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1318,7 +1347,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] // if the 
closest node is the same as the last iteration and we don't want to search rest of nodes, we are done if !searchRest && (closestNode != nil && bytes.Equal(nl.Nodes[0].ID, closestNode.ID)) { - logtrace.Debug(ctx, "Closest node is the same as the last iteration", logtrace.Fields{ + logtrace.Info(ctx, "Closest node is the same as the last iteration", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1337,7 +1366,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] nl.Sort() - logtrace.Debug(ctx, "Iteration progress", logtrace.Fields{ + logtrace.Info(ctx, "Iteration progress", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1346,7 +1375,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target [] }) } - logtrace.Debug(ctx, "Finished iterations without results", logtrace.Fields{ + logtrace.Info(ctx, "Finished iterations without results", logtrace.Fields{ logtrace.FieldModule: "p2p", "task_id": taskID, "key": sKey, @@ -1559,7 +1588,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte, skey, _ := utils.Blake3Hash(data) if finalStoreCount >= int32(Alpha) { - logtrace.Debug(ctx, "Store data to alpha nodes success", logtrace.Fields{ + logtrace.Info(ctx, "Store data to alpha nodes success", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": taskID, "len_total_nodes": nl.Len(), @@ -1569,7 +1598,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte, return nil } - logtrace.Debug(ctx, "Store data to alpha nodes failed", logtrace.Fields{ + logtrace.Info(ctx, "Store data to alpha nodes failed", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": taskID, "store_count": finalStoreCount, @@ -1656,7 +1685,7 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i knownNodes := make(map[string]*Node) hashes := make([][]byte, len(values)) - logtrace.Debug(ctx, 
"Iterate batch store begin", logtrace.Fields{ + logtrace.Info(ctx, "Iterate batch store begin", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": id, "keys": len(values), @@ -1683,6 +1712,7 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i requests := 0 successful := 0 + logtrace.Info(ctx, "Iterate batch store: dispatching to nodes", logtrace.Fields{"task_id": id, "nodes": len(knownNodes)}) storeResponses := s.batchStoreNetwork(ctx, values, knownNodes, storageMap, typ) for response := range storeResponses { requests++ @@ -1743,14 +1773,14 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i successRate := float64(successful) / float64(requests) * 100 if successRate >= minimumDataStoreSuccessRate { - logtrace.Debug(ctx, "Successful store operations", logtrace.Fields{ + logtrace.Info(ctx, "Successful store operations", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": id, "success_rate": fmt.Sprintf("%.2f%%", successRate), }) return nil } else { - logtrace.Debug(ctx, "Failed to achieve desired success rate", logtrace.Fields{ + logtrace.Info(ctx, "Failed to achieve desired success rate", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": id, "success_rate": fmt.Sprintf("%.2f%%", successRate), @@ -1777,10 +1807,7 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ var wg sync.WaitGroup for key, node := range nodes { - logtrace.Debug(ctx, "Node", logtrace.Fields{ - logtrace.FieldModule: "dht", - "port": node.String(), - }) + logtrace.Info(ctx, "Preparing batch store to node", logtrace.Fields{logtrace.FieldModule: "dht", "node": node.String()}) if s.ignorelist.Banned(node) { logtrace.Debug(ctx, "Ignoring banned node in batch store network call", logtrace.Fields{ logtrace.FieldModule: "dht", @@ -1810,11 +1837,7 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ totalBytes += len(values[idx]) } - logtrace.Debug(ctx, "Batch 
store to node", logtrace.Fields{ - logtrace.FieldModule: "dht", - "keys": len(toStore), - "size_before_compress": utils.BytesIntToMB(totalBytes), - }) + logtrace.Info(ctx, "RPC BatchStoreData send", logtrace.Fields{logtrace.FieldModule: "dht", "node": receiver.String(), "keys": len(toStore), "size_mb": utils.BytesIntToMB(totalBytes)}) // Skip empty payloads: avoid sending empty store RPCs and do not record no-op metrics. if len(toStore) == 0 { @@ -1835,15 +1858,12 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ s.metrics.IncHotPathBanIncr() } - logtrace.Debug(ctx, "Network call batch store request failed", logtrace.Fields{ - logtrace.FieldModule: "p2p", - logtrace.FieldError: err.Error(), - "request": request.String(), - }) + logtrace.Info(ctx, "RPC BatchStoreData failed", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "node": receiver.String(), "ms": dur}) responses <- &MessageWithError{Error: err, Message: response, KeysCount: len(toStore), Receiver: receiver, DurationMS: dur} return } + logtrace.Info(ctx, "RPC BatchStoreData completed", logtrace.Fields{logtrace.FieldModule: "p2p", "node": receiver.String(), "keys": len(toStore), "ms": dur}) responses <- &MessageWithError{Message: response, KeysCount: len(toStore), Receiver: receiver, DurationMS: dur} } }(node, key) @@ -1856,7 +1876,7 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[ } func (s *DHT) batchFindNode(ctx context.Context, payload [][]byte, nodes map[string]*Node, contacted map[string]bool, txid string) (chan *MessageWithError, bool) { - logtrace.Debug(ctx, "Batch find node begin", logtrace.Fields{ + logtrace.Info(ctx, "Batch find node begin", logtrace.Fields{ logtrace.FieldModule: "dht", "task_id": txid, "nodes_count": len(nodes), @@ -1927,7 +1947,7 @@ func (s *DHT) batchFindNode(ctx context.Context, payload [][]byte, nodes map[str } wg.Wait() close(responses) - logtrace.Debug(ctx, "Batch find node 
done", logtrace.Fields{ + logtrace.Info(ctx, "Batch find node done", logtrace.Fields{ logtrace.FieldModule: "dht", "nodes_count": len(nodes), "len_resp": len(responses), diff --git a/p2p/kademlia/fetch_and_store.go b/p2p/kademlia/fetch_and_store.go index d7bc0f28..8f954364 100644 --- a/p2p/kademlia/fetch_and_store.go +++ b/p2p/kademlia/fetch_and_store.go @@ -275,6 +275,8 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey, // GetBatchValuesFromNode get values from node in bateches func (s *DHT) GetBatchValuesFromNode(ctx context.Context, keys []string, n *Node) (bool, map[string][]byte, []string, error) { logtrace.Debug(ctx, "sending batch fetch request", logtrace.Fields{"node-ip": n.IP, "keys": len(keys)}) + // Minimal per-RPC visibility for background replication path + logtrace.Info(ctx, "RPC BatchFindValues send", logtrace.Fields{"node": n.String(), "keys": len(keys)}) messageType := BatchFindValues @@ -349,6 +351,7 @@ func (s *DHT) GetBatchValuesFromNode(ctx context.Context, keys []string, n *Node } logtrace.Debug(ctx, "batch fetch response rcvd and keys verified", logtrace.Fields{"node-ip": n.IP, "received-keys": len(decompressedMap), "verified-keys": len(retMap), "failed-keys": len(failedKeys)}) + logtrace.Info(ctx, "RPC BatchFindValues completed", logtrace.Fields{"node": n.String(), "received_keys": len(decompressedMap), "verified_keys": len(retMap)}) return v.Done, retMap, failedKeys, nil } diff --git a/p2p/kademlia/network.go b/p2p/kademlia/network.go index c887eab1..3bca8f20 100644 --- a/p2p/kademlia/network.go +++ b/p2p/kademlia/network.go @@ -936,7 +936,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, return s.generateResponseMessage(BatchGetValues, message.Sender, ResultFailed, err.Error()) } - logtrace.Debug(ctx, "Batch get values request received", logtrace.Fields{ + logtrace.Info(ctx, "Batch get values request received", logtrace.Fields{ logtrace.FieldModule: "p2p", "from": 
message.Sender.String(), }) @@ -966,7 +966,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, return s.generateResponseMessage(BatchGetValues, message.Sender, ResultFailed, err.Error()) } - logtrace.Debug(ctx, "Batch get values request processed", logtrace.Fields{ + logtrace.Info(ctx, "Batch get values request processed", logtrace.Fields{ logtrace.FieldModule: "p2p", "requested-keys": len(keys), "found": count, @@ -1006,7 +1006,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message, func (s *Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFindValuesRequest, ip string, reqID string) (isDone bool, compressedData []byte, err error) { // log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("keys", len(req.Keys)).WithField("from-ip", ip).Info("batch find values request received") - logtrace.Debug(ctx, "Batch find values request received", logtrace.Fields{ + logtrace.Info(ctx, "Batch find values request received", logtrace.Fields{ logtrace.FieldModule: "p2p", "from": ip, "keys": len(req.Keys), @@ -1015,7 +1015,7 @@ func (s *Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFi if len(req.Keys) > 0 { // log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("keys[0]", req.Keys[0]).WithField("keys[len]", req.Keys[len(req.Keys)-1]). 
// WithField("from-ip", ip).Debug("first & last batch keys") - logtrace.Debug(ctx, "First & last batch keys", logtrace.Fields{ + logtrace.Info(ctx, "First & last batch keys", logtrace.Fields{ logtrace.FieldModule: "p2p", "p2p-req-id": reqID, "keys[0]": req.Keys[0], @@ -1208,7 +1208,7 @@ func (s *Network) handleBatchStoreData(ctx context.Context, message *Message) (r } // log.P2P().WithContext(ctx).Info("handle batch store data request received") - logtrace.Debug(ctx, "Handle batch store data request received", logtrace.Fields{ + logtrace.Info(ctx, "Handle batch store data request received", logtrace.Fields{ logtrace.FieldModule: "p2p", "sender": message.Sender.String(), "keys": len(request.Data), @@ -1238,7 +1238,7 @@ func (s *Network) handleBatchStoreData(ctx context.Context, message *Message) (r }, } // log.P2P().WithContext(ctx).Info("handle batch store data request processed") - logtrace.Debug(ctx, "Handle batch store data request processed", logtrace.Fields{ + logtrace.Info(ctx, "Handle batch store data request processed", logtrace.Fields{ logtrace.FieldModule: "p2p", "sender": message.Sender.String(), "keys": len(request.Data), diff --git a/p2p/kademlia/rq_symbols.go b/p2p/kademlia/rq_symbols.go index c8ad2000..819d0944 100644 --- a/p2p/kademlia/rq_symbols.go +++ b/p2p/kademlia/rq_symbols.go @@ -16,16 +16,20 @@ const ( ) func (s *DHT) startStoreSymbolsWorker(ctx context.Context) { - logtrace.Debug(ctx, "start delete data worker", logtrace.Fields{logtrace.FieldModule: "p2p"}) + // Minimal visibility for lifecycle + each tick + logtrace.Info(ctx, "rq_symbols worker started", logtrace.Fields{logtrace.FieldModule: "p2p"}) for { select { case <-time.After(defaultSoreSymbolsInterval): + tickStart := time.Now() + logtrace.Info(ctx, "rq_symbols: tick", logtrace.Fields{"interval": defaultSoreSymbolsInterval.String()}) if err := s.storeSymbols(ctx); err != nil { logtrace.Error(ctx, "store symbols", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err}) 
} + logtrace.Info(ctx, "rq_symbols: tick complete", logtrace.Fields{"ms": time.Since(tickStart).Milliseconds()}) case <-ctx.Done(): - logtrace.Error(ctx, "closing store symbols worker", logtrace.Fields{logtrace.FieldModule: "p2p"}) + logtrace.Info(ctx, "rq_symbols worker stopping", logtrace.Fields{logtrace.FieldModule: "p2p"}) return } } @@ -37,13 +41,26 @@ func (s *DHT) storeSymbols(ctx context.Context) error { return fmt.Errorf("get to do store symbol dirs: %w", err) } + // Minimal visibility: how many dirs to process this tick + logtrace.Info(ctx, "rq_symbols: todo directories", logtrace.Fields{"count": len(dirs)}) + for _, dir := range dirs { - logtrace.Debug(ctx, "rq_symbols worker: start scanning dir & storing raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID}) + // Pre-count symbols in this directory + preCount := -1 + if set, rerr := utils.ReadDirFilenames(dir.Dir); rerr == nil { + preCount = len(set) + } + start := time.Now() + logtrace.Info(ctx, "rq_symbols: processing dir", logtrace.Fields{"dir": dir.Dir, "txid": dir.TXID, "symbols": preCount}) if err := s.scanDirAndStoreSymbols(ctx, dir.Dir, dir.TXID); err != nil { logtrace.Error(ctx, "scan and store symbols", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err}) } - - logtrace.Debug(ctx, "rq_symbols worker: scanned dir & stored raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID}) + // Post-count remaining symbols + remCount := -1 + if set, rerr := utils.ReadDirFilenames(dir.Dir); rerr == nil { + remCount = len(set) + } + logtrace.Info(ctx, "rq_symbols: processed dir", logtrace.Fields{"dir": dir.Dir, "txid": dir.TXID, "remaining": remCount, "ms": time.Since(start).Milliseconds()}) } return nil diff --git a/supernode/services/cascade/adaptors/p2p.go b/supernode/services/cascade/adaptors/p2p.go index 3621167e..944b9b50 100644 --- a/supernode/services/cascade/adaptors/p2p.go +++ b/supernode/services/cascade/adaptors/p2p.go @@ -57,7 +57,7 @@ type 
StoreArtefactsRequest struct { } func (p *p2pImpl) StoreArtefacts(ctx context.Context, req StoreArtefactsRequest, f logtrace.Fields) error { - logtrace.Debug(ctx, "About to store artefacts (metadata + symbols)", logtrace.Fields{"taskID": req.TaskID, "id_files": len(req.IDFiles)}) + logtrace.Info(ctx, "StoreArtefacts start", logtrace.Fields{"taskID": req.TaskID, "actionID": req.ActionID, "id_files": len(req.IDFiles), "symbols_dir": req.SymbolsDir}) // Optionally enable per-node store RPC capture for this task if !p.metricsDisabled { @@ -71,7 +71,17 @@ func (p *p2pImpl) StoreArtefacts(ctx context.Context, req StoreArtefactsRequest, return errors.Wrap(err, "error storing artefacts") } dur := time.Since(start).Milliseconds() - logtrace.Debug(ctx, "artefacts have been stored", logtrace.Fields{"taskID": req.TaskID, "symbols_first_pass": firstPassSymbols, "symbols_total": totalSymbols, "id_files_count": len(req.IDFiles)}) + // After first-pass, log how many symbols remain on disk + remaining := 0 + if req.SymbolsDir != "" { + if keys, werr := walkSymbolTree(req.SymbolsDir); werr == nil { + remaining = len(keys) + } + } + logtrace.Info(ctx, "StoreArtefacts completed", logtrace.Fields{"taskID": req.TaskID, "symbols_first_pass": firstPassSymbols, "symbols_total_available": totalSymbols, "id_files_count": len(req.IDFiles), "symbols_left_on_disk": remaining, "ms": dur}) + if remaining == 0 { + logtrace.Info(ctx, "Symbols directory is empty after first-pass", logtrace.Fields{"taskID": req.TaskID, "dir": req.SymbolsDir}) + } // Record store summary for later event emission cm.SetStoreSummary(req.TaskID, firstPassSymbols, totalSymbols, len(req.IDFiles), dur) return nil @@ -101,7 +111,8 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action if targetCount < 1 && totalAvailable > 0 { targetCount = 1 } - logtrace.Debug(ctx, "first-pass target coverage (symbols)", logtrace.Fields{ + logtrace.Info(ctx, "Symbols discovered in directory", 
logtrace.Fields{"total_symbols": totalAvailable, "dir": symbolsDir}) + logtrace.Info(ctx, "first-pass target coverage (symbols)", logtrace.Fields{ "total_symbols": totalAvailable, "target_percent": storeSymbolsPercent, "target_count": targetCount, @@ -116,8 +127,8 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action } sort.Strings(keys) // deterministic order inside the sample } - - logtrace.Debug(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)}) + logtrace.Info(ctx, "first-pass selected symbols", logtrace.Fields{"selected": len(keys), "of_total": totalAvailable, "dir": symbolsDir}) + logtrace.Info(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)}) /* stream in fixed-size batches -------------------------------------- */ @@ -153,6 +164,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action payload = append(payload, symBytes...) // Send as the same data type you use for symbols + logtrace.Info(ctx, "RPC StoreBatch (first-batch): metadata + symbols", logtrace.Fields{"taskID": taskID, "metadata_count": len(metadataFiles), "symbols_in_batch": len(symBytes), "payload_total": len(payload)}) bctx, cancel := context.WithTimeout(ctx, storeBatchContextTimeout) bctx = cm.WithTaskID(bctx, taskID) err = p.p2p.StoreBatch(bctx, payload, storage.P2PDataRaptorQSymbol, taskID) @@ -160,6 +172,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action if err != nil { return totalSymbols, totalAvailable, fmt.Errorf("p2p store batch (first): %w", err) } + logtrace.Info(ctx, "RPC StoreBatch completed (first-batch)", logtrace.Fields{"taskID": taskID, "symbols_stored": len(symBytes)}) totalSymbols += len(symBytes) // No per-RPC metrics propagated from p2p @@ -170,6 +183,14 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action return totalSymbols, totalAvailable, fmt.Errorf("delete symbols: %w", err) } } + // Log remaining symbols in 
directory after deletion + if rem, werr := walkSymbolTree(symbolsDir); werr == nil { + if left := len(rem); left > 0 { + logtrace.Info(ctx, "symbols left after first-batch", logtrace.Fields{"taskID": taskID, "left": left}) + } else { + logtrace.Info(ctx, "Symbols directory is empty after first-batch", logtrace.Fields{"taskID": taskID, "dir": symbolsDir}) + } + } firstBatchProcessed = true } else { @@ -188,12 +209,20 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action if totalAvailable > 0 { achievedPct = (float64(totalSymbols) / float64(totalAvailable)) * 100.0 } - logtrace.Debug(ctx, "first-pass achieved coverage (symbols)", + logtrace.Info(ctx, "first-pass achieved coverage (symbols)", logtrace.Fields{"achieved_symbols": totalSymbols, "achieved_percent": achievedPct}) if err := p.rqStore.UpdateIsFirstBatchStored(actionID); err != nil { return totalSymbols, totalAvailable, fmt.Errorf("update first-batch flag: %w", err) } + // Final remaining count after first pass flagged + if rem, werr := walkSymbolTree(symbolsDir); werr == nil { + if left := len(rem); left > 0 { + logtrace.Info(ctx, "first-pass completed; symbols remaining on disk", logtrace.Fields{"taskID": taskID, "left": left, "dir": symbolsDir}) + } else { + logtrace.Info(ctx, "first-pass completed; directory empty", logtrace.Fields{"taskID": taskID, "dir": symbolsDir}) + } + } return totalSymbols, totalAvailable, nil @@ -228,7 +257,7 @@ func walkSymbolTree(root string) ([]string, error) { // storeSymbolsInP2P loads a batch of symbols and stores them via P2P. // Returns (ratePct, requests, count, error) where `count` is the number of symbols in this batch. 
func (c *p2pImpl) storeSymbolsInP2P(ctx context.Context, taskID, root string, fileKeys []string) (int, error) { - logtrace.Debug(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)}) + logtrace.Info(ctx, "loading batch symbols", logtrace.Fields{"taskID": taskID, "count": len(fileKeys)}) symbols, err := utils.LoadSymbols(root, fileKeys) if err != nil { @@ -239,15 +268,21 @@ func (c *p2pImpl) storeSymbolsInP2P(ctx context.Context, taskID, root string, fi symCtx = cm.WithTaskID(symCtx, taskID) defer cancel() + logtrace.Info(ctx, "RPC StoreBatch (symbols batch)", logtrace.Fields{"taskID": taskID, "symbols_in_batch": len(symbols)}) if err := c.p2p.StoreBatch(symCtx, symbols, storage.P2PDataRaptorQSymbol, taskID); err != nil { return len(symbols), fmt.Errorf("p2p store batch: %w", err) } - logtrace.Debug(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)}) + logtrace.Info(ctx, "RPC StoreBatch completed (symbols batch)", logtrace.Fields{"taskID": taskID, "symbols_stored": len(symbols)}) if err := utils.DeleteSymbols(ctx, root, fileKeys); err != nil { return len(symbols), fmt.Errorf("delete symbols: %w", err) } - logtrace.Debug(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)}) + // After deletion, log remaining count in directory + left := -1 + if rem, werr := walkSymbolTree(root); werr == nil { + left = len(rem) + } + logtrace.Info(ctx, "deleted batch symbols", logtrace.Fields{"taskID": taskID, "count": len(symbols), "symbols_left_on_disk": left}) // No per-RPC metrics propagated from p2p return len(symbols), nil diff --git a/supernode/services/cascade/download.go b/supernode/services/cascade/download.go index 8271fc51..e0e77a6b 100644 --- a/supernode/services/cascade/download.go +++ b/supernode/services/cascade/download.go @@ -39,7 +39,7 @@ func (task *CascadeRegistrationTask) Download( send func(resp *DownloadResponse) error, ) (err error) { fields := logtrace.Fields{logtrace.FieldMethod: "Download", 
logtrace.FieldRequest: req} - logtrace.Debug(ctx, "Cascade download request received", fields) + logtrace.Info(ctx, "Cascade download request received", fields) // Ensure task status is finalized regardless of outcome defer func() { @@ -56,7 +56,7 @@ func (task *CascadeRegistrationTask) Download( fields[logtrace.FieldError] = err return task.wrapErr(ctx, "failed to get action", err, fields) } - logtrace.Debug(ctx, "Action retrieved", fields) + logtrace.Info(ctx, "Action retrieved", fields) task.streamDownloadEvent(SupernodeEventTypeActionRetrieved, "Action retrieved", "", "", send) if actionDetails.GetAction().State != actiontypes.ActionStateDone { @@ -65,25 +65,26 @@ func (task *CascadeRegistrationTask) Download( fields[logtrace.FieldActionState] = actionDetails.GetAction().State return task.wrapErr(ctx, "action not found", err, fields) } - logtrace.Debug(ctx, "Action state validated", fields) + logtrace.Info(ctx, "Action state validated", fields) metadata, err := task.decodeCascadeMetadata(ctx, actionDetails.GetAction().Metadata, fields) if err != nil { fields[logtrace.FieldError] = err.Error() return task.wrapErr(ctx, "error decoding cascade metadata", err, fields) } - logtrace.Debug(ctx, "Cascade metadata decoded", fields) + logtrace.Info(ctx, "Cascade metadata decoded", fields) task.streamDownloadEvent(SupernodeEventTypeMetadataDecoded, "Cascade metadata decoded", "", "", send) // Notify: network retrieval phase begins task.streamDownloadEvent(SupernodeEventTypeNetworkRetrieveStarted, "Network retrieval started", "", "", send) + logtrace.Info(ctx, "Starting network retrieval of artefacts", logtrace.Fields{logtrace.FieldActionID: actionDetails.GetAction().ActionID}) filePath, tmpDir, err := task.downloadArtifacts(ctx, actionDetails.GetAction().ActionID, metadata, fields, send) if err != nil { fields[logtrace.FieldError] = err.Error() return task.wrapErr(ctx, "failed to download artifacts", err, fields) } - logtrace.Debug(ctx, "File reconstructed and hash 
verified", fields) + logtrace.Info(ctx, "File reconstructed and hash verified", fields) // Notify: decode completed, file ready on disk task.streamDownloadEvent(SupernodeEventTypeDecodeCompleted, "Decode completed", filePath, tmpDir, send) @@ -91,7 +92,7 @@ func (task *CascadeRegistrationTask) Download( } func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, actionID string, metadata actiontypes.CascadeMetadata, fields logtrace.Fields, send func(resp *DownloadResponse) error) (string, string, error) { - logtrace.Debug(ctx, "started downloading the artifacts", fields) + logtrace.Info(ctx, "started downloading the artifacts", fields) var ( layout codec.Layout @@ -101,15 +102,19 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, acti ) for _, indexID := range metadata.RqIdsIds { + iStart := time.Now() + logtrace.Info(ctx, "RPC Retrieve index file", logtrace.Fields{"index_id": indexID}) indexFile, err := task.P2PClient.Retrieve(ctx, indexID) if err != nil || len(indexFile) == 0 { + logtrace.Warn(ctx, "Retrieve index file failed or empty", logtrace.Fields{"index_id": indexID, logtrace.FieldError: fmt.Sprintf("%v", err)}) continue } + logtrace.Info(ctx, "Retrieve index file completed", logtrace.Fields{"index_id": indexID, "bytes": len(indexFile), "ms": time.Since(iStart).Milliseconds()}) // Parse index file to get layout IDs indexData, err := task.parseIndexFile(indexFile) if err != nil { - logtrace.Debug(ctx, "failed to parse index file", fields) + logtrace.Warn(ctx, "failed to parse index file", logtrace.Fields{"index_id": indexID, logtrace.FieldError: err.Error()}) continue } @@ -117,14 +122,14 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, acti var netMS, decMS int64 layout, netMS, decMS, layoutAttempts, err = task.retrieveLayoutFromIndex(ctx, indexData, fields) if err != nil { - logtrace.Debug(ctx, "failed to retrieve layout from index", fields) + logtrace.Warn(ctx, "failed to retrieve 
layout from index", logtrace.Fields{"index_id": indexID, logtrace.FieldError: err.Error(), "attempts": layoutAttempts}) continue } layoutFetchMS = netMS layoutDecodeMS = decMS if len(layout.Blocks) > 0 { - logtrace.Debug(ctx, "layout file retrieved via index", fields) + logtrace.Info(ctx, "layout file retrieved via index", logtrace.Fields{"index_id": indexID, "attempts": layoutAttempts, "net_ms": layoutFetchMS, "decode_ms": layoutDecodeMS}) break } } @@ -163,7 +168,7 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( if targetRequiredCount < 1 && totalSymbols > 0 { targetRequiredCount = 1 } - logtrace.Debug(ctx, "Retrieving target-required symbols for decode", fields) + logtrace.Info(ctx, "Retrieving target-required symbols for decode", logtrace.Fields{"total_symbols": totalSymbols, "target_required_percent": targetRequiredPercent, "target_required_count": targetRequiredCount}) if !task.config.MetricsDisabled { cm.StartRetrieveCapture(actionID) @@ -180,6 +185,8 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( if reqCount > totalSymbols { reqCount = totalSymbols } + rStart := time.Now() + logtrace.Info(ctx, "RPC BatchRetrieve symbols", logtrace.Fields{"action_id": actionID, "requested": reqCount, "total_candidates": totalSymbols}) symbols, err := task.P2PClient.BatchRetrieve(ctxRetrieve, allSymbols, reqCount, actionID) if err != nil { fields[logtrace.FieldError] = err.Error() @@ -187,9 +194,12 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( return "", "", fmt.Errorf("batch retrieve symbols: %w", err) } retrieveMS := time.Since(retrieveStart).Milliseconds() + logtrace.Info(ctx, "RPC BatchRetrieve completed", logtrace.Fields{"action_id": actionID, "received": len(symbols), "ms": time.Since(rStart).Milliseconds()}) // Measure decode duration decodeStart := time.Now() + dStart := time.Now() + logtrace.Info(ctx, "RQ Decode start", logtrace.Fields{"action_id": actionID}) decodeInfo, err := task.RQ.Decode(ctx, 
adaptors.DecodeRequest{ ActionID: actionID, Symbols: symbols, @@ -201,6 +211,7 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( return "", "", fmt.Errorf("decode symbols using RaptorQ: %w", err) } decodeMS := time.Since(decodeStart).Milliseconds() + logtrace.Info(ctx, "RQ Decode completed", logtrace.Fields{"action_id": actionID, "ms": time.Since(dStart).Milliseconds(), "tmp_dir": decodeInfo.DecodeTmpDir, "file_path": decodeInfo.FilePath}) // Set minimal retrieve summary and emit event strictly from internal collector if !task.config.MetricsDisabled { @@ -237,7 +248,17 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( fields[logtrace.FieldError] = err.Error() return "", decodeInfo.DecodeTmpDir, err } - logtrace.Debug(ctx, "File successfully restored and hash verified", fields) + // Log the state of the temporary decode directory + if decodeInfo.DecodeTmpDir != "" { + if set, derr := utils.ReadDirFilenames(decodeInfo.DecodeTmpDir); derr == nil { + if left := len(set); left > 0 { + logtrace.Info(ctx, "Decode tmp directory has files remaining", logtrace.Fields{"dir": decodeInfo.DecodeTmpDir, "left": left}) + } else { + logtrace.Info(ctx, "Decode tmp directory is empty", logtrace.Fields{"dir": decodeInfo.DecodeTmpDir}) + } + } + } + logtrace.Info(ctx, "File successfully restored and hash verified", fields) return decodeInfo.FilePath, decodeInfo.DecodeTmpDir, nil } @@ -279,20 +300,26 @@ func (task *CascadeRegistrationTask) retrieveLayoutFromIndex(ctx context.Context for _, layoutID := range indexData.LayoutIDs { attempts++ t0 := time.Now() + logtrace.Info(ctx, "RPC Retrieve layout file", logtrace.Fields{"layout_id": layoutID, "attempt": attempts}) layoutFile, err := task.P2PClient.Retrieve(ctx, layoutID) - totalFetchMS += time.Since(t0).Milliseconds() + took := time.Since(t0).Milliseconds() + totalFetchMS += took if err != nil || len(layoutFile) == 0 { + logtrace.Warn(ctx, "Retrieve layout file failed or empty", logtrace.Fields{"layout_id": 
layoutID, "attempt": attempts, "ms": took, logtrace.FieldError: fmt.Sprintf("%v", err)}) continue } t1 := time.Now() layout, _, _, err := parseRQMetadataFile(layoutFile) - totalDecodeMS += time.Since(t1).Milliseconds() + decMS := time.Since(t1).Milliseconds() + totalDecodeMS += decMS if err != nil { + logtrace.Warn(ctx, "Parse layout file failed", logtrace.Fields{"layout_id": layoutID, "attempt": attempts, "decode_ms": decMS, logtrace.FieldError: err.Error()}) continue } if len(layout.Blocks) > 0 { + logtrace.Info(ctx, "Layout file retrieved and parsed", logtrace.Fields{"layout_id": layoutID, "attempt": attempts, "net_ms": took, "decode_ms": decMS}) return layout, totalFetchMS, totalDecodeMS, attempts, nil } } @@ -306,9 +333,12 @@ func (task *CascadeRegistrationTask) CleanupDownload(ctx context.Context, action } // For now, we use actionID as the directory path to maintain compatibility + logtrace.Info(ctx, "Cleanup download directory", logtrace.Fields{"dir": actionID}) if err := os.RemoveAll(actionID); err != nil { + logtrace.Warn(ctx, "Cleanup download directory failed", logtrace.Fields{"dir": actionID, logtrace.FieldError: err.Error()}) return errors.Errorf("failed to delete download directory: %s, :%s", actionID, err.Error()) } + logtrace.Info(ctx, "Cleanup download directory completed", logtrace.Fields{"dir": actionID}) return nil } diff --git a/supernode/services/cascade/helper.go b/supernode/services/cascade/helper.go index b22ec14a..e6197b41 100644 --- a/supernode/services/cascade/helper.go +++ b/supernode/services/cascade/helper.go @@ -36,7 +36,7 @@ func (task *CascadeRegistrationTask) fetchAction(ctx context.Context, actionID s if res.GetAction().ActionID == "" { return nil, task.wrapErr(ctx, "action not found", errors.New(""), f) } - logtrace.Debug(ctx, "action has been retrieved", f) + logtrace.Info(ctx, "action has been retrieved", f) return res.GetAction(), nil } @@ -46,7 +46,7 @@ func (task *CascadeRegistrationTask) ensureIsTopSupernode(ctx 
context.Context, b if err != nil { return task.wrapErr(ctx, "failed to get top SNs", err, f) } - logtrace.Debug(ctx, "Fetched Top Supernodes", f) + logtrace.Info(ctx, "Fetched Top Supernodes", f) if !supernode.Exists(top.Supernodes, task.config.SupernodeAccountAddress) { // Build information about supernodes for better error context @@ -54,7 +54,7 @@ func (task *CascadeRegistrationTask) ensureIsTopSupernode(ctx context.Context, b for i, sn := range top.Supernodes { addresses[i] = sn.SupernodeAccount } - logtrace.Debug(ctx, "Supernode not in top list", logtrace.Fields{ + logtrace.Info(ctx, "Supernode not in top list", logtrace.Fields{ "currentAddress": task.config.SupernodeAccountAddress, "topSupernodes": addresses, }) @@ -78,7 +78,7 @@ func (task *CascadeRegistrationTask) verifyDataHash(ctx context.Context, dh []by if string(b64) != expected { return task.wrapErr(ctx, "data hash doesn't match", errors.New(""), f) } - logtrace.Debug(ctx, "request data-hash has been matched with the action data-hash", f) + logtrace.Info(ctx, "request data-hash has been matched with the action data-hash", f) return nil } @@ -110,7 +110,7 @@ func (task *CascadeRegistrationTask) verifySignatureAndDecodeLayout(ctx context. if err := task.LumeraClient.Verify(ctx, creator, []byte(indexFileB64), creatorSigBytes); err != nil { return codec.Layout{}, "", task.wrapErr(ctx, "failed to verify creator signature", err, f) } - logtrace.Debug(ctx, "creator signature successfully verified", f) + logtrace.Info(ctx, "creator signature successfully verified", f) // Decode index file to get the layout signature indexFile, err := decodeIndexFile(indexFileB64) @@ -132,7 +132,7 @@ func (task *CascadeRegistrationTask) verifySignatureAndDecodeLayout(ctx context. 
if err := task.LumeraClient.Verify(ctx, creator, layoutB64, layoutSigBytes); err != nil { return codec.Layout{}, "", task.wrapErr(ctx, "failed to verify layout signature", err, f) } - logtrace.Debug(ctx, "layout signature successfully verified", f) + logtrace.Info(ctx, "layout signature successfully verified", f) return encodedMeta, indexFile.LayoutSignature, nil } @@ -175,6 +175,20 @@ func (task *CascadeRegistrationTask) generateRQIDFiles(ctx context.Context, meta // storeArtefacts persists cascade artefacts (ID files + RaptorQ symbols) via the // P2P adaptor. P2P does not return metrics; cascade summarizes and emits them. func (task *CascadeRegistrationTask) storeArtefacts(ctx context.Context, actionID string, idFiles [][]byte, symbolsDir string, f logtrace.Fields) error { + if f == nil { + f = logtrace.Fields{} + } + lf := logtrace.Fields{ + logtrace.FieldActionID: actionID, + logtrace.FieldTaskID: task.ID(), + "id_files_count": len(idFiles), + "symbols_dir": symbolsDir, + } + for k, v := range f { + lf[k] = v + } + logtrace.Info(ctx, "storeArtefacts invoked", lf) + return task.P2P.StoreArtefacts(ctx, adaptors.StoreArtefactsRequest{ IDFiles: idFiles, SymbolsDir: symbolsDir, @@ -214,7 +228,7 @@ func (task *CascadeRegistrationTask) emitArtefactsStored( b, _ := json.MarshalIndent(payload, "", " ") msg := string(b) fields["metrics_json"] = msg - logtrace.Debug(ctx, "artefacts have been stored", fields) + logtrace.Info(ctx, "artefacts have been stored", fields) task.streamEvent(SupernodeEventTypeArtefactsStored, msg, "", send) // No central state to clear; adaptor returns calls inline } @@ -279,7 +293,7 @@ func (task *CascadeRegistrationTask) verifyActionFee(ctx context.Context, action requiredFee := sdk.NewCoin("ulume", math.NewInt(amount)) // Log the calculated fee - logtrace.Debug(ctx, "calculated required fee", logtrace.Fields{ + logtrace.Info(ctx, "calculated required fee", logtrace.Fields{ "fee": requiredFee.String(), "dataBytes": dataSize, }) @@ -377,6 +391,6 
@@ func (task *CascadeRegistrationTask) VerifyDownloadSignature(ctx context.Context return task.wrapErr(ctx, "failed to verify download signature", err, fields) } - logtrace.Debug(ctx, "download signature successfully verified", fields) + logtrace.Info(ctx, "download signature successfully verified", fields) return nil } diff --git a/supernode/services/cascade/register.go b/supernode/services/cascade/register.go index 4b456734..8d6cfd07 100644 --- a/supernode/services/cascade/register.go +++ b/supernode/services/cascade/register.go @@ -46,7 +46,7 @@ func (task *CascadeRegistrationTask) Register( ) (err error) { fields := logtrace.Fields{logtrace.FieldMethod: "Register", logtrace.FieldRequest: req} - logtrace.Debug(ctx, "Cascade registration request received", fields) + logtrace.Info(ctx, "Cascade registration request received", fields) // Ensure task status and resources are finalized regardless of outcome defer func() { @@ -78,14 +78,14 @@ func (task *CascadeRegistrationTask) Register( fields[logtrace.FieldCreator] = action.Creator fields[logtrace.FieldStatus] = action.State fields[logtrace.FieldPrice] = action.Price - logtrace.Debug(ctx, "Action retrieved", fields) + logtrace.Info(ctx, "Action retrieved", fields) task.streamEvent(SupernodeEventTypeActionRetrieved, "Action retrieved", "", send) /* 2. Verify action fee -------------------------------------------------------- */ if err := task.verifyActionFee(ctx, action, req.DataSize, fields); err != nil { return err } - logtrace.Debug(ctx, "Action fee verified", fields) + logtrace.Info(ctx, "Action fee verified", fields) task.streamEvent(SupernodeEventTypeActionFeeVerified, "Action fee verified", "", send) /* 3. 
Ensure this super-node is eligible -------------------------------------- */ @@ -93,7 +93,7 @@ func (task *CascadeRegistrationTask) Register( if err := task.ensureIsTopSupernode(ctx, uint64(action.BlockHeight), fields); err != nil { return err } - logtrace.Debug(ctx, "Top supernode eligibility confirmed", fields) + logtrace.Info(ctx, "Top supernode eligibility confirmed", fields) task.streamEvent(SupernodeEventTypeTopSupernodeCheckPassed, "Top supernode eligibility confirmed", "", send) /* 4. Decode cascade metadata -------------------------------------------------- */ @@ -101,14 +101,14 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Debug(ctx, "Cascade metadata decoded", fields) + logtrace.Info(ctx, "Cascade metadata decoded", fields) task.streamEvent(SupernodeEventTypeMetadataDecoded, "Cascade metadata decoded", "", send) /* 5. Verify data hash --------------------------------------------------------- */ if err := task.verifyDataHash(ctx, req.DataHash, cascadeMeta.DataHash, fields); err != nil { return err } - logtrace.Debug(ctx, "Data hash verified", fields) + logtrace.Info(ctx, "Data hash verified", fields) task.streamEvent(SupernodeEventTypeDataHashVerified, "Data hash verified", "", send) /* 6. Encode the raw data ------------------------------------------------------ */ @@ -116,7 +116,7 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Debug(ctx, "Input encoded", fields) + logtrace.Info(ctx, "Input encoded", fields) task.streamEvent(SupernodeEventTypeInputEncoded, "Input encoded", "", send) /* 7. Signature verification + layout decode ---------------------------------- */ @@ -126,7 +126,7 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Debug(ctx, "Signature verified", fields) + logtrace.Info(ctx, "Signature verified", fields) task.streamEvent(SupernodeEventTypeSignatureVerified, "Signature verified", "", send) /* 8. 
Generate RQ-ID files ----------------------------------------------------- */ @@ -134,14 +134,14 @@ func (task *CascadeRegistrationTask) Register( if err != nil { return err } - logtrace.Debug(ctx, "RQID files generated", fields) + logtrace.Info(ctx, "RQID files generated", fields) task.streamEvent(SupernodeEventTypeRQIDsGenerated, "RQID files generated", "", send) /* 9. Consistency checks ------------------------------------------------------- */ if err := verifyIDs(layout, encResp.Metadata); err != nil { return task.wrapErr(ctx, "failed to verify IDs", err, fields) } - logtrace.Debug(ctx, "RQIDs verified", fields) + logtrace.Info(ctx, "RQIDs verified", fields) task.streamEvent(SupernodeEventTypeRqIDsVerified, "RQIDs verified", "", send) /* 10. Simulate finalize to avoid storing artefacts if it would fail ---------- */ @@ -152,7 +152,7 @@ func (task *CascadeRegistrationTask) Register( task.streamEvent(SupernodeEventTypeFinalizeSimulationFailed, "Finalize simulation failed", "", send) return task.wrapErr(ctx, "finalize action simulation failed", err, fields) } - logtrace.Debug(ctx, "Finalize simulation passed", fields) + logtrace.Info(ctx, "Finalize simulation passed", fields) // Transmit as a standard event so SDK can propagate it (dedicated type) task.streamEvent(SupernodeEventTypeFinalizeSimulated, "Finalize simulation passed", "", send) @@ -175,7 +175,7 @@ func (task *CascadeRegistrationTask) Register( } txHash := resp.TxResponse.TxHash fields[logtrace.FieldTxHash] = txHash - logtrace.Debug(ctx, "Action finalized", fields) + logtrace.Info(ctx, "Action finalized", fields) task.streamEvent(SupernodeEventTypeActionFinalized, "Action finalized", txHash, send) return nil From 83462830086c75775e80d523f1f3b1411ca78e7e Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Tue, 30 Sep 2025 08:58:25 +0500 Subject: [PATCH 13/15] Add datadog logs --- .github/workflows/build&release.yml | 7 +- Makefile | 4 +- pkg/logtrace/datadog.go | 204 ++++++++++++++++++++++++++++ 
pkg/logtrace/log.go | 6 + supernode/cmd/start.go | 8 ++ 5 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 pkg/logtrace/datadog.go diff --git a/.github/workflows/build&release.yml b/.github/workflows/build&release.yml index ead3e013..3dbf21bf 100644 --- a/.github/workflows/build&release.yml +++ b/.github/workflows/build&release.yml @@ -82,6 +82,9 @@ jobs: echo "binary_name=supernode-linux-amd64" >> $GITHUB_OUTPUT - name: Build Release Version + env: + DD_API_KEY: ${{ secrets.DD_API_KEY }} + DD_SITE: ${{ secrets.DD_SITE }} run: | mkdir -p release @@ -94,7 +97,9 @@ jobs: -ldflags="-s -w \ -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.Version=${{ steps.vars.outputs.version }} \ -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.GitCommit=${{ steps.vars.outputs.git_commit }} \ - -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=${{ steps.vars.outputs.build_time }}" \ + -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=${{ steps.vars.outputs.build_time }} \ + -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDAPIKey=${DD_API_KEY} \ + -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDSite=${DD_SITE}" \ -o release/supernode \ ./supernode diff --git a/Makefile b/Makefile index 01272fbf..81773d3b 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,9 @@ BUILD_TIME ?= $(shell date -u '+%Y-%m-%d_%H:%M:%S') # Linker flags for version information LDFLAGS = -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.Version=$(VERSION) \ -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.GitCommit=$(GIT_COMMIT) \ - -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=$(BUILD_TIME) + -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=$(BUILD_TIME) \ + -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDAPIKey=$(DD_API_KEY) \ + -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDSite=$(DD_SITE) # Linker flags for sn-manager SN_MANAGER_LDFLAGS = -X 
main.Version=$(VERSION) \ diff --git a/pkg/logtrace/datadog.go b/pkg/logtrace/datadog.go new file mode 100644 index 00000000..95830836 --- /dev/null +++ b/pkg/logtrace/datadog.go @@ -0,0 +1,204 @@ +package logtrace + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/json" + "net/http" + "os" + "strings" + "sync" + "time" + + "go.uber.org/zap/zapcore" +) + +// Minimal Datadog Logs Forwarder (hard-coded config) kept separate for cleanliness. + +type ddCfg struct { + APIKey string + Site string // e.g. "datadoghq.com", "datadoghq.eu" + Service string // e.g. used as Datadog 'service'; we will set to node IP + Host string // optional; defaults to machine hostname +} + +var ( + ddOnce sync.Once + ddConfig ddCfg + ddClient = &http.Client{Timeout: 5 * time.Second} + ddQueue chan map[string]any + // Optional build-time injection via -ldflags + // -ldflags "-X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDAPIKey=... -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDSite=us5.datadoghq.com" + DDAPIKey string + DDSite string +) + +// SetupDatadog initializes the Datadog forwarding once. +func SetupDatadog(service string) { + ddOnce.Do(func() { + initDatadog(service) + }) +} + +// ForwardDatadog enqueues a log line for forwarding (non-blocking). 
+func ForwardDatadog(level zapcore.Level, ctx context.Context, msg string, fields Fields) { + ddForward(level, ctx, msg, fields) +} + +// SetDatadogService allows setting the Datadog service (e.g., to the node IP) +func SetDatadogService(service string) { + if s := strings.TrimSpace(service); s != "" { + ddConfig.Service = s + } +} + +// SetDatadogHost sets the Datadog host field (use the supernode identity) +func SetDatadogHost(host string) { + if h := strings.TrimSpace(host); h != "" { + ddConfig.Host = h + } +} + +func initDatadog(service string) { + // Base defaults (site default chosen based on earlier validation) + ddConfig = ddCfg{Site: "us5.datadoghq.com", Service: service, Host: ""} + + // Resolve from env and build flags + apiKey := strings.TrimSpace(os.Getenv("DD_API_KEY")) + if apiKey == "" { + apiKey = strings.TrimSpace(DDAPIKey) + } + + site := strings.TrimSpace(os.Getenv("DD_SITE")) + if site == "" { + site = strings.TrimSpace(DDSite) + if site == "" { + site = ddConfig.Site + } + } + + ddConfig.APIKey = apiKey + ddConfig.Site = site + + // Only enable forwarding when a real key is present + if ddConfig.APIKey == "" { + return + } + + ddQueue = make(chan map[string]any, 256) + go ddLoop() +} + +// ddForward enqueues a single log entry for Datadog intake. 
+func ddForward(level zapcore.Level, ctx context.Context, msg string, fields Fields) { + if ddQueue == nil { + return + } + + // Map zap level to Datadog status + status := "info" + switch level { + case zapcore.DebugLevel: + status = "debug" + case zapcore.InfoLevel: + status = "info" + case zapcore.WarnLevel: + status = "warn" + case zapcore.ErrorLevel: + status = "error" + case zapcore.FatalLevel: + status = "critical" + } + + // Build a compact attributes map + attrs := map[string]any{} + for k, v := range fields { + attrs[k] = v + } + // Attach correlation ID if present + if cid := extractCorrelationID(ctx); cid != "unknown" { + attrs["correlation_id"] = cid + } + + entry := map[string]any{ + "message": msg, + "status": status, + "service": ddConfig.Service, + "host": ddConfig.Host, + "attributes": attrs, // avoid collisions with top-level fields + } + + select { + case ddQueue <- entry: + default: + // drop if queue is full to avoid blocking critical paths + } +} + +// ddLoop batches log entries and sends to Datadog intake. +func ddLoop() { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + + batch := make([]map[string]any, 0, 32) + flush := func() { + if len(batch) == 0 { + return + } + // Marshal batch + buf := &bytes.Buffer{} + if err := json.NewEncoder(buf).Encode(batch); err != nil { + batch = batch[:0] + return + } + _ = ddPost(buf.Bytes()) + batch = batch[:0] + } + + for { + select { + case e, ok := <-ddQueue: + if !ok { + flush() + return + } + batch = append(batch, e) + if len(batch) >= 32 { + flush() + } + case <-ticker.C: + flush() + } + } +} + +func ddPost(payload []byte) error { + url := "https://http-intake.logs." 
+ strings.TrimSpace(ddConfig.Site) + "/api/v2/logs" + + // gzip the JSON payload + var gzBuf bytes.Buffer + gw := gzip.NewWriter(&gzBuf) + if _, err := gw.Write(payload); err == nil { + _ = gw.Close() + } else { + _ = gw.Close() + gzBuf = *bytes.NewBuffer(payload) + } + + req, err := http.NewRequest(http.MethodPost, url, &gzBuf) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + req.Header.Set("Content-Encoding", "gzip") + req.Header.Set("DD-API-KEY", ddConfig.APIKey) + + resp, err := ddClient.Do(req) + if err != nil { + return err + } + _ = resp.Body.Close() + return nil +} diff --git a/pkg/logtrace/log.go b/pkg/logtrace/log.go index 02b8f36e..d1bcf169 100644 --- a/pkg/logtrace/log.go +++ b/pkg/logtrace/log.go @@ -45,6 +45,9 @@ func Setup(serviceName string) { if err != nil { panic(err) } + + // Initialize Datadog forwarding (minimal integration in separate file) + SetupDatadog(serviceName) } // getLogLevel returns the log level from environment variable LOG_LEVEL @@ -129,6 +132,9 @@ func logWithLevel(level zapcore.Level, ctx context.Context, message string, fiel case zapcore.FatalLevel: logger.Fatal(message, zapFields...) 
} + + // Forward to Datadog (non-blocking, best-effort) + ForwardDatadog(level, ctx, message, fields) } // Error logs an error message with structured fields diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go index 952113d3..4529aec2 100644 --- a/supernode/cmd/start.go +++ b/supernode/cmd/start.go @@ -78,6 +78,14 @@ The supernode will connect to the Lumera network and begin participating in the logtrace.Debug(ctx, "Configuration verification successful", logtrace.Fields{}) + // Set Datadog host to identity and service to latest IP address from chain + logtrace.SetDatadogHost(appConfig.SupernodeConfig.Identity) + if snInfo, err := lumeraClient.SuperNode().GetSupernodeWithLatestAddress(ctx, appConfig.SupernodeConfig.Identity); err == nil && snInfo != nil { + if ip := strings.TrimSpace(snInfo.LatestAddress); ip != "" { + logtrace.SetDatadogService(ip) + } + } + // Initialize RaptorQ store for Cascade processing rqStore, err := initRQStore(ctx, appConfig) if err != nil { From 0e35cf63852d7fc64d39396dc8dcf1024e859ba6 Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Tue, 30 Sep 2025 12:48:29 +0500 Subject: [PATCH 14/15] TxResponse --- pkg/lumera/modules/tx/impl.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/lumera/modules/tx/impl.go b/pkg/lumera/modules/tx/impl.go index bcdb694d..2f80ac58 100644 --- a/pkg/lumera/modules/tx/impl.go +++ b/pkg/lumera/modules/tx/impl.go @@ -310,7 +310,9 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou } if len(result.TxResponse.Events) == 0 { - logtrace.Error(ctx, "Failed to retrieve transaction events after 5 attempts", nil) + logtrace.Error(ctx, "Failed to retrieve transaction events after 5 attempts", logtrace.Fields{ + "response": result.TxResponse.String(), + }) } return result, nil From 61dcec8028a9b36f307541cccc5237f64131d071 Mon Sep 17 00:00:00 2001 From: Matee Ullah Malik Date: Tue, 30 Sep 2025 16:16:44 +0500 Subject: [PATCH 15/15] skip check for public
field --- go.mod | 4 +- go.sum | 4 +- sdk/README.md | 4 +- .../server/cascade/cascade_action_server.go | 20 +---- supernode/services/cascade/download.go | 74 +++++++++++++++---- 5 files changed, 72 insertions(+), 34 deletions(-) diff --git a/go.mod b/go.mod index 46091df2..a581736e 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ replace ( require ( cosmossdk.io/math v1.5.3 github.com/AlecAivazis/survey/v2 v2.3.7 - github.com/LumeraProtocol/lumera v1.7.0 + github.com/LumeraProtocol/lumera v1.7.2 github.com/LumeraProtocol/rq-go v0.2.1 github.com/btcsuite/btcutil v1.0.3-0.20201208143702-a53e38424cce github.com/cenkalti/backoff/v4 v4.3.0 @@ -34,7 +34,6 @@ require ( github.com/pkg/errors v0.9.1 github.com/shirou/gopsutil/v3 v3.24.5 github.com/spf13/cobra v1.8.1 - github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.10.0 go.uber.org/mock v0.5.2 go.uber.org/ratelimit v0.3.1 @@ -166,6 +165,7 @@ require ( github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.7.1 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/spf13/viper v1.19.0 // indirect github.com/stretchr/objx v0.5.2 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d // indirect diff --git a/go.sum b/go.sum index d8170371..839f29a2 100644 --- a/go.sum +++ b/go.sum @@ -63,8 +63,8 @@ github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3 github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ= github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= -github.com/LumeraProtocol/lumera v1.7.0 h1:F5zgRBnCtgGfdMB6jz01PFWIzbS8VjQfCu1H9OYt3BU= -github.com/LumeraProtocol/lumera v1.7.0/go.mod h1:c1M+sjewuCvxw+pznwlspUzenDJI8Y+suKB3RFKS2Wo= +github.com/LumeraProtocol/lumera v1.7.2 
h1:qA0qwEOfCqW6yY232/MEK6gfLYq4HVYSmbcOCOZqEoc= +github.com/LumeraProtocol/lumera v1.7.2/go.mod h1:c1M+sjewuCvxw+pznwlspUzenDJI8Y+suKB3RFKS2Wo= github.com/LumeraProtocol/rq-go v0.2.1 h1:8B3UzRChLsGMmvZ+UVbJsJj6JZzL9P9iYxbdUwGsQI4= github.com/LumeraProtocol/rq-go v0.2.1/go.mod h1:APnKCZRh1Es2Vtrd2w4kCLgAyaL5Bqrkz/BURoRJ+O8= github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63nhn5WAunQHLTznkw5W8b1Xc0dNjp83s= diff --git a/sdk/README.md b/sdk/README.md index b0aecb20..cf2501cf 100644 --- a/sdk/README.md +++ b/sdk/README.md @@ -221,11 +221,13 @@ if err != nil { // taskID can be used to track the download progress ``` +Note: If the action's cascade metadata sets `public: true`, the signature may be left empty to allow anonymous download. + **Parameters:** - `ctx context.Context`: Context for the operation - `actionID string`: ID of the action to download - `outputDir string`: Directory where the downloaded file will be saved -- `signature string`: Base64-encoded signature for download authorization +- `signature string`: Base64-encoded signature for download authorization (leave empty for public cascades) **Signature Creation for Download:** The download signature is created by combining the action ID with the creator's address, signing it, and base64 encoding the result. 
diff --git a/supernode/node/action/server/cascade/cascade_action_server.go b/supernode/node/action/server/cascade/cascade_action_server.go index 180ebd3d..6a38b750 100644 --- a/supernode/node/action/server/cascade/cascade_action_server.go +++ b/supernode/node/action/server/cascade/cascade_action_server.go @@ -229,21 +229,8 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS task := server.factory.NewCascadeRegistrationTask() - // Verify signature if provided - if req.GetSignature() != "" { - // Cast to concrete type to access helper method - if cascadeTask, ok := task.(*cascadeService.CascadeRegistrationTask); ok { - err := cascadeTask.VerifyDownloadSignature(ctx, req.GetActionId(), req.GetSignature()) - if err != nil { - fields[logtrace.FieldError] = err.Error() - logtrace.Error(ctx, "signature verification failed", fields) - return fmt.Errorf("signature verification failed: %w", err) - } - } else { - logtrace.Error(ctx, "unable to cast task to CascadeRegistrationTask", fields) - return fmt.Errorf("unable to verify signature: task type assertion failed") - } - } + // Authorization is enforced inside the task based on metadata.Public. + // If public, signature is skipped; if private, signature is required. 
var restoredFilePath string var tmpDir string @@ -260,7 +247,8 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS }() err := task.Download(ctx, &cascadeService.DownloadRequest{ - ActionID: req.GetActionId(), + ActionID: req.GetActionId(), + Signature: req.GetSignature(), }, func(resp *cascadeService.DownloadResponse) error { grpcResp := &pb.DownloadResponse{ ResponseType: &pb.DownloadResponse_Event{ diff --git a/supernode/services/cascade/download.go b/supernode/services/cascade/download.go index e0e77a6b..ede254a4 100644 --- a/supernode/services/cascade/download.go +++ b/supernode/services/cascade/download.go @@ -24,6 +24,9 @@ const targetRequiredPercent = 17 type DownloadRequest struct { ActionID string + // Signature is required for private downloads. For public cascade + // actions (metadata.Public == true), this is ignored. + Signature string } type DownloadResponse struct { @@ -33,6 +36,12 @@ type DownloadResponse struct { DownloadedDir string } +// Download retrieves a cascade artefact by action ID. +// +// Authorization behavior: +// - If the cascade metadata has Public = true, signature verification is skipped +// and the file is downloadable by anyone. +// - If Public = false, a valid download signature is required. 
func (task *CascadeRegistrationTask) Download( ctx context.Context, req *DownloadRequest, @@ -53,17 +62,19 @@ func (task *CascadeRegistrationTask) Download( actionDetails, err := task.LumeraClient.GetAction(ctx, req.ActionID) if err != nil { - fields[logtrace.FieldError] = err + // Ensure error is logged as string for consistency + fields[logtrace.FieldError] = err.Error() return task.wrapErr(ctx, "failed to get action", err, fields) } logtrace.Info(ctx, "Action retrieved", fields) task.streamDownloadEvent(SupernodeEventTypeActionRetrieved, "Action retrieved", "", "", send) if actionDetails.GetAction().State != actiontypes.ActionStateDone { + // Return a clearer error message when action is not yet finalized err = errors.New("action is not in a valid state") fields[logtrace.FieldError] = "action state is not done yet" fields[logtrace.FieldActionState] = actionDetails.GetAction().State - return task.wrapErr(ctx, "action not found", err, fields) + return task.wrapErr(ctx, "action not finalized yet", err, fields) } logtrace.Info(ctx, "Action state validated", fields) @@ -75,6 +86,24 @@ func (task *CascadeRegistrationTask) Download( logtrace.Info(ctx, "Cascade metadata decoded", fields) task.streamDownloadEvent(SupernodeEventTypeMetadataDecoded, "Cascade metadata decoded", "", "", send) + // Enforce download authorization based on metadata.Public + // - If public: skip signature verification; allow anonymous downloads + // - If private: require a valid signature + if !metadata.Public { + if req.Signature == "" { + fields[logtrace.FieldError] = "missing signature for private download" + // Provide a descriptive message without a fabricated root error + return task.wrapErr(ctx, "private cascade requires a download signature", nil, fields) + } + if err := task.VerifyDownloadSignature(ctx, req.ActionID, req.Signature); err != nil { + fields[logtrace.FieldError] = err.Error() + return task.wrapErr(ctx, "failed to verify download signature", err, fields) + } + 
logtrace.Info(ctx, "Download signature verified for private cascade", fields) + } else { + logtrace.Info(ctx, "Public cascade: skipping download signature verification", fields) + } + // Notify: network retrieval phase begins task.streamDownloadEvent(SupernodeEventTypeNetworkRetrieveStarted, "Network retrieval started", "", "", send) @@ -82,6 +111,12 @@ func (task *CascadeRegistrationTask) Download( filePath, tmpDir, err := task.downloadArtifacts(ctx, actionDetails.GetAction().ActionID, metadata, fields, send) if err != nil { fields[logtrace.FieldError] = err.Error() + // Ensure temporary decode directory is cleaned if decode failed after being created + if tmpDir != "" { + if cerr := task.CleanupDownload(ctx, tmpDir); cerr != nil { + logtrace.Warn(ctx, "cleanup of tmp dir after error failed", logtrace.Fields{"tmp_dir": tmpDir, logtrace.FieldError: cerr.Error()}) + } + } return task.wrapErr(ctx, "failed to download artifacts", err, fields) } logtrace.Info(ctx, "File reconstructed and hash verified", fields) @@ -144,6 +179,10 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, acti return task.restoreFileFromLayout(ctx, layout, metadata.DataHash, actionID, send) } +// restoreFileFromLayout reconstructs the original file from the provided layout +// and a subset of retrieved symbols. The method deduplicates symbol identifiers +// before network retrieval to avoid redundant requests and ensure the requested +// count reflects unique symbols only. func (task *CascadeRegistrationTask) restoreFileFromLayout( ctx context.Context, layout codec.Layout, @@ -155,9 +194,16 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout( fields := logtrace.Fields{ logtrace.FieldActionID: actionID, } - var allSymbols []string + // Deduplicate symbols across blocks to avoid redundant requests + symSet := make(map[string]struct{}) for _, block := range layout.Blocks { - allSymbols = append(allSymbols, block.Symbols...) 
+ for _, s := range block.Symbols { + symSet[s] = struct{}{} + } + } + allSymbols := make([]string, 0, len(symSet)) + for s := range symSet { + allSymbols = append(allSymbols, s) } sort.Strings(allSymbols) @@ -327,18 +373,20 @@ func (task *CascadeRegistrationTask) retrieveLayoutFromIndex(ctx context.Context return codec.Layout{}, totalFetchMS, totalDecodeMS, attempts, errors.New("no valid layout found in index") } -func (task *CascadeRegistrationTask) CleanupDownload(ctx context.Context, actionID string) error { - if actionID == "" { - return errors.New("actionID is empty") +// CleanupDownload removes the temporary directory created during decode. +// The parameter is a directory path (not an action ID). +func (task *CascadeRegistrationTask) CleanupDownload(ctx context.Context, dirPath string) error { + if dirPath == "" { + return errors.New("directory path is empty") } - // For now, we use actionID as the directory path to maintain compatibility - logtrace.Info(ctx, "Cleanup download directory", logtrace.Fields{"dir": actionID}) - if err := os.RemoveAll(actionID); err != nil { - logtrace.Warn(ctx, "Cleanup download directory failed", logtrace.Fields{"dir": actionID, logtrace.FieldError: err.Error()}) - return errors.Errorf("failed to delete download directory: %s, :%s", actionID, err.Error()) + // For now, we use tmp directory path as provided by decoder + logtrace.Info(ctx, "Cleanup download directory", logtrace.Fields{"dir": dirPath}) + if err := os.RemoveAll(dirPath); err != nil { + logtrace.Warn(ctx, "Cleanup download directory failed", logtrace.Fields{"dir": dirPath, logtrace.FieldError: err.Error()}) + return errors.Errorf("failed to delete download directory: %s, :%s", dirPath, err.Error()) } - logtrace.Info(ctx, "Cleanup download directory completed", logtrace.Fields{"dir": actionID}) + logtrace.Info(ctx, "Cleanup download directory completed", logtrace.Fields{"dir": dirPath}) return nil }