diff --git a/.github/workflows/build&release.yml b/.github/workflows/build&release.yml
index ead3e013..3dbf21bf 100644
--- a/.github/workflows/build&release.yml
+++ b/.github/workflows/build&release.yml
@@ -82,6 +82,9 @@ jobs:
echo "binary_name=supernode-linux-amd64" >> $GITHUB_OUTPUT
- name: Build Release Version
+ env:
+ DD_API_KEY: ${{ secrets.DD_API_KEY }}
+ DD_SITE: ${{ secrets.DD_SITE }}
run: |
mkdir -p release
@@ -94,7 +97,9 @@ jobs:
-ldflags="-s -w \
-X github.com/LumeraProtocol/supernode/v2/supernode/cmd.Version=${{ steps.vars.outputs.version }} \
-X github.com/LumeraProtocol/supernode/v2/supernode/cmd.GitCommit=${{ steps.vars.outputs.git_commit }} \
- -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=${{ steps.vars.outputs.build_time }}" \
+ -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=${{ steps.vars.outputs.build_time }} \
+ -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDAPIKey=${DD_API_KEY} \
+ -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDSite=${DD_SITE}" \
-o release/supernode \
./supernode
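
For context on how these flags land: Go's -X linker flag can only set package-level, uninitialized string variables, so pkg/logtrace presumably declares something like the sketch below. The two names come from the flags above; everything else is an assumption.

    // Sketch, not the actual file: -X only works on package-level string vars.
    package logtrace

    var (
        // DDAPIKey is injected at link time via
        //   -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDAPIKey=$DD_API_KEY
        // and stays empty in builds that omit the flag.
        DDAPIKey string

        // DDSite is injected the same way, e.g. "datadoghq.com" (assumed value).
        DDSite string
    )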
diff --git a/Makefile b/Makefile
index fe5a9852..81773d3b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
.PHONY: build build-release build-sncli build-sn-manager
-.PHONY: install-lumera setup-supernodes system-test-setup
+.PHONY: install-lumera setup-supernodes system-test-setup install-deps
.PHONY: gen-cascade gen-supernode
.PHONY: test-e2e test-unit test-integration test-system
@@ -11,7 +11,9 @@ BUILD_TIME ?= $(shell date -u '+%Y-%m-%d_%H:%M:%S')
# Linker flags for version information
LDFLAGS = -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.Version=$(VERSION) \
-X github.com/LumeraProtocol/supernode/v2/supernode/cmd.GitCommit=$(GIT_COMMIT) \
- -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=$(BUILD_TIME)
+ -X github.com/LumeraProtocol/supernode/v2/supernode/cmd.BuildTime=$(BUILD_TIME) \
+ -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDAPIKey=$(DD_API_KEY) \
+ -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDSite=$(DD_SITE)
# Linker flags for sn-manager
SN_MANAGER_LDFLAGS = -X main.Version=$(VERSION) \
@@ -140,9 +142,9 @@ test-e2e:
# Run cascade e2e tests only
test-cascade:
@echo "Running cascade e2e tests..."
- @cd tests/system && go test -tags=system_test -v -run TestCascadeE2E .
+ @cd tests/system && go mod tidy && go test -tags=system_test -v -run TestCascadeE2E .
# Run sn-manager e2e tests only
test-sn-manager:
@echo "Running sn-manager e2e tests..."
- @cd tests/system && go test -tags=system_test -v -run '^TestSNManager' .
\ No newline at end of file
+ @cd tests/system && go test -tags=system_test -v -run '^TestSNManager' .
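
Since DD_API_KEY and DD_SITE are expanded here as ordinary make variables, they can be supplied from the environment or on the command line, e.g. "make build DD_API_KEY=xxx DD_SITE=datadoghq.com" (values hypothetical). When unset, the -X flags expand to empty strings, so the binary presumably behaves as if no Datadog credentials were baked in.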
diff --git a/go.mod b/go.mod
index b7f53ec1..a581736e 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,6 @@ module github.com/LumeraProtocol/supernode/v2
go 1.24.1
replace (
- github.com/LumeraProtocol/supernode/v2/supernode => ./supernode
github.com/bytedance/sonic => github.com/bytedance/sonic v1.14.0
github.com/bytedance/sonic/loader => github.com/bytedance/sonic/loader v0.3.0
)
@@ -11,7 +10,7 @@ replace (
require (
cosmossdk.io/math v1.5.3
github.com/AlecAivazis/survey/v2 v2.3.7
- github.com/LumeraProtocol/lumera v1.7.0
+ github.com/LumeraProtocol/lumera v1.7.2
github.com/LumeraProtocol/rq-go v0.2.1
github.com/btcsuite/btcutil v1.0.3-0.20201208143702-a53e38424cce
github.com/cenkalti/backoff/v4 v4.3.0
@@ -35,7 +34,6 @@ require (
github.com/pkg/errors v0.9.1
github.com/shirou/gopsutil/v3 v3.24.5
github.com/spf13/cobra v1.8.1
- github.com/spf13/viper v1.19.0
github.com/stretchr/testify v1.10.0
go.uber.org/mock v0.5.2
go.uber.org/ratelimit v0.3.1
@@ -167,6 +165,7 @@ require (
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.7.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
+ github.com/spf13/viper v1.19.0 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d // indirect
diff --git a/go.sum b/go.sum
index d8170371..839f29a2 100644
--- a/go.sum
+++ b/go.sum
@@ -63,8 +63,8 @@ github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3
github.com/DataDog/zstd v1.5.5 h1:oWf5W7GtOLgp6bciQYDmhHHjdhYkALu6S/5Ni9ZgSvQ=
github.com/DataDog/zstd v1.5.5/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw=
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
-github.com/LumeraProtocol/lumera v1.7.0 h1:F5zgRBnCtgGfdMB6jz01PFWIzbS8VjQfCu1H9OYt3BU=
-github.com/LumeraProtocol/lumera v1.7.0/go.mod h1:c1M+sjewuCvxw+pznwlspUzenDJI8Y+suKB3RFKS2Wo=
+github.com/LumeraProtocol/lumera v1.7.2 h1:qA0qwEOfCqW6yY232/MEK6gfLYq4HVYSmbcOCOZqEoc=
+github.com/LumeraProtocol/lumera v1.7.2/go.mod h1:c1M+sjewuCvxw+pznwlspUzenDJI8Y+suKB3RFKS2Wo=
github.com/LumeraProtocol/rq-go v0.2.1 h1:8B3UzRChLsGMmvZ+UVbJsJj6JZzL9P9iYxbdUwGsQI4=
github.com/LumeraProtocol/rq-go v0.2.1/go.mod h1:APnKCZRh1Es2Vtrd2w4kCLgAyaL5Bqrkz/BURoRJ+O8=
github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63nhn5WAunQHLTznkw5W8b1Xc0dNjp83s=
diff --git a/p2p/kademlia/bootstrap.go b/p2p/kademlia/bootstrap.go
index 5b29f44d..25dc3b54 100644
--- a/p2p/kademlia/bootstrap.go
+++ b/p2p/kademlia/bootstrap.go
@@ -102,7 +102,7 @@ func (s *DHT) setBootstrapNodesFromConfigVar(ctx context.Context, bootstrapNodes
})
}
s.options.BootstrapNodes = nodes
- logtrace.Info(ctx, "Bootstrap nodes set from config var", logtrace.Fields{
+ logtrace.Debug(ctx, "Bootstrap nodes set from config var", logtrace.Fields{
logtrace.FieldModule: "p2p",
"bootstrap_nodes": nodes,
})
diff --git a/p2p/kademlia/dht.go b/p2p/kademlia/dht.go
index 69c45023..d0022274 100644
--- a/p2p/kademlia/dht.go
+++ b/p2p/kademlia/dht.go
@@ -42,9 +42,10 @@ const (
delKeysCountThreshold = 10
lowSpaceThreshold = 50 // GB
batchRetrieveSize = 1000
- storeSameSymbolsBatchConcurrency = 3
- fetchSymbolsBatchConcurrency = 6
- minimumDataStoreSuccessRate = 75.0
+
+ storeSameSymbolsBatchConcurrency = 3
+ fetchSymbolsBatchConcurrency = 6
+ minimumDataStoreSuccessRate = 75.0
maxIterations = 4
macConcurrentNetworkStoreCalls = 16
@@ -103,7 +104,7 @@ func (s *DHT) bootstrapIgnoreList(ctx context.Context) error {
}
if added > 0 {
- logtrace.Info(ctx, "Ignore list bootstrapped from replication info", logtrace.Fields{
+ logtrace.Debug(ctx, "Ignore list bootstrapped from replication info", logtrace.Fields{
logtrace.FieldModule: "p2p",
"ignored_count": added,
})
@@ -141,6 +142,9 @@ type Options struct {
// Keyring for credentials
Keyring keyring.Keyring
+
+	// MetricsDisabled gates DHT-level metrics emission (p2pmetrics hooks and snapshots)
+ MetricsDisabled bool
}
// NewDHT returns a new DHT node
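
A minimal sketch of how an embedder might set the new option; MetricsDisabled is the field added above, while the surrounding literal is illustrative:

    // Illustrative only: the other fields stand in for the existing config.
    opts := &kademlia.Options{
        // ... existing network, bootstrap, and keyring configuration ...
        MetricsDisabled: true, // suppress p2pmetrics hooks and snapshots
    }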
@@ -358,7 +362,7 @@ func (s *DHT) Store(ctx context.Context, data []byte, typ int) (string, error) {
// measured success rate for node RPCs is below the configured minimum, an error
// is returned. Metrics are not returned through the API.
func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID string) error {
- logtrace.Info(ctx, "Store DB batch begin", logtrace.Fields{
+ logtrace.Info(ctx, "DHT StoreBatch begin", logtrace.Fields{
logtrace.FieldModule: "dht",
logtrace.FieldTaskID: taskID,
"records": len(values),
@@ -366,7 +370,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s
if err := s.store.StoreBatch(ctx, values, typ, true); err != nil {
return fmt.Errorf("store batch: %v", err)
}
- logtrace.Info(ctx, "Store DB batch done, store network batch begin", logtrace.Fields{
+ logtrace.Info(ctx, "DHT StoreBatch: local stored; network begin", logtrace.Fields{
logtrace.FieldModule: "dht",
logtrace.FieldTaskID: taskID,
})
@@ -376,7 +380,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s
return fmt.Errorf("iterate batch store: %v", err)
}
- logtrace.Info(ctx, "Store network batch workers done", logtrace.Fields{
+ logtrace.Info(ctx, "DHT StoreBatch: network done", logtrace.Fields{
logtrace.FieldModule: "dht",
logtrace.FieldTaskID: taskID,
})
@@ -387,6 +391,7 @@ func (s *DHT) StoreBatch(ctx context.Context, values [][]byte, typ int, taskID s
// Retrieve data from the network using a key. The key is the base58-encoded
// identifier of the data.
func (s *DHT) Retrieve(ctx context.Context, key string, localOnly ...bool) ([]byte, error) {
+ start := time.Now()
decoded := base58.Decode(key)
if len(decoded) != B/8 {
return nil, fmt.Errorf("invalid key: %v", key)
@@ -402,6 +407,7 @@ func (s *DHT) Retrieve(ctx context.Context, key string, localOnly ...bool) ([]by
// retrieve the key/value from queries storage
value, err := s.store.Retrieve(ctx, decoded)
if err == nil && len(value) > 0 {
+ logtrace.Info(ctx, "DHT Retrieve local hit", logtrace.Fields{"key": hex.EncodeToString(decoded), "ms": time.Since(start).Milliseconds()})
return value, nil
} else if err != nil {
logtrace.Error(ctx, "Error retrieving key from local storage", logtrace.Fields{
@@ -417,20 +423,23 @@ func (s *DHT) Retrieve(ctx context.Context, key string, localOnly ...bool) ([]by
}
// if not found locally, iterative find value from kademlia network
+ logtrace.Info(ctx, "DHT Retrieve network lookup", logtrace.Fields{"key": dbKey})
peerValue, err := s.iterate(ctx, IterateFindValue, decoded, nil, 0)
if err != nil {
return nil, errors.Errorf("retrieve from peer: %w", err)
}
if len(peerValue) > 0 {
- logtrace.Info(ctx, "Not found locally, retrieved from other nodes", logtrace.Fields{
+ logtrace.Info(ctx, "DHT Retrieve network hit", logtrace.Fields{
logtrace.FieldModule: "dht",
"key": dbKey,
"data_len": len(peerValue),
+ "ms": time.Since(start).Milliseconds(),
})
} else {
- logtrace.Info(ctx, "Not found locally, not found in other nodes", logtrace.Fields{
+ logtrace.Info(ctx, "DHT Retrieve miss", logtrace.Fields{
logtrace.FieldModule: "dht",
"key": dbKey,
+ "ms": time.Since(start).Milliseconds(),
})
}
@@ -525,15 +534,18 @@ func (s *DHT) GetValueFromNode(ctx context.Context, target []byte, n *Node) ([]b
cctx, ccancel := context.WithTimeout(ctx, time.Second*5)
defer ccancel()
+ // Minimal per-RPC visibility
+ logtrace.Info(ctx, "RPC FindValue send", logtrace.Fields{"node": n.String(), "key": hex.EncodeToString(target)})
response, err := s.network.Call(cctx, request, false)
if err != nil {
- logtrace.Info(ctx, "Network call request failed", logtrace.Fields{
+ logtrace.Debug(ctx, "Network call request failed", logtrace.Fields{
logtrace.FieldModule: "p2p",
logtrace.FieldError: err.Error(),
"request": request.String(),
})
return nil, fmt.Errorf("network call request %s failed: %w", request.String(), err)
}
+ logtrace.Info(ctx, "RPC FindValue completed", logtrace.Fields{"node": n.String()})
v, ok := response.Data.(*FindValueResponse)
if ok && v.Status.Result == ResultOk && len(v.Value) > 0 {
@@ -593,18 +605,35 @@ func (s *DHT) doMultiWorkers(ctx context.Context, iterativeType int, target []by
// new a request message
request := s.newMessage(messageType, receiver, data)
+ // Minimal per-RPC visibility
+ op := ""
+ switch messageType {
+ case FindNode:
+ op = "FindNode"
+ case FindValue:
+ op = "FindValue"
+ default:
+ op = "RPC"
+ }
+ fields := logtrace.Fields{"node": receiver.String()}
+ if messageType == FindValue {
+ fields["key"] = hex.EncodeToString(target)
+ }
+ logtrace.Info(ctx, "RPC "+op+" send", fields)
// send the request and receive the response
response, err := s.network.Call(ctx, request, false)
if err != nil {
- logtrace.Info(ctx, "Network call request failed", logtrace.Fields{
+ logtrace.Info(ctx, "Iterate worker RPC failed", logtrace.Fields{
logtrace.FieldModule: "p2p",
logtrace.FieldError: err.Error(),
"request": request.String(),
+ "node": receiver.String(),
})
// node is unreachable, remove the node
//removedNodes = append(removedNodes, receiver)
return
}
+ logtrace.Info(ctx, "RPC "+op+" completed", logtrace.Fields{"node": receiver.String()})
// send the response to message channel
responses <- response
@@ -667,6 +696,7 @@ func (s *DHT) fetchAndAddLocalKeys(ctx context.Context, hexKeys []string, result
func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32, txID string, localOnly ...bool) (result map[string][]byte, err error) {
start := time.Now()
+ logtrace.Info(ctx, "DHT BatchRetrieve begin", logtrace.Fields{"txid": txID, "keys": len(keys), "required": required})
result = make(map[string][]byte)
var resMap sync.Map
var foundLocalCount int32
@@ -739,7 +769,9 @@ func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32,
return nil, fmt.Errorf("fetch and add local keys: %v", err)
}
// Report how many were found locally, for event metrics
- p2pmetrics.ReportFoundLocal(p2pmetrics.TaskIDFromContext(ctx), int(foundLocalCount))
+ if !s.options.MetricsDisabled {
+ p2pmetrics.ReportFoundLocal(p2pmetrics.TaskIDFromContext(ctx), int(foundLocalCount))
+ }
if foundLocalCount >= required {
return result, nil
}
@@ -785,10 +817,13 @@ func (s *DHT) BatchRetrieve(ctx context.Context, keys []string, required int32,
wg.Wait()
netFound := int(atomic.LoadInt32(&networkFound))
+ logtrace.Info(ctx, "DHT BatchRetrieve complete", logtrace.Fields{"txid": txID, "found_local": foundLocalCount, "found_network": netFound, "required": required, "ms": time.Since(start).Milliseconds()})
// Record batch retrieve stats for internal DHT snapshot window
s.metrics.RecordBatchRetrieve(len(keys), int(required), int(foundLocalCount), netFound, time.Since(start))
// Also feed retrieve counts into the per-task collector for stream events
- p2pmetrics.SetRetrieveBatchSummary(p2pmetrics.TaskIDFromContext(ctx), len(keys), int(required), int(foundLocalCount), netFound, time.Since(start).Milliseconds())
+ if !s.options.MetricsDisabled {
+ p2pmetrics.SetRetrieveBatchSummary(p2pmetrics.TaskIDFromContext(ctx), len(keys), int(required), int(foundLocalCount), netFound, time.Since(start).Milliseconds())
+ }
return result, nil
}
@@ -899,7 +934,7 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node,
defer cancel()
for nodeID, node := range nodes {
if s.ignorelist.Banned(node) {
- logtrace.Info(ctx, "Ignore banned node in iterate batch get values", logtrace.Fields{
+ logtrace.Debug(ctx, "Ignore banned node in iterate batch get values", logtrace.Fields{
logtrace.FieldModule: "dht",
"node": node.String(),
})
@@ -946,14 +981,16 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node,
}
mu.Unlock()
// record failed RPC per-node
- p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{
- IP: node.IP,
- Address: node.String(),
- Keys: 0,
- Success: false,
- Error: err.Error(),
- DurationMS: time.Since(callStart).Milliseconds(),
- })
+ if !s.options.MetricsDisabled {
+ p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{
+ IP: node.IP,
+ Address: node.String(),
+ Keys: 0,
+ Success: false,
+ Error: err.Error(),
+ DurationMS: time.Since(callStart).Milliseconds(),
+ })
+ }
return
}
@@ -976,14 +1013,16 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node,
}
// record successful RPC per-node (returned may be 0). Success is true when no error.
- p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{
- IP: node.IP,
- Address: node.String(),
- Keys: returned,
- Success: true,
- Error: "",
- DurationMS: time.Since(callStart).Milliseconds(),
- })
+ if !s.options.MetricsDisabled {
+ p2pmetrics.RecordRetrieve(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{
+ IP: node.IP,
+ Address: node.String(),
+ Keys: returned,
+ Success: true,
+ Error: "",
+ DurationMS: time.Since(callStart).Milliseconds(),
+ })
+ }
}(node, nodeID)
}
@@ -1033,10 +1072,12 @@ func (s *DHT) iterateBatchGetValues(ctx context.Context, nodes map[string]*Node,
func (s *DHT) doBatchGetValuesCall(ctx context.Context, node *Node, requestKeys map[string]KeyValWithClosest) (map[string]KeyValWithClosest, error) {
request := s.newMessage(BatchGetValues, node, &BatchGetValuesRequest{Data: requestKeys})
+ logtrace.Info(ctx, "RPC BatchGetValues send", logtrace.Fields{"node": node.String(), "keys": len(requestKeys)})
response, err := s.network.Call(ctx, request, false)
if err != nil {
return nil, fmt.Errorf("network call request %s failed: %w", request.String(), err)
}
+ logtrace.Info(ctx, "RPC BatchGetValues completed", logtrace.Fields{"node": node.String()})
resp, ok := response.Data.(*BatchGetValuesResponse)
if !ok {
@@ -1153,7 +1194,7 @@ func (s *DHT) iterate(ctx context.Context, iterativeType int, target []byte, dat
}
default:
- logtrace.Error(ctx, "Unknown message type", logtrace.Fields{
+ logtrace.Info(ctx, "Unknown message type", logtrace.Fields{
logtrace.FieldModule: "dht",
"type": response.MessageType,
})
@@ -1295,7 +1336,7 @@ func (s *DHT) iterateFindValue(ctx context.Context, iterativeType int, target []
})
if nl.Len() == 0 {
- logtrace.Error(ctx, "Nodes list length is 0", logtrace.Fields{
+ logtrace.Info(ctx, "Nodes list length is 0", logtrace.Fields{
logtrace.FieldModule: "p2p",
"task_id": taskID,
"key": sKey,
@@ -1514,7 +1555,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte,
for i := Alpha; i < nl.Len() && finalStoreCount < int32(Alpha); i++ {
n := nl.Nodes[i]
if s.ignorelist.Banned(n) {
- logtrace.Info(ctx, "Ignore banned node during sequential store", logtrace.Fields{
+ logtrace.Debug(ctx, "Ignore banned node during sequential store", logtrace.Fields{
logtrace.FieldModule: "p2p",
"node": n.String(),
"task_id": taskID,
@@ -1570,7 +1611,7 @@ func (s *DHT) storeToAlphaNodes(ctx context.Context, nl *NodeList, data []byte,
func (s *DHT) removeNode(ctx context.Context, node *Node) {
// ensure this is not our own address
if bytes.Equal(node.ID, s.ht.self.ID) {
- logtrace.Info(ctx, "Trying to remove itself", logtrace.Fields{
+ logtrace.Debug(ctx, "Trying to remove itself", logtrace.Fields{
logtrace.FieldModule: "p2p",
})
return
@@ -1586,7 +1627,7 @@ func (s *DHT) removeNode(ctx context.Context, node *Node) {
"bucket": index,
})
} else {
- logtrace.Info(ctx, "Removed node from bucket success", logtrace.Fields{
+ logtrace.Debug(ctx, "Removed node from bucket success", logtrace.Fields{
logtrace.FieldModule: "p2p",
"node": node.String(),
"bucket": index,
@@ -1671,6 +1712,7 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i
requests := 0
successful := 0
+ logtrace.Info(ctx, "Iterate batch store: dispatching to nodes", logtrace.Fields{"task_id": id, "nodes": len(knownNodes)})
storeResponses := s.batchStoreNetwork(ctx, values, knownNodes, storageMap, typ)
for response := range storeResponses {
requests++
@@ -1713,14 +1755,16 @@ func (s *DHT) IterateBatchStore(ctx context.Context, values [][]byte, typ int, i
}
// Emit per-node store RPC call via metrics bridge (no P2P API coupling)
- p2pmetrics.RecordStore(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{
- IP: nodeIP,
- Address: nodeAddr,
- Keys: response.KeysCount,
- Success: errMsg == "" && response.Error == nil,
- Error: errMsg,
- DurationMS: response.DurationMS,
- })
+ if !s.options.MetricsDisabled {
+ p2pmetrics.RecordStore(p2pmetrics.TaskIDFromContext(ctx), p2pmetrics.Call{
+ IP: nodeIP,
+ Address: nodeAddr,
+ Keys: response.KeysCount,
+ Success: errMsg == "" && response.Error == nil,
+ Error: errMsg,
+ DurationMS: response.DurationMS,
+ })
+ }
}
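
Reviewer sketch (not part of the change): the same MetricsDisabled gate now wraps every p2pmetrics call site; a small helper would keep those sites to one line. RecordStore and TaskIDFromContext exist per this diff; the method itself is hypothetical.

    // Hypothetical helper, assuming the call shapes used in this diff.
    func (s *DHT) recordStore(ctx context.Context, c p2pmetrics.Call) {
        if s.options.MetricsDisabled {
            return
        }
        p2pmetrics.RecordStore(p2pmetrics.TaskIDFromContext(ctx), c)
    }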
@@ -1763,12 +1807,9 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[
var wg sync.WaitGroup
for key, node := range nodes {
- logtrace.Info(ctx, "Node", logtrace.Fields{
- logtrace.FieldModule: "dht",
- "port": node.String(),
- })
+ logtrace.Info(ctx, "Preparing batch store to node", logtrace.Fields{logtrace.FieldModule: "dht", "node": node.String()})
if s.ignorelist.Banned(node) {
- logtrace.Info(ctx, "Ignoring banned node in batch store network call", logtrace.Fields{
+ logtrace.Debug(ctx, "Ignoring banned node in batch store network call", logtrace.Fields{
logtrace.FieldModule: "dht",
"node": node.String(),
})
@@ -1796,15 +1837,11 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[
totalBytes += len(values[idx])
}
- logtrace.Info(ctx, "Batch store to node", logtrace.Fields{
- logtrace.FieldModule: "dht",
- "keys": len(toStore),
- "size_before_compress": utils.BytesIntToMB(totalBytes),
- })
+ logtrace.Info(ctx, "RPC BatchStoreData send", logtrace.Fields{logtrace.FieldModule: "dht", "node": receiver.String(), "keys": len(toStore), "size_mb": utils.BytesIntToMB(totalBytes)})
// Skip empty payloads: avoid sending empty store RPCs and do not record no-op metrics.
if len(toStore) == 0 {
- logtrace.Info(ctx, "Skipping store RPC with empty payload", logtrace.Fields{
+ logtrace.Debug(ctx, "Skipping store RPC with empty payload", logtrace.Fields{
logtrace.FieldModule: "dht",
"node": receiver.String(),
})
@@ -1821,15 +1858,12 @@ func (s *DHT) batchStoreNetwork(ctx context.Context, values [][]byte, nodes map[
s.metrics.IncHotPathBanIncr()
}
- logtrace.Info(ctx, "Network call batch store request failed", logtrace.Fields{
- logtrace.FieldModule: "p2p",
- logtrace.FieldError: err.Error(),
- "request": request.String(),
- })
+ logtrace.Info(ctx, "RPC BatchStoreData failed", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "node": receiver.String(), "ms": dur})
responses <- &MessageWithError{Error: err, Message: response, KeysCount: len(toStore), Receiver: receiver, DurationMS: dur}
return
}
+ logtrace.Info(ctx, "RPC BatchStoreData completed", logtrace.Fields{logtrace.FieldModule: "p2p", "node": receiver.String(), "keys": len(toStore), "ms": dur})
responses <- &MessageWithError{Message: response, KeysCount: len(toStore), Receiver: receiver, DurationMS: dur}
}
}(node, key)
@@ -1865,7 +1899,7 @@ func (s *DHT) batchFindNode(ctx context.Context, payload [][]byte, nodes map[str
continue
}
if s.ignorelist.Banned(node) {
- logtrace.Info(ctx, "Ignoring banned node in batch find call", logtrace.Fields{
+ logtrace.Debug(ctx, "Ignoring banned node in batch find call", logtrace.Fields{
logtrace.FieldModule: "dht",
"node": node.String(),
"txid": txid,
diff --git a/p2p/kademlia/fetch_and_store.go b/p2p/kademlia/fetch_and_store.go
index 9803bf3d..8f954364 100644
--- a/p2p/kademlia/fetch_and_store.go
+++ b/p2p/kademlia/fetch_and_store.go
@@ -26,12 +26,12 @@ const (
// FetchAndStore fetches all keys from the queries TODO replication list, fetches each value from the respective nodes, and stores them in the queries store
func (s *DHT) FetchAndStore(ctx context.Context) error {
- logtrace.Info(ctx, "Getting fetch and store keys", logtrace.Fields{})
+ logtrace.Debug(ctx, "Getting fetch and store keys", logtrace.Fields{})
keys, err := s.store.GetAllToDoRepKeys(failedKeysClosestContactsLookupCount+maxBatchAttempts+1, totalMaxAttempts)
if err != nil {
return fmt.Errorf("get all keys error: %w", err)
}
- logtrace.Info(ctx, "got keys from queries store", logtrace.Fields{"count": len(keys)})
+ logtrace.Debug(ctx, "got keys from queries store", logtrace.Fields{"count": len(keys)})
if len(keys) == 0 {
return nil
@@ -79,7 +79,7 @@ func (s *DHT) FetchAndStore(ctx context.Context) error {
return
}
- logtrace.Info(cctx, "iterate fetch for replication success", logtrace.Fields{"key": info.Key, "ip": info.IP})
+ logtrace.Debug(cctx, "iterate fetch for replication success", logtrace.Fields{"key": info.Key, "ip": info.IP})
}
if err := s.store.Store(cctx, sKey, value, 0, false); err != nil {
@@ -94,7 +94,7 @@ func (s *DHT) FetchAndStore(ctx context.Context) error {
atomic.AddInt32(&successCounter, 1) // Increment the counter atomically
- logtrace.Info(cctx, "fetch & store key success", logtrace.Fields{"key": info.Key, "ip": info.IP})
+ logtrace.Debug(cctx, "fetch & store key success", logtrace.Fields{"key": info.Key, "ip": info.IP})
}(key)
time.Sleep(100 * time.Millisecond)
@@ -102,7 +102,7 @@ func (s *DHT) FetchAndStore(ctx context.Context) error {
//wg.Wait()
- logtrace.Info(ctx, "Successfully fetched & stored keys", logtrace.Fields{"todo-keys": len(keys), "successfully-added-keys": atomic.LoadInt32(&successCounter)}) // Log the final count
+ logtrace.Debug(ctx, "Successfully fetched & stored keys", logtrace.Fields{"todo-keys": len(keys), "successfully-added-keys": atomic.LoadInt32(&successCounter)}) // Log the final count
return nil
}
@@ -114,7 +114,7 @@ func (s *DHT) BatchFetchAndStoreFailedKeys(ctx context.Context) error {
if err != nil {
return fmt.Errorf("get all keys error: %w", err)
}
- logtrace.Info(ctx, "read failed keys from store", logtrace.Fields{"count": len(keys)})
+ logtrace.Debug(ctx, "read failed keys from store", logtrace.Fields{"count": len(keys)})
if len(keys) == 0 {
return nil
@@ -143,7 +143,7 @@ func (s *DHT) BatchFetchAndStoreFailedKeys(ctx context.Context) error {
repKeys = append(repKeys, repKey)
}
}
- logtrace.Info(ctx, "got 2nd tier replication keys from queries store", logtrace.Fields{"count": len(repKeys)})
+ logtrace.Debug(ctx, "got 2nd tier replication keys from queries store", logtrace.Fields{"count": len(repKeys)})
if err := s.GroupAndBatchFetch(ctx, repKeys, 0, false); err != nil {
logtrace.Error(ctx, "group and batch fetch failed-keys error", logtrace.Fields{logtrace.FieldError: err})
@@ -160,7 +160,7 @@ func (s *DHT) BatchFetchAndStore(ctx context.Context) error {
if err != nil {
return fmt.Errorf("get all keys error: %w", err)
}
- logtrace.Info(ctx, "got batch todo rep-keys from queries store", logtrace.Fields{"count": len(keys)})
+ logtrace.Debug(ctx, "got batch todo rep-keys from queries store", logtrace.Fields{"count": len(keys)})
if len(keys) == 0 {
return nil
@@ -213,12 +213,12 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey,
totalKeysFound := 0
for len(stringKeys) > 0 && iterations < maxSingleBatchIterations {
iterations++
- logtrace.Info(ctx, "fetching batch values from node", logtrace.Fields{"node-ip": node.IP, "count": len(stringKeys), "keys[0]": stringKeys[0], "keys[len()]": stringKeys[len(stringKeys)-1]})
+ logtrace.Debug(ctx, "fetching batch values from node", logtrace.Fields{"node-ip": node.IP, "count": len(stringKeys), "keys[0]": stringKeys[0], "keys[len()]": stringKeys[len(stringKeys)-1]})
isDone, retMap, failedKeys, err := s.GetBatchValuesFromNode(ctx, stringKeys, node)
if err != nil {
// Log the error but don't stop the process, continue to the next node
- logtrace.Info(ctx, "failed to get batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
+ logtrace.Debug(ctx, "failed to get batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
continue
}
@@ -238,7 +238,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey,
err = s.store.StoreBatch(ctx, response, datatype, isOriginal)
if err != nil {
// Log the error but don't stop the process, continue to the next node
- logtrace.Info(ctx, "failed to store batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
+ logtrace.Debug(ctx, "failed to store batch values", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
continue
}
@@ -246,7 +246,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey,
err = s.store.BatchDeleteRepKeys(stringDelKeys)
if err != nil {
// Log the error but don't stop the process, continue to the next node
- logtrace.Info(ctx, "failed to delete rep keys", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
+ logtrace.Debug(ctx, "failed to delete rep keys", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
continue
}
} else {
@@ -255,7 +255,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey,
if isDone && len(failedKeys) > 0 {
if err := s.store.IncrementAttempts(failedKeys); err != nil {
- logtrace.Info(ctx, "failed to increment attempts", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
+ logtrace.Debug(ctx, "failed to increment attempts", logtrace.Fields{"node-ip": node.IP, logtrace.FieldError: err})
// not adding 'continue' here because we want to delete the keys from the todo list
}
} else if isDone {
@@ -265,7 +265,7 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey,
}
}
- logtrace.Info(ctx, "fetch batch values from node successfully", logtrace.Fields{"node-ip": node.IP, "count": totalKeysFound, "iterations": iterations})
+ logtrace.Debug(ctx, "fetch batch values from node successfully", logtrace.Fields{"node-ip": node.IP, "count": totalKeysFound, "iterations": iterations})
}
}
@@ -274,7 +274,9 @@ func (s *DHT) GroupAndBatchFetch(ctx context.Context, repKeys []domain.ToRepKey,
// GetBatchValuesFromNode gets values from a node in batches
func (s *DHT) GetBatchValuesFromNode(ctx context.Context, keys []string, n *Node) (bool, map[string][]byte, []string, error) {
- logtrace.Info(ctx, "sending batch fetch request", logtrace.Fields{"node-ip": n.IP, "keys": len(keys)})
+ logtrace.Debug(ctx, "sending batch fetch request", logtrace.Fields{"node-ip": n.IP, "keys": len(keys)})
+ // Minimal per-RPC visibility for background replication path
+ logtrace.Info(ctx, "RPC BatchFindValues send", logtrace.Fields{"node": n.String(), "keys": len(keys)})
messageType := BatchFindValues
@@ -347,8 +349,9 @@ func (s *DHT) GetBatchValuesFromNode(ctx context.Context, keys []string, n *Node
if err != nil {
return isDone, nil, nil, fmt.Errorf("failed to verify and filter data: %w", err)
}
- logtrace.Info(ctx, "batch fetch response rcvd and keys verified", logtrace.Fields{"node-ip": n.IP, "received-keys": len(decompressedMap), "verified-keys": len(retMap), "failed-keys": len(failedKeys)})
+ logtrace.Debug(ctx, "batch fetch response rcvd and keys verified", logtrace.Fields{"node-ip": n.IP, "received-keys": len(decompressedMap), "verified-keys": len(retMap), "failed-keys": len(failedKeys)})
+ logtrace.Info(ctx, "RPC BatchFindValues completed", logtrace.Fields{"node": n.String(), "received_keys": len(decompressedMap), "verified_keys": len(retMap)})
return v.Done, retMap, failedKeys, nil
}
diff --git a/p2p/kademlia/network.go b/p2p/kademlia/network.go
index a2322ff7..3bca8f20 100644
--- a/p2p/kademlia/network.go
+++ b/p2p/kademlia/network.go
@@ -2,7 +2,6 @@ package kademlia
import (
"context"
- "encoding/hex"
"fmt"
"io"
"net"
@@ -337,7 +336,7 @@ func (s *Network) handleReplicateRequest(ctx context.Context, req *ReplicateData
return fmt.Errorf("unable to store batch replication keys: %w", err)
}
- logtrace.Info(ctx, "Store batch replication keys stored", logtrace.Fields{
+ logtrace.Debug(ctx, "Store batch replication keys stored", logtrace.Fields{
logtrace.FieldModule: "p2p",
"to-store-keys": len(keysToStore),
"rcvd-keys": len(req.Keys),
@@ -650,7 +649,7 @@ func (s *Network) rpcOnceWrapper(ctx context.Context, cw *connWrapper, remoteAdd
if _, e := cw.secureConn.Write(data); e != nil {
cw.mtx.Unlock()
if isStaleConnError(e) && !retried {
- logtrace.Info(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{
+ logtrace.Debug(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{
logtrace.FieldModule: "p2p",
"remote": remoteAddr,
"message_type": msgType,
@@ -691,7 +690,7 @@ func (s *Network) rpcOnceWrapper(ctx context.Context, cw *connWrapper, remoteAdd
cw.mtx.Unlock()
if e != nil {
if isStaleConnError(e) && !retried {
- logtrace.Info(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{
+ logtrace.Debug(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{
logtrace.FieldModule: "p2p",
"remote": remoteAddr,
"message_type": msgType,
@@ -744,7 +743,7 @@ Retry:
}
if _, err := conn.Write(data); err != nil {
if isStaleConnError(err) && !retried {
- logtrace.Info(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{
+ logtrace.Debug(ctx, "Stale pooled connection on write; redialing", logtrace.Fields{
logtrace.FieldModule: "p2p",
"remote": remoteAddr,
"message_type": msgType,
@@ -778,7 +777,7 @@ Retry:
_ = conn.SetDeadline(time.Time{})
if err != nil {
if isStaleConnError(err) && !retried {
- logtrace.Info(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{
+ logtrace.Debug(ctx, "Stale pooled connection on read; redialing", logtrace.Fields{
logtrace.FieldModule: "p2p",
"remote": remoteAddr,
"message_type": msgType,
@@ -842,7 +841,7 @@ func (s *Network) handleBatchFindValues(ctx context.Context, message *Message, r
// Try to acquire the semaphore, wait up to 1 minute
logtrace.Debug(ctx, "Attempting to acquire semaphore immediately", logtrace.Fields{logtrace.FieldModule: "p2p"})
if !s.sem.TryAcquire(1) {
- logtrace.Info(ctx, "Immediate acquisition failed. Waiting up to 1 minute", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "Immediate acquisition failed. Waiting up to 1 minute", logtrace.Fields{logtrace.FieldModule: "p2p"})
ctxWithTimeout, cancel := context.WithTimeout(ctx, 1*time.Minute)
defer cancel()
@@ -851,7 +850,7 @@ func (s *Network) handleBatchFindValues(ctx context.Context, message *Message, r
// failed to acquire semaphore within 1 minute
return s.generateResponseMessage(BatchFindValues, message.Sender, ResultFailed, errorBusy)
}
- logtrace.Info(ctx, "Semaphore acquired after waiting", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "Semaphore acquired after waiting", logtrace.Fields{logtrace.FieldModule: "p2p"})
}
// Add a defer function to recover from panic
@@ -951,7 +950,7 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message,
i++
}
- values, count, err := s.dht.store.RetrieveBatchValues(ctx, keys, true)
+ values, count, err := s.dht.store.RetrieveBatchValues(ctx, keys, false)
if err != nil {
err = errors.Errorf("batch find values: %w", err)
s.appendRetrieveEntry(message.Sender.IP, RecentBatchRetrieveEntry{
@@ -976,17 +975,8 @@ func (s *Network) handleGetValuesRequest(ctx context.Context, message *Message,
for i, key := range keys {
val := KeyValWithClosest{
- Value: values[i],
- }
- if len(val.Value) == 0 {
- decodedKey, err := hex.DecodeString(keys[i])
- if err != nil {
- err = errors.Errorf("batch find vals: decode key: %w - key %s", err, keys[i])
- return s.generateResponseMessage(BatchGetValues, message.Sender, ResultFailed, err.Error())
- }
-
- nodes, _ := s.dht.ht.closestContacts(Alpha, decodedKey, []*Node{message.Sender})
- val.Closest = nodes.Nodes
+ Value: values[i],
+			Closest: make([]*Node, 0), // kept for compatibility, unused; each node now has a full view of the whole network
}
request.Data[key] = val
@@ -1025,7 +1015,7 @@ func (s *Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFi
if len(req.Keys) > 0 {
// log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("keys[0]", req.Keys[0]).WithField("keys[len]", req.Keys[len(req.Keys)-1]).
// WithField("from-ip", ip).Debug("first & last batch keys")
- logtrace.Debug(ctx, "First & last batch keys", logtrace.Fields{
+ logtrace.Info(ctx, "First & last batch keys", logtrace.Fields{
logtrace.FieldModule: "p2p",
"p2p-req-id": reqID,
"keys[0]": req.Keys[0],
@@ -1039,7 +1029,7 @@ func (s *Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFi
return false, nil, fmt.Errorf("failed to retrieve batch values: %w", err)
}
// log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("values-len", len(values)).WithField("found", count).WithField("from-ip", ip).Info("batch find values request processed")
- logtrace.Info(ctx, "Batch find values request processed", logtrace.Fields{
+ logtrace.Debug(ctx, "Batch find values request processed", logtrace.Fields{
logtrace.FieldModule: "p2p",
"p2p-req-id": reqID,
"values-len": len(values),
@@ -1054,7 +1044,7 @@ func (s *Network) handleBatchFindValuesRequest(ctx context.Context, req *BatchFi
// log.WithContext(ctx).WithField("p2p-req-id", reqID).WithField("compressed-data-len", utils.BytesToMB(uint64(len(compressedData)))).WithField("found", count).
// WithField("from-ip", ip).Info("batch find values response sent")
- logtrace.Info(ctx, "Batch find values response sent", logtrace.Fields{
+ logtrace.Debug(ctx, "Batch find values response sent", logtrace.Fields{
logtrace.FieldModule: "p2p",
"p2p-req-id": reqID,
"compressed-data-len": utils.BytesToMB(uint64(len(compressedData))),
@@ -1293,7 +1283,7 @@ func (s *Network) handleBatchFindNode(ctx context.Context, message *Message) (re
closestMap := make(map[string][]*Node)
// log.WithContext(ctx).WithField("sender", message.Sender.String()).Info("Batch Find Nodes Request Received")
- logtrace.Info(ctx, "Batch Find Nodes Request Received", logtrace.Fields{
+ logtrace.Debug(ctx, "Batch Find Nodes Request Received", logtrace.Fields{
logtrace.FieldModule: "p2p",
"sender": message.Sender.String(),
"hashed-targets": len(request.HashedTarget),
@@ -1304,7 +1294,7 @@ func (s *Network) handleBatchFindNode(ctx context.Context, message *Message) (re
}
response.ClosestNodes = closestMap
// log.WithContext(ctx).WithField("sender", message.Sender.String()).Info("Batch Find Nodes Request Processed")
- logtrace.Info(ctx, "Batch Find Nodes Request Processed", logtrace.Fields{
+ logtrace.Debug(ctx, "Batch Find Nodes Request Processed", logtrace.Fields{
logtrace.FieldModule: "p2p",
"sender": message.Sender.String(),
})
diff --git a/p2p/kademlia/node_activity.go b/p2p/kademlia/node_activity.go
index cc7089d6..88e09f7a 100644
--- a/p2p/kademlia/node_activity.go
+++ b/p2p/kademlia/node_activity.go
@@ -25,7 +25,7 @@ func (s *DHT) checkNodeActivity(ctx context.Context) {
return
case <-ticker.C:
if !utils.CheckInternetConnectivity() {
- logtrace.Info(ctx, "no internet connectivity, not checking node activity", logtrace.Fields{})
+ logtrace.Debug(ctx, "no internet connectivity, not checking node activity", logtrace.Fields{})
continue
}
@@ -115,7 +115,7 @@ func (s *DHT) handlePingSuccess(ctx context.Context, wasActive bool, n *Node) {
s.ignorelist.Delete(n)
if !wasActive {
- logtrace.Info(ctx, "node found to be active again", logtrace.Fields{
+ logtrace.Debug(ctx, "node found to be active again", logtrace.Fields{
logtrace.FieldModule: "p2p",
"ip": n.IP,
"node_id": string(n.ID),
diff --git a/p2p/kademlia/redundant_data.go b/p2p/kademlia/redundant_data.go
index bfe6947d..151269d1 100644
--- a/p2p/kademlia/redundant_data.go
+++ b/p2p/kademlia/redundant_data.go
@@ -13,7 +13,7 @@ import (
)
func (s *DHT) startDisabledKeysCleanupWorker(ctx context.Context) error {
- logtrace.Info(ctx, "disabled keys cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "disabled keys cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
for {
select {
@@ -50,7 +50,7 @@ func (s *DHT) cleanupDisabledKeys(ctx context.Context) error {
}
func (s *DHT) startCleanupRedundantDataWorker(ctx context.Context) {
- logtrace.Info(ctx, "redundant data cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "redundant data cleanup worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
for {
select {
@@ -66,7 +66,7 @@ func (s *DHT) startCleanupRedundantDataWorker(ctx context.Context) {
func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) {
from := time.Now().AddDate(-5, 0, 0) // 5 years ago
- logtrace.Info(ctx, "getting all possible replication keys past five years", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from})
+ logtrace.Debug(ctx, "getting all possible replication keys past five years", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from})
to := time.Now().UTC()
replicationKeys := s.store.GetKeysForReplication(ctx, from, to)
@@ -88,7 +88,7 @@ func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) {
removeKeys := make([]domain.DelKey, 0)
for key, closestContacts := range closestContactsMap {
if len(closestContacts) < Alpha {
- logtrace.Info(ctx, "not enough contacts to replicate", logtrace.Fields{logtrace.FieldModule: "p2p", "key": key, "closest contacts": closestContacts})
+ logtrace.Debug(ctx, "not enough contacts to replicate", logtrace.Fields{logtrace.FieldModule: "p2p", "key": key, "closest contacts": closestContacts})
continue
}
@@ -118,9 +118,9 @@ func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) {
return
}
- logtrace.Info(ctx, "insert del keys success", logtrace.Fields{logtrace.FieldModule: "p2p", "count-del-keys": len(insertKeys)})
+ logtrace.Debug(ctx, "insert del keys success", logtrace.Fields{logtrace.FieldModule: "p2p", "count-del-keys": len(insertKeys)})
} else {
- logtrace.Info(ctx, "No redundant key found to be stored in the storage", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "No redundant key found to be stored in the storage", logtrace.Fields{logtrace.FieldModule: "p2p"})
}
if len(removeKeys) > 0 {
@@ -133,7 +133,7 @@ func (s *DHT) cleanupRedundantDataWorker(ctx context.Context) {
}
func (s *DHT) startDeleteDataWorker(ctx context.Context) {
- logtrace.Info(ctx, "start delete data worker", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "start delete data worker", logtrace.Fields{logtrace.FieldModule: "p2p"})
for {
select {
diff --git a/p2p/kademlia/replication.go b/p2p/kademlia/replication.go
index 5163fd0b..4a36c422 100644
--- a/p2p/kademlia/replication.go
+++ b/p2p/kademlia/replication.go
@@ -34,7 +34,7 @@ var (
// StartReplicationWorker starts replication
func (s *DHT) StartReplicationWorker(ctx context.Context) error {
- logtrace.Info(ctx, "replication worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "replication worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
go s.checkNodeActivity(ctx)
go s.StartBatchFetchAndStoreWorker(ctx)
@@ -54,7 +54,7 @@ func (s *DHT) StartReplicationWorker(ctx context.Context) error {
// StartBatchFetchAndStoreWorker starts replication
func (s *DHT) StartBatchFetchAndStoreWorker(ctx context.Context) error {
- logtrace.Info(ctx, "batch fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "batch fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
for {
select {
@@ -69,7 +69,7 @@ func (s *DHT) StartBatchFetchAndStoreWorker(ctx context.Context) error {
// StartFailedFetchAndStoreWorker starts replication
func (s *DHT) StartFailedFetchAndStoreWorker(ctx context.Context) error {
- logtrace.Info(ctx, "fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "fetch and store worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
for {
select {
@@ -131,7 +131,7 @@ func (s *DHT) Replicate(ctx context.Context) {
historicStart = time.Now().UTC().Add(-24 * time.Hour * 180)
}
- logtrace.Info(ctx, "replicating data", logtrace.Fields{logtrace.FieldModule: "p2p", "historic-start": historicStart})
+ logtrace.Debug(ctx, "replicating data", logtrace.Fields{logtrace.FieldModule: "p2p", "historic-start": historicStart})
for i := 0; i < B; i++ {
if time.Since(s.ht.refreshTime(i)) > defaultRefreshTime {
@@ -150,7 +150,7 @@ func (s *DHT) Replicate(ctx context.Context) {
}
if len(repInfo) == 0 {
- logtrace.Info(ctx, "no replication info found", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "no replication info found", logtrace.Fields{logtrace.FieldModule: "p2p"})
return
}
@@ -159,7 +159,7 @@ func (s *DHT) Replicate(ctx context.Context) {
from = *repInfo[0].LastReplicatedAt
}
- logtrace.Info(ctx, "getting all possible replication keys", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from})
+ logtrace.Debug(ctx, "getting all possible replication keys", logtrace.Fields{logtrace.FieldModule: "p2p", "from": from})
to := time.Now().UTC()
replicationKeys := s.store.GetKeysForReplication(ctx, from, to)
@@ -199,7 +199,7 @@ func (s *DHT) Replicate(ctx context.Context) {
continue
}
countToSendKeys := len(replicationKeys) - idx
- logtrace.Info(ctx, "count of replication keys to be checked", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": countToSendKeys})
+ logtrace.Debug(ctx, "count of replication keys to be checked", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": countToSendKeys})
// Preallocate a slice with a capacity equal to the number of keys.
closestContactKeys := make([]string, 0, countToSendKeys)
@@ -212,13 +212,13 @@ func (s *DHT) Replicate(ctx context.Context) {
}
}
- logtrace.Info(ctx, "closest contact keys count", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": len(closestContactKeys)})
+ logtrace.Debug(ctx, "closest contact keys count", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID), "len-rep-keys": len(closestContactKeys)})
if len(closestContactKeys) == 0 {
if err := s.updateLastReplicated(ctx, info.ID, to); err != nil {
logtrace.Error(ctx, "replicate update lastReplicated failed", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)})
} else {
- logtrace.Info(ctx, "no closest keys found - replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "closest-contact-keys": 0})
+ logtrace.Debug(ctx, "no closest keys found - replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "closest-contact-keys": 0})
}
continue
@@ -258,17 +258,17 @@ func (s *DHT) Replicate(ctx context.Context) {
if err := s.updateLastReplicated(ctx, info.ID, to); err != nil {
logtrace.Error(ctx, "replicate update lastReplicated failed", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)})
} else {
- logtrace.Info(ctx, "replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "expected-rep-keys": len(closestContactKeys)})
+ logtrace.Debug(ctx, "replicate update lastReplicated success", logtrace.Fields{logtrace.FieldModule: "p2p", "node": info.IP, "to": to.String(), "expected-rep-keys": len(closestContactKeys)})
}
}
- logtrace.Info(ctx, "Replication done", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "Replication done", logtrace.Fields{logtrace.FieldModule: "p2p"})
}
func (s *DHT) adjustNodeKeys(ctx context.Context, from time.Time, info domain.NodeReplicationInfo) error {
replicationKeys := s.store.GetKeysForReplication(ctx, from, time.Now().UTC())
- logtrace.Info(ctx, "begin adjusting node keys process for offline node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "total-rep-keys": len(replicationKeys), "from": from.String()})
+ logtrace.Debug(ctx, "begin adjusting node keys process for offline node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "total-rep-keys": len(replicationKeys), "from": from.String()})
// prepare ignored nodes list but remove the node we are adjusting
// because we want to find if this node was supposed to hold this key
@@ -315,7 +315,7 @@ func (s *DHT) adjustNodeKeys(ctx context.Context, from time.Time, info domain.No
failureCount := 0
for nodeInfoKey, keys := range nodeKeysMap {
- logtrace.Info(ctx, "sending adjusted replication keys to node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "adjust-to-node": nodeInfoKey, "to-adjust-keys-len": len(keys)})
+ logtrace.Debug(ctx, "sending adjusted replication keys to node", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID), "adjust-to-node": nodeInfoKey, "to-adjust-keys-len": len(keys)})
// Retrieve the node object from the key
node, err := getNodeFromKey(nodeInfoKey)
if err != nil {
@@ -370,14 +370,14 @@ func (s *DHT) adjustNodeKeys(ctx context.Context, from time.Time, info domain.No
return fmt.Errorf("replicate update isAdjusted failed: %v", err)
}
- logtrace.Info(ctx, "offline node was successfully adjusted", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID)})
+ logtrace.Debug(ctx, "offline node was successfully adjusted", logtrace.Fields{logtrace.FieldModule: "p2p", "offline-node-ip": info.IP, "offline-node-id": string(info.ID)})
return nil
}
func isNodeGoneAndShouldBeAdjusted(lastSeen *time.Time, isAlreadyAdjusted bool) bool {
if lastSeen == nil {
- logtrace.Info(context.Background(), "lastSeen is nil - aborting node adjustment", logtrace.Fields{})
+ logtrace.Debug(context.Background(), "lastSeen is nil - aborting node adjustment", logtrace.Fields{})
return false
}
@@ -396,10 +396,10 @@ func (s *DHT) checkAndAdjustNode(ctx context.Context, info domain.NodeReplicatio
if err := s.store.UpdateIsAdjusted(ctx, string(info.ID), true); err != nil {
logtrace.Error(ctx, "failed to update replication info, set isAdjusted to true", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err.Error(), "rep-ip": info.IP, "rep-id": string(info.ID)})
} else {
- logtrace.Info(ctx, "set isAdjusted to true", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)})
+ logtrace.Debug(ctx, "set isAdjusted to true", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)})
}
}
}
- logtrace.Info(ctx, "replication node not active, skipping over it.", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)})
+ logtrace.Debug(ctx, "replication node not active, skipping over it.", logtrace.Fields{logtrace.FieldModule: "p2p", "rep-ip": info.IP, "rep-id": string(info.ID)})
}
diff --git a/p2p/kademlia/rq_symbols.go b/p2p/kademlia/rq_symbols.go
index fbf6563d..819d0944 100644
--- a/p2p/kademlia/rq_symbols.go
+++ b/p2p/kademlia/rq_symbols.go
@@ -16,16 +16,20 @@ const (
)
func (s *DHT) startStoreSymbolsWorker(ctx context.Context) {
- logtrace.Info(ctx, "start delete data worker", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ // Minimal visibility for lifecycle + each tick
+ logtrace.Info(ctx, "rq_symbols worker started", logtrace.Fields{logtrace.FieldModule: "p2p"})
for {
select {
case <-time.After(defaultSoreSymbolsInterval):
+ tickStart := time.Now()
+ logtrace.Info(ctx, "rq_symbols: tick", logtrace.Fields{"interval": defaultSoreSymbolsInterval.String()})
if err := s.storeSymbols(ctx); err != nil {
logtrace.Error(ctx, "store symbols", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err})
}
+ logtrace.Info(ctx, "rq_symbols: tick complete", logtrace.Fields{"ms": time.Since(tickStart).Milliseconds()})
case <-ctx.Done():
- logtrace.Error(ctx, "closing store symbols worker", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Info(ctx, "rq_symbols worker stopping", logtrace.Fields{logtrace.FieldModule: "p2p"})
return
}
}
@@ -37,13 +41,26 @@ func (s *DHT) storeSymbols(ctx context.Context) error {
return fmt.Errorf("get to do store symbol dirs: %w", err)
}
+ // Minimal visibility: how many dirs to process this tick
+ logtrace.Info(ctx, "rq_symbols: todo directories", logtrace.Fields{"count": len(dirs)})
+
for _, dir := range dirs {
- logtrace.Info(ctx, "rq_symbols worker: start scanning dir & storing raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID})
+ // Pre-count symbols in this directory
+ preCount := -1
+ if set, rerr := utils.ReadDirFilenames(dir.Dir); rerr == nil {
+ preCount = len(set)
+ }
+ start := time.Now()
+ logtrace.Info(ctx, "rq_symbols: processing dir", logtrace.Fields{"dir": dir.Dir, "txid": dir.TXID, "symbols": preCount})
if err := s.scanDirAndStoreSymbols(ctx, dir.Dir, dir.TXID); err != nil {
logtrace.Error(ctx, "scan and store symbols", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err})
}
-
- logtrace.Info(ctx, "rq_symbols worker: scanned dir & stored raptorQ symbols", logtrace.Fields{"dir": dir, "txid": dir.TXID})
+ // Post-count remaining symbols
+ remCount := -1
+ if set, rerr := utils.ReadDirFilenames(dir.Dir); rerr == nil {
+ remCount = len(set)
+ }
+ logtrace.Info(ctx, "rq_symbols: processed dir", logtrace.Fields{"dir": dir.Dir, "txid": dir.TXID, "remaining": remCount, "ms": time.Since(start).Milliseconds()})
}
return nil
@@ -66,7 +83,7 @@ func (s *DHT) scanDirAndStoreSymbols(ctx context.Context, dir, txid string) erro
}
sort.Strings(keys)
- logtrace.Info(ctx, "p2p-worker: storing ALL RaptorQ symbols", logtrace.Fields{"txid": txid, "dir": dir, "total": len(keys)})
+ logtrace.Debug(ctx, "p2p-worker: storing ALL RaptorQ symbols", logtrace.Fields{"txid": txid, "dir": dir, "total": len(keys)})
// Batch-flush at loadSymbolsBatchSize
for start := 0; start < len(keys); {
diff --git a/p2p/kademlia/store/meta/meta.go b/p2p/kademlia/store/meta/meta.go
index fa75dc81..c57d05a4 100644
--- a/p2p/kademlia/store/meta/meta.go
+++ b/p2p/kademlia/store/meta/meta.go
@@ -67,7 +67,7 @@ func NewStore(ctx context.Context, dataDir string) (*Store, error) {
quit: make(chan bool),
}
- logtrace.Info(ctx, fmt.Sprintf("p2p data dir: %v", dataDir), logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, fmt.Sprintf("p2p data dir: %v", dataDir), logtrace.Fields{logtrace.FieldModule: "p2p"})
if _, err := os.Stat(dataDir); os.IsNotExist(err) {
if err := os.MkdirAll(dataDir, 0750); err != nil {
return nil, fmt.Errorf("mkdir %q: %w", dataDir, err)
@@ -185,10 +185,10 @@ func (s *Store) startCheckpointWorker(ctx context.Context) {
select {
case <-ctx.Done():
- logtrace.Info(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{})
+ logtrace.Debug(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{})
return
case <-s.worker.quit:
- logtrace.Info(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{})
+ logtrace.Debug(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{})
return
default:
}
@@ -204,10 +204,10 @@ func (s *Store) start(ctx context.Context) {
logtrace.Error(ctx, "Failed to perform job", logtrace.Fields{logtrace.FieldError: err})
}
case <-s.worker.quit:
- logtrace.Info(ctx, "exit sqlite meta db worker - quit signal received", logtrace.Fields{})
+ logtrace.Debug(ctx, "exit sqlite meta db worker - quit signal received", logtrace.Fields{})
return
case <-ctx.Done():
- logtrace.Info(ctx, "exit sqlite meta db worker- ctx done signal received", logtrace.Fields{})
+ logtrace.Debug(ctx, "exit sqlite meta db worker- ctx done signal received", logtrace.Fields{})
return
}
}
diff --git a/p2p/kademlia/store/sqlite/meta_worker.go b/p2p/kademlia/store/sqlite/meta_worker.go
index eb7a968f..6d1207df 100644
--- a/p2p/kademlia/store/sqlite/meta_worker.go
+++ b/p2p/kademlia/store/sqlite/meta_worker.go
@@ -124,7 +124,7 @@ func NewMigrationMetaStore(ctx context.Context, dataDir string, cloud cloud.Stor
go handler.startLastAccessedUpdateWorker(ctx)
go handler.startInsertWorker(ctx)
go handler.startMigrationExecutionWorker(ctx)
- logtrace.Info(ctx, "MigrationMetaStore workers started", logtrace.Fields{})
+ logtrace.Debug(ctx, "MigrationMetaStore workers started", logtrace.Fields{})
return handler, nil
}
@@ -348,7 +348,7 @@ func (d *MigrationMetaStore) startLastAccessedUpdateWorker(ctx context.Context)
case <-d.updateTicker.C:
d.commitLastAccessedUpdates(ctx)
case <-ctx.Done():
- logtrace.Info(ctx, "Shutting down last accessed update worker", logtrace.Fields{})
+ logtrace.Debug(ctx, "Shutting down last accessed update worker", logtrace.Fields{})
return
}
}
@@ -414,7 +414,7 @@ func (d *MigrationMetaStore) commitLastAccessedUpdates(ctx context.Context) {
d.updates.Delete(k)
}
- logtrace.Info(ctx, "Committed last accessed updates", logtrace.Fields{"count": len(keysToUpdate)})
+ logtrace.Debug(ctx, "Committed last accessed updates", logtrace.Fields{"count": len(keysToUpdate)})
}
func PostKeysInsert(updates []UpdateMessage) {
@@ -437,7 +437,7 @@ func (d *MigrationMetaStore) startInsertWorker(ctx context.Context) {
case <-d.insertTicker.C:
d.commitInserts(ctx)
case <-ctx.Done():
- logtrace.Info(ctx, "Shutting down insert meta keys worker", logtrace.Fields{})
+ logtrace.Debug(ctx, "Shutting down insert meta keys worker", logtrace.Fields{})
d.commitInserts(ctx)
return
}
@@ -501,7 +501,7 @@ func (d *MigrationMetaStore) commitInserts(ctx context.Context) {
d.inserts.Delete(k)
}
- logtrace.Info(ctx, "Committed inserts", logtrace.Fields{"count": len(keysToUpdate)})
+ logtrace.Debug(ctx, "Committed inserts", logtrace.Fields{"count": len(keysToUpdate)})
}
// startMigrationExecutionWorker starts the worker that executes a migration
@@ -511,7 +511,7 @@ func (d *MigrationMetaStore) startMigrationExecutionWorker(ctx context.Context)
case <-d.migrationExecutionTicker.C:
d.checkAndExecuteMigration(ctx)
case <-ctx.Done():
- logtrace.Info(ctx, "Shutting down data migration worker", logtrace.Fields{})
+ logtrace.Debug(ctx, "Shutting down data migration worker", logtrace.Fields{})
return
}
}
@@ -544,7 +544,7 @@ func (d *MigrationMetaStore) checkAndExecuteMigration(ctx context.Context) {
//return
//}
- logtrace.Info(ctx, "Starting data migration", logtrace.Fields{"islow": isLow})
+ logtrace.Debug(ctx, "Starting data migration", logtrace.Fields{"islow": isLow})
// Step 1: Fetch pending migrations
var migrations Migrations
@@ -553,11 +553,11 @@ func (d *MigrationMetaStore) checkAndExecuteMigration(ctx context.Context) {
logtrace.Error(ctx, "Failed to fetch pending migrations", logtrace.Fields{logtrace.FieldError: err})
return
}
- logtrace.Info(ctx, "Fetched pending migrations", logtrace.Fields{"count": len(migrations)})
+ logtrace.Debug(ctx, "Fetched pending migrations", logtrace.Fields{"count": len(migrations)})
// Iterate over each migration
for _, migration := range migrations {
- logtrace.Info(ctx, "Processing migration", logtrace.Fields{"migration_id": migration.ID})
+ logtrace.Debug(ctx, "Processing migration", logtrace.Fields{"migration_id": migration.ID})
if err := d.ProcessMigrationInBatches(ctx, migration); err != nil {
logtrace.Error(ctx, "Failed to process migration", logtrace.Fields{logtrace.FieldError: err, "migration_id": migration.ID})
@@ -579,7 +579,7 @@ func (d *MigrationMetaStore) ProcessMigrationInBatches(ctx context.Context, migr
}
if totalKeys < minKeysToMigrate {
- logtrace.Info(ctx, "Skipping migration due to insufficient keys", logtrace.Fields{"migration_id": migration.ID, "keys-count": totalKeys})
+ logtrace.Debug(ctx, "Skipping migration due to insufficient keys", logtrace.Fields{"migration_id": migration.ID, "keys-count": totalKeys})
return nil
}
@@ -630,7 +630,7 @@ func (d *MigrationMetaStore) ProcessMigrationInBatches(ctx context.Context, migr
}
}
- logtrace.Info(ctx, "Migration processed successfully", logtrace.Fields{"migration_id": migration.ID, "tota-keys-count": totalKeys, "migrated_in_current_iteration": nonMigratedKeys})
+ logtrace.Debug(ctx, "Migration processed successfully", logtrace.Fields{"migration_id": migration.ID, "tota-keys-count": totalKeys, "migrated_in_current_iteration": nonMigratedKeys})
return nil
}
@@ -683,7 +683,7 @@ func (d *MigrationMetaStore) uploadInBatches(ctx context.Context, keys []string,
continue
}
- logtrace.Info(ctx, "Successfully uploaded and deleted records for batch", logtrace.Fields{"batch": i + 1, "total_batches": batches})
+ logtrace.Debug(ctx, "Successfully uploaded and deleted records for batch", logtrace.Fields{"batch": i + 1, "total_batches": batches})
}
return lastError
@@ -823,7 +823,7 @@ func (d *MigrationMetaStore) InsertMetaMigrationData(ctx context.Context, migrat
func (d *MigrationMetaStore) batchSetMigrated(keys []string) error {
if len(keys) == 0 {
// log.P2P().Info("no keys provided for batch update (is_migrated)")
- logtrace.Info(context.Background(), "No keys provided for batch update (is_migrated)", logtrace.Fields{})
+ logtrace.Debug(context.Background(), "No keys provided for batch update (is_migrated)", logtrace.Fields{})
return nil
}
diff --git a/p2p/kademlia/store/sqlite/sqlite.go b/p2p/kademlia/store/sqlite/sqlite.go
index 71224a57..d38661d1 100644
--- a/p2p/kademlia/store/sqlite/sqlite.go
+++ b/p2p/kademlia/store/sqlite/sqlite.go
@@ -293,10 +293,10 @@ func (s *Store) startCheckpointWorker(ctx context.Context) {
select {
case <-ctx.Done():
- logtrace.Info(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{})
+ logtrace.Debug(ctx, "Stopping checkpoint worker because of context cancel", logtrace.Fields{})
return
case <-s.worker.quit:
- logtrace.Info(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{})
+ logtrace.Debug(ctx, "Stopping checkpoint worker because of quit signal", logtrace.Fields{})
return
default:
}
@@ -312,10 +312,10 @@ func (s *Store) start(ctx context.Context) {
logtrace.Error(ctx, "Failed to perform job", logtrace.Fields{logtrace.FieldError: err.Error()})
}
case <-s.worker.quit:
- logtrace.Info(ctx, "exit sqlite db worker - quit signal received", logtrace.Fields{})
+ logtrace.Debug(ctx, "exit sqlite db worker - quit signal received", logtrace.Fields{})
return
case <-ctx.Done():
- logtrace.Info(ctx, "exit sqlite db worker- ctx done signal received", logtrace.Fields{})
+ logtrace.Debug(ctx, "exit sqlite db worker- ctx done signal received", logtrace.Fields{})
return
}
}
@@ -737,11 +737,11 @@ func (s *Store) GetOwnCreatedAt(ctx context.Context) (time.Time, error) {
func (s *Store) GetLocalKeys(from time.Time, to time.Time) ([]string, error) {
var keys []string
ctx := context.Background()
- logtrace.Info(ctx, "getting all keys for SC", logtrace.Fields{})
+ logtrace.Debug(ctx, "getting all keys for SC", logtrace.Fields{})
if err := s.db.SelectContext(ctx, &keys, `SELECT key FROM data WHERE createdAt > ? and createdAt < ?`, from, to); err != nil {
return keys, fmt.Errorf("error reading all keys from database: %w", err)
}
- logtrace.Info(ctx, "got all keys for SC", logtrace.Fields{})
+ logtrace.Debug(ctx, "got all keys for SC", logtrace.Fields{})
return keys, nil
}
@@ -762,7 +762,7 @@ func stringArgsToInterface(args []string) []interface{} {
func batchDeleteRecords(db *sqlx.DB, keys []string) error {
if len(keys) == 0 {
- logtrace.Info(context.Background(), "no keys provided for batch delete", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(context.Background(), "no keys provided for batch delete", logtrace.Fields{logtrace.FieldModule: "p2p"})
return nil
}
total := int64(0)
@@ -784,7 +784,7 @@ func batchDeleteRecords(db *sqlx.DB, keys []string) error {
func batchSetMigratedRecords(db *sqlx.DB, keys []string) error {
if len(keys) == 0 {
- logtrace.Info(context.Background(), "no keys provided for batch update (migrated)", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(context.Background(), "no keys provided for batch update (migrated)", logtrace.Fields{logtrace.FieldModule: "p2p"})
return nil
}
total := int64(0)
diff --git a/p2p/p2p.go b/p2p/p2p.go
index e3d6b40a..2e416111 100644
--- a/p2p/p2p.go
+++ b/p2p/p2p.go
@@ -40,14 +40,15 @@ type P2P interface {
// p2p structure to implements interface
type p2p struct {
- store kademlia.Store // the store for kademlia network
- metaStore kademlia.MetaStore
- dht *kademlia.DHT // the kademlia network
- config *Config // the service configuration
- running bool // if the kademlia network is ready
- lumeraClient lumera.Client
- keyring keyring.Keyring // Add the keyring field
- rqstore rqstore.Store
+ store kademlia.Store // the store for kademlia network
+ metaStore kademlia.MetaStore
+ dht *kademlia.DHT // the kademlia network
+ config *Config // the service configuration
+ running bool // if the kademlia network is ready
+ lumeraClient lumera.Client
+ keyring keyring.Keyring // Add the keyring field
+ rqstore rqstore.Store
+ metricsDisabled bool
}
// Run the kademlia network
@@ -64,7 +65,7 @@ func (s *p2p) Run(ctx context.Context) error {
logtrace.Error(ctx, "failed to run kadmelia, retrying.", logtrace.Fields{logtrace.FieldModule: "p2p", logtrace.FieldError: err})
} else {
- logtrace.Info(ctx, "kadmelia started successfully", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "kadmelia started successfully", logtrace.Fields{logtrace.FieldModule: "p2p"})
return nil
}
}
@@ -74,7 +75,7 @@ func (s *p2p) Run(ctx context.Context) error {
// run the kademlia network
func (s *p2p) run(ctx context.Context) error {
- logtrace.Info(ctx, "Running kademlia network", logtrace.Fields{logtrace.FieldModule: "p2p"})
+ logtrace.Debug(ctx, "Running kademlia network", logtrace.Fields{logtrace.FieldModule: "p2p"})
// configure the kademlia dht for p2p service
if err := s.configure(ctx); err != nil {
return errors.Errorf("configure kademlia dht: %w", err)
@@ -95,7 +96,7 @@ func (s *p2p) run(ctx context.Context) error {
}
s.running = true
- logtrace.Info(ctx, "p2p service is started", logtrace.Fields{})
+ logtrace.Debug(ctx, "p2p service is started", logtrace.Fields{})
// block until context is done
<-ctx.Done()
@@ -103,7 +104,7 @@ func (s *p2p) run(ctx context.Context) error {
// stop the node for kademlia network
s.dht.Stop(ctx)
- logtrace.Info(ctx, "p2p service is stopped", logtrace.Fields{})
+ logtrace.Debug(ctx, "p2p service is stopped", logtrace.Fields{})
return nil
}
@@ -227,12 +228,13 @@ func (s *p2p) NClosestNodesWithIncludingNodeList(ctx context.Context, n int, key
func (s *p2p) configure(ctx context.Context) error {
// new the queries storage
kadOpts := &kademlia.Options{
- LumeraClient: s.lumeraClient,
- Keyring: s.keyring, // Pass the keyring
- BootstrapNodes: []*kademlia.Node{},
- IP: s.config.ListenAddress,
- Port: s.config.Port,
- ID: []byte(s.config.ID),
+ LumeraClient: s.lumeraClient,
+ Keyring: s.keyring, // Pass the keyring
+ BootstrapNodes: []*kademlia.Node{},
+ IP: s.config.ListenAddress,
+ Port: s.config.Port,
+ ID: []byte(s.config.ID),
+ MetricsDisabled: s.metricsDisabled,
}
if len(kadOpts.ID) == 0 {
@@ -251,7 +253,7 @@ func (s *p2p) configure(ctx context.Context) error {
}
// New returns a new p2p instance.
-func New(ctx context.Context, config *Config, lumeraClient lumera.Client, kr keyring.Keyring, rqstore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore) (P2P, error) {
+func New(ctx context.Context, config *Config, lumeraClient lumera.Client, kr keyring.Keyring, rqstore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore, metricsDisabled bool) (P2P, error) {
store, err := sqlite.NewStore(ctx, config.DataDir, cloud, mst)
if err != nil {
return nil, errors.Errorf("new kademlia store: %w", err)
@@ -263,12 +265,13 @@ func New(ctx context.Context, config *Config, lumeraClient lumera.Client, kr key
}
return &p2p{
- store: store,
- metaStore: meta,
- config: config,
- lumeraClient: lumeraClient,
- keyring: kr, // Store the keyring
- rqstore: rqstore,
+ store: store,
+ metaStore: meta,
+ config: config,
+ lumeraClient: lumeraClient,
+ keyring: kr, // Store the keyring
+ rqstore: rqstore,
+ metricsDisabled: metricsDisabled,
}, nil
}
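
Note: a minimal sketch of the updated call site, assuming the caller's dependencies (cfg, lumeraClient, kr, rqStore, cloudStore, mst) are built as before and that the package imports as `github.com/LumeraProtocol/supernode/v2/p2p`; the trailing boolean is the new metricsDisabled flag.

```go
// Sketch only: imports and dependency construction elided.
svc, err := p2p.New(ctx, cfg, lumeraClient, kr, rqStore, cloudStore, mst, true)
if err != nil {
	return fmt.Errorf("new p2p: %w", err)
}
go func() { _ = svc.Run(ctx) }() // Run blocks until ctx is done
```
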
diff --git a/pkg/codec/codec_mock.go b/pkg/codec/codec_mock.go
index 9c3cf864..09484cee 100644
--- a/pkg/codec/codec_mock.go
+++ b/pkg/codec/codec_mock.go
@@ -1,10 +1,5 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: codec.go
-//
-// Generated by this command:
-//
-// mockgen -destination=codec_mock.go -package=codec -source=codec.go
-//
// Package codec is a generated GoMock package.
package codec
@@ -13,14 +8,13 @@ import (
context "context"
reflect "reflect"
- gomock "go.uber.org/mock/gomock"
+ gomock "github.com/golang/mock/gomock"
)
// MockCodec is a mock of Codec interface.
type MockCodec struct {
ctrl *gomock.Controller
recorder *MockCodecMockRecorder
- isgomock struct{}
}
// MockCodecMockRecorder is the mock recorder for MockCodec.
@@ -50,7 +44,7 @@ func (m *MockCodec) Decode(ctx context.Context, req DecodeRequest) (DecodeRespon
}
// Decode indicates an expected call of Decode.
-func (mr *MockCodecMockRecorder) Decode(ctx, req any) *gomock.Call {
+func (mr *MockCodecMockRecorder) Decode(ctx, req interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Decode", reflect.TypeOf((*MockCodec)(nil).Decode), ctx, req)
}
@@ -65,7 +59,7 @@ func (m *MockCodec) Encode(ctx context.Context, req EncodeRequest) (EncodeRespon
}
// Encode indicates an expected call of Encode.
-func (mr *MockCodecMockRecorder) Encode(ctx, req any) *gomock.Call {
+func (mr *MockCodecMockRecorder) Encode(ctx, req interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Encode", reflect.TypeOf((*MockCodec)(nil).Encode), ctx, req)
}
diff --git a/pkg/codec/decode.go b/pkg/codec/decode.go
index beeed5a8..348894e4 100644
--- a/pkg/codec/decode.go
+++ b/pkg/codec/decode.go
@@ -5,9 +5,13 @@ import (
"encoding/json"
"fmt"
"os"
+ "path"
"path/filepath"
+ "strings"
+ "sync"
raptorq "github.com/LumeraProtocol/rq-go"
+ "github.com/LumeraProtocol/supernode/v2/pkg/errors"
"github.com/LumeraProtocol/supernode/v2/pkg/logtrace"
)
@@ -18,64 +22,160 @@ type DecodeRequest struct {
}
type DecodeResponse struct {
- Path string
+ FilePath string
DecodeTmpDir string
}
-func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeResponse, error) {
+// Workspace holds paths & reverse index for prepared decoding.
+type Workspace struct {
+ ActionID string
+ SymbolsDir string // <symbolsBaseDir>/<actionID>
+ BlockDirs []string // index = blockID (or 0 if single block)
+ symbolToBlock map[string]int
+ mu sync.RWMutex // protects symbolToBlock reads if you expand it later
+}
+
+// PrepareDecode creates the on-disk workspace for decoding and returns:
+// - blockPaths[0] => where to write symbols for block 0 (your single-block case)
+// - Write(block, id, data) callback that writes symbol bytes directly to disk
+// - Cleanup() to remove the workspace on abort (no-op if you want to keep it)
+func (rq *raptorQ) PrepareDecode(
+ ctx context.Context,
+ actionID string,
+ layout Layout,
+) (blockPaths []string, Write func(block int, symbolID string, data []byte) (string, error), Cleanup func() error, ws *Workspace, err error) {
fields := logtrace.Fields{
- logtrace.FieldMethod: "Decode",
+ logtrace.FieldMethod: "PrepareDecode",
logtrace.FieldModule: "rq",
- logtrace.FieldActionID: req.ActionID,
+ logtrace.FieldActionID: actionID,
}
- logtrace.Info(ctx, "RaptorQ decode request received", fields)
- // Use deterministic processor settings (matching encoder)
- processor, err := raptorq.NewRaptorQProcessor(rqSymbolSize, rqRedundancyFactor, rqMaxMemoryMB, rqConcurrency)
- if err != nil {
+ // Create root symbols dir for this action
+ symbolsDir := filepath.Join(rq.symbolsBaseDir, actionID)
+ if err := os.MkdirAll(symbolsDir, 0o755); err != nil {
fields[logtrace.FieldError] = err.Error()
- return DecodeResponse{}, fmt.Errorf("create RaptorQ processor: %w", err)
+ logtrace.Error(ctx, "mkdir symbols base dir failed", fields)
+ return nil, nil, nil, nil, fmt.Errorf("mkdir %s: %w", symbolsDir, err)
}
- defer processor.Free()
- symbolsDir := filepath.Join(rq.symbolsBaseDir, req.ActionID)
- if err := os.MkdirAll(symbolsDir, 0o755); err != nil {
- fields[logtrace.FieldError] = err.Error()
- return DecodeResponse{}, fmt.Errorf("mkdir %s: %w", symbolsDir, err)
+ // Ensure block directories exist; build reverse index symbol -> block
+ maxBlockID := 0
+ for _, b := range layout.Blocks {
+ if int(b.BlockID) > maxBlockID {
+ maxBlockID = int(b.BlockID)
+ }
}
+ blockDirs := make([]string, maxBlockID+1)
+ s2b := make(map[string]int, 1024)
- // Build symbol->block mapping from layout and ensure block directories exist
- symbolToBlock := make(map[string]int)
- for _, blk := range req.Layout.Blocks {
- blockDir := filepath.Join(symbolsDir, fmt.Sprintf("block_%d", blk.BlockID))
- if err := os.MkdirAll(blockDir, 0o755); err != nil {
+ for _, b := range layout.Blocks {
+ dir := filepath.Join(symbolsDir, fmt.Sprintf("block_%d", b.BlockID))
+ if err := os.MkdirAll(dir, 0o755); err != nil {
fields[logtrace.FieldError] = err.Error()
- return DecodeResponse{}, fmt.Errorf("mkdir %s: %w", blockDir, err)
+ logtrace.Error(ctx, "mkdir block dir failed", fields)
+ return nil, nil, nil, nil, fmt.Errorf("mkdir %s: %w", dir, err)
}
- for _, sym := range blk.Symbols {
- symbolToBlock[sym] = blk.BlockID
+ blockDirs[b.BlockID] = dir
+ for _, sym := range b.Symbols {
+ s2b[sym] = b.BlockID
}
}
- // Write symbols to their respective block directories
- for id, data := range req.Symbols {
- blkID, ok := symbolToBlock[id]
- if !ok {
- fields[logtrace.FieldError] = "symbol not present in layout"
- return DecodeResponse{}, fmt.Errorf("symbol %s not present in layout", id)
+ ws = &Workspace{
+ ActionID: actionID,
+ SymbolsDir: symbolsDir,
+ BlockDirs: blockDirs,
+ symbolToBlock: s2b,
+ }
+
+ // Helper: atomic write (tmp file + rename) to avoid partials on crash
+ writeFileAtomic := func(dst string, data []byte) error {
+ tmp := dst + ".tmp"
+ if err := os.WriteFile(tmp, data, 0o644); err != nil {
+ return err
}
- blockDir := filepath.Join(symbolsDir, fmt.Sprintf("block_%d", blkID))
- symbolPath := filepath.Join(blockDir, id)
- if err := os.WriteFile(symbolPath, data, 0o644); err != nil {
- fields[logtrace.FieldError] = err.Error()
- return DecodeResponse{}, fmt.Errorf("write symbol %s: %w", id, err)
+ return os.Rename(tmp, dst)
+ }
+
+ // Write callback; if block < 0, resolve via layout reverse index; default to 0.
+ Write = func(block int, symbolID string, data []byte) (string, error) {
+ // Quick cancellation check
+ select {
+ case <-ctx.Done():
+ return "", ctx.Err()
+ default:
+ }
+
+ // Resolve block if caller passes default
+ if block < 0 {
+ ws.mu.RLock()
+ bid, ok := ws.symbolToBlock[symbolID]
+ ws.mu.RUnlock()
+ if !ok {
+ // Single-block simplification: default to block 0 if layout maps are absent
+ if len(ws.BlockDirs) == 0 || ws.BlockDirs[0] == "" {
+ return "", errors.Errorf("no block directories prepared")
+ }
+ bid = 0
+ }
+ block = bid
+ }
+
+ if block < 0 || block >= len(ws.BlockDirs) || ws.BlockDirs[block] == "" {
+ return "", errors.Errorf("invalid block index %d", block)
+ }
+
+ // sanitize symbolID to a basename (prevents traversal)
+ clean := path.Clean("/" + symbolID)
+ base := strings.TrimPrefix(clean, "/")
+ if base == "" || strings.Contains(base, "/") {
+ return "", errors.Errorf("invalid symbol id %q", symbolID)
}
+
+ dest := filepath.Join(ws.BlockDirs[block], base)
+ if err := writeFileAtomic(dest, data); err != nil {
+ return "", fmt.Errorf("write symbol %s (block %d): %w", base, block, err)
+ }
+ return dest, nil
+ }
+
+ Cleanup = func() error {
+ // Remove the whole workspace directory (symbols + layout + output if any)
+ return os.RemoveAll(symbolsDir)
}
- logtrace.Info(ctx, "symbols written to block directories", fields)
- // ---------- write layout.json ----------
- layoutPath := filepath.Join(symbolsDir, "layout.json")
- layoutBytes, err := json.Marshal(req.Layout)
+ logtrace.Debug(ctx, "prepare decode workspace created", logtrace.Fields{
+ "symbols_dir": symbolsDir,
+ "blocks": len(blockDirs),
+ })
+ return blockDirs, Write, Cleanup, ws, nil
+}
+
+// DecodeFromPrepared performs RaptorQ decode using an already-prepared workspace.
+// It writes layout.json under the workspace, runs decode, and returns output paths.
+func (rq *raptorQ) DecodeFromPrepared(
+ ctx context.Context,
+ ws *Workspace,
+ layout Layout,
+) (DecodeResponse, error) {
+ fields := logtrace.Fields{
+ logtrace.FieldMethod: "DecodeFromPrepared",
+ logtrace.FieldModule: "rq",
+ logtrace.FieldActionID: ws.ActionID,
+ }
+ logtrace.Debug(ctx, "RaptorQ decode (prepared) requested", fields)
+
+ processor, err := raptorq.NewRaptorQProcessor(rqSymbolSize, rqRedundancyFactor, rqMaxMemoryMB, rqConcurrency)
+ if err != nil {
+ fields[logtrace.FieldError] = err.Error()
+ return DecodeResponse{}, fmt.Errorf("create RaptorQ processor: %w", err)
+ }
+ defer processor.Free()
+
+ // Write layout.json (idempotent)
+ layoutPath := filepath.Join(ws.SymbolsDir, "layout.json")
+ layoutBytes, err := json.Marshal(layout)
if err != nil {
fields[logtrace.FieldError] = err.Error()
return DecodeResponse{}, fmt.Errorf("marshal layout: %w", err)
@@ -84,16 +184,74 @@ func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeRespons
fields[logtrace.FieldError] = err.Error()
return DecodeResponse{}, fmt.Errorf("write layout file: %w", err)
}
- logtrace.Info(ctx, "layout.json written", fields)
+ logtrace.Debug(ctx, "layout.json written (prepared)", fields)
- // Decode
- outputPath := filepath.Join(symbolsDir, "output")
- if err := processor.DecodeSymbols(symbolsDir, outputPath, layoutPath); err != nil {
+ // Decode to output (idempotent-safe: overwrite on success)
+ outputPath := filepath.Join(ws.SymbolsDir, "output")
+ if err := processor.DecodeSymbols(ws.SymbolsDir, outputPath, layoutPath); err != nil {
fields[logtrace.FieldError] = err.Error()
- _ = os.Remove(outputPath)
+ _ = os.Remove(outputPath) // best-effort cleanup of partial output
return DecodeResponse{}, fmt.Errorf("raptorq decode: %w", err)
}
- logtrace.Info(ctx, "RaptorQ decoding completed successfully", fields)
- return DecodeResponse{Path: outputPath, DecodeTmpDir: symbolsDir}, nil
+ logtrace.Debug(ctx, "RaptorQ decoding completed successfully (prepared)", logtrace.Fields{
+ "output_path": outputPath,
+ })
+ return DecodeResponse{FilePath: outputPath, DecodeTmpDir: ws.SymbolsDir}, nil
+}
+
+func (rq *raptorQ) Decode(ctx context.Context, req DecodeRequest) (DecodeResponse, error) {
+ fields := logtrace.Fields{
+ logtrace.FieldMethod: "Decode",
+ logtrace.FieldModule: "rq",
+ logtrace.FieldActionID: req.ActionID,
+ }
+ logtrace.Debug(ctx, "RaptorQ decode request received", fields)
+
+ // 1) Validate layout (the check)
+ if len(req.Layout.Blocks) == 0 {
+ fields[logtrace.FieldError] = "empty layout"
+ return DecodeResponse{}, fmt.Errorf("invalid layout: no blocks present")
+ }
+ for _, blk := range req.Layout.Blocks {
+ if len(blk.Symbols) == 0 {
+ fields[logtrace.FieldError] = fmt.Sprintf("block_%d has no symbols", blk.BlockID)
+ return DecodeResponse{}, fmt.Errorf("invalid layout: block %d has no symbols", blk.BlockID)
+ }
+ }
+
+ // 2) Prepare workspace (functionality)
+ _, Write, Cleanup, ws, err := rq.PrepareDecode(ctx, req.ActionID, req.Layout)
+ if err != nil {
+ fields[logtrace.FieldError] = err.Error()
+ return DecodeResponse{}, fmt.Errorf("prepare decode workspace: %w", err)
+ }
+
+ // Ensure workspace cleanup on failure. On success, caller cleans up via returned path.
+ success := false
+ defer func() {
+ if !success && Cleanup != nil {
+ _ = Cleanup()
+ }
+ }()
+
+ // 3) Persist provided in-memory symbols via Write (functionality)
+ if len(req.Symbols) > 0 {
+ for id, data := range req.Symbols {
+ if _, werr := Write(-1, id, data); werr != nil {
+ fields[logtrace.FieldError] = werr.Error()
+ return DecodeResponse{}, werr
+ }
+ }
+ logtrace.Debug(ctx, "symbols persisted via Write()", fields)
+ }
+
+ // 4) Decode using the prepared workspace (functionality)
+ resp, derr := rq.DecodeFromPrepared(ctx, ws, req.Layout)
+ if derr != nil {
+ fields[logtrace.FieldError] = derr.Error()
+ return DecodeResponse{}, derr
+ }
+ success = true
+ return resp, nil
}
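
Taken together, the split lets a caller stream symbols to disk before decoding. A minimal sketch of the flow, assuming `rq`, `actionID`, `layout`, and an incoming `symbols` map from the surrounding code (error logging elided):

```go
_, write, cleanup, ws, err := rq.PrepareDecode(ctx, actionID, layout)
if err != nil {
	return err
}
success := false
defer func() {
	if !success {
		_ = cleanup() // drop the workspace on abort, as Decode does
	}
}()

// Persist symbols as they arrive; block -1 resolves the block via the layout.
for id, data := range symbols {
	if _, err := write(-1, id, data); err != nil {
		return err
	}
}

resp, err := rq.DecodeFromPrepared(ctx, ws, layout)
if err != nil {
	return err
}
success = true
// resp.FilePath is the decoded output; resp.DecodeTmpDir is the workspace root.
```
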
diff --git a/pkg/codec/raptorq.go b/pkg/codec/raptorq.go
index 8e0c1c6c..7c28d0c5 100644
--- a/pkg/codec/raptorq.go
+++ b/pkg/codec/raptorq.go
@@ -17,7 +17,7 @@ const (
// Limit RaptorQ processor memory usage to ~2 GiB
rqMaxMemoryMB uint64 = 2 * 1024 // MB
// Concurrency tuned for 2 GiB limit and typical 8+ core CPUs
- rqConcurrency uint64 = 6
+ rqConcurrency uint64 = 1
// Target single-block output for up to 1 GiB files with padding headroom (~1.25 GiB)
rqBlockSize int = 1280 * 1024 * 1024 // bytes (1,280 MiB)
)
@@ -48,7 +48,7 @@ func (rq *raptorQ) Encode(ctx context.Context, req EncodeRequest) (EncodeRespons
return EncodeResponse{}, fmt.Errorf("create RaptorQ processor: %w", err)
}
defer processor.Free()
- logtrace.Info(ctx, "RaptorQ processor created", fields)
+ logtrace.Debug(ctx, "RaptorQ processor created", fields)
/* ---------- 1. run the encoder ---------- */
// Deterministic: force single block
@@ -60,7 +60,7 @@ func (rq *raptorQ) Encode(ctx context.Context, req EncodeRequest) (EncodeRespons
os.Remove(req.Path)
return EncodeResponse{}, fmt.Errorf("mkdir %s: %w", symbolsDir, err)
}
- logtrace.Info(ctx, "RaptorQ processor encoding", fields)
+ logtrace.Debug(ctx, "RaptorQ processor encoding", fields)
resp, err := processor.EncodeFile(req.Path, symbolsDir, blockSize)
if err != nil {
@@ -74,7 +74,7 @@ func (rq *raptorQ) Encode(ctx context.Context, req EncodeRequest) (EncodeRespons
/* ---------- 2. read the layout JSON ---------- */
layoutData, err := os.ReadFile(resp.LayoutFilePath)
- logtrace.Info(ctx, "RaptorQ processor layout file", logtrace.Fields{
+ logtrace.Debug(ctx, "RaptorQ processor layout file", logtrace.Fields{
"layout-file": resp.LayoutFilePath})
if err != nil {
fields[logtrace.FieldError] = err.Error()
diff --git a/pkg/common/task/task.go b/pkg/common/task/task.go
index e4bb062a..adf173e4 100644
--- a/pkg/common/task/task.go
+++ b/pkg/common/task/task.go
@@ -92,13 +92,13 @@ func (task *task) RunAction(ctx context.Context) error {
for {
select {
case <-ctx.Done():
- logtrace.Info(ctx, "context done", logtrace.Fields{"task_id": task.ID()})
+ logtrace.Debug(ctx, "context done", logtrace.Fields{"task_id": task.ID()})
case <-task.Done():
- logtrace.Info(ctx, "task done", logtrace.Fields{"task_id": task.ID()})
+ logtrace.Debug(ctx, "task done", logtrace.Fields{"task_id": task.ID()})
cancel()
case action, ok := <-task.actionCh:
if !ok {
- logtrace.Info(ctx, "action channel closed", logtrace.Fields{"task_id": task.ID()})
+ logtrace.Debug(ctx, "action channel closed", logtrace.Fields{"task_id": task.ID()})
return group.Wait()
}
diff --git a/pkg/common/task/worker.go b/pkg/common/task/worker.go
index 280b5fb8..14043079 100644
--- a/pkg/common/task/worker.go
+++ b/pkg/common/task/worker.go
@@ -91,7 +91,7 @@ func (worker *Worker) Run(ctx context.Context) error {
logtrace.Error(ctx, "Recovered from panic in common task's worker run", logtrace.Fields{"task": currentTask.ID(), "error": r})
}
- logtrace.Info(ctx, "Task Removed", logtrace.Fields{"task": currentTask.ID()})
+ logtrace.Debug(ctx, "Task Removed", logtrace.Fields{"task": currentTask.ID()})
// Remove the task from the worker's task list
worker.RemoveTask(currentTask)
}()
diff --git a/pkg/dd/image_rareness.go b/pkg/dd/image_rareness.go
index d021da1b..74fec800 100644
--- a/pkg/dd/image_rareness.go
+++ b/pkg/dd/image_rareness.go
@@ -56,7 +56,7 @@ func (c *ddServerClientImpl) ImageRarenessScore(ctx context.Context, req Rarenes
logtrace.FieldMethod: "ImageRarenessScore",
logtrace.FieldRequest: req,
}
- logtrace.Info(ctx, "getting image rareness score", fields)
+ logtrace.Debug(ctx, "getting image rareness score", fields)
res, err := c.ddService.ImageRarenessScore(ctx, &ddService.RarenessScoreRequest{ImageFilepath: req.Filepath})
if err != nil {
@@ -65,7 +65,7 @@ func (c *ddServerClientImpl) ImageRarenessScore(ctx context.Context, req Rarenes
return ImageRarenessScoreResponse{}, fmt.Errorf("dd image rareness score error: %w", err)
}
- logtrace.Info(ctx, "successfully got image rareness score", fields)
+ logtrace.Debug(ctx, "successfully got image rareness score", fields)
return toImageRarenessScoreResponse(res), nil
}
diff --git a/pkg/dd/status.go b/pkg/dd/status.go
index fc7f4d30..812b62d6 100644
--- a/pkg/dd/status.go
+++ b/pkg/dd/status.go
@@ -26,7 +26,7 @@ func (c *ddServerClientImpl) GetStatus(ctx context.Context, req GetStatusRequest
logtrace.FieldMethod: "GetStatus",
logtrace.FieldRequest: req,
}
- logtrace.Info(ctx, "getting status", fields)
+ logtrace.Debug(ctx, "getting status", fields)
res, err := c.ddService.GetStatus(ctx, &ddService.GetStatusRequest{})
if err != nil {
@@ -35,7 +35,7 @@ func (c *ddServerClientImpl) GetStatus(ctx context.Context, req GetStatusRequest
return GetStatusResponse{}, fmt.Errorf("dd get status error: %w", err)
}
- logtrace.Info(ctx, "successfully got status", fields)
+ logtrace.Debug(ctx, "successfully got status", fields)
return GetStatusResponse{
Version: res.GetVersion(),
TaskCount: res.GetTaskCount(),
diff --git a/pkg/logtrace/datadog.go b/pkg/logtrace/datadog.go
new file mode 100644
index 00000000..95830836
--- /dev/null
+++ b/pkg/logtrace/datadog.go
@@ -0,0 +1,204 @@
+package logtrace
+
+import (
+ "bytes"
+ "compress/gzip"
+ "context"
+ "encoding/json"
+ "net/http"
+ "os"
+ "strings"
+ "sync"
+ "time"
+
+ "go.uber.org/zap/zapcore"
+)
+
+// Minimal Datadog Logs Forwarder kept separate for cleanliness; configured via DD_API_KEY/DD_SITE env vars or build-time -ldflags.
+
+type ddCfg struct {
+ APIKey string
+ Site string // e.g. "datadoghq.com", "datadoghq.eu"
+ Service string // Datadog 'service' tag; set to the node IP via SetDatadogService
+ Host string // optional; defaults to machine hostname
+}
+
+var (
+ ddOnce sync.Once
+ ddConfig ddCfg
+ ddClient = &http.Client{Timeout: 5 * time.Second}
+ ddQueue chan map[string]any
+ // Optional build-time injection via -ldflags
+ // -ldflags "-X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDAPIKey=... -X github.com/LumeraProtocol/supernode/v2/pkg/logtrace.DDSite=us5.datadoghq.com"
+ DDAPIKey string
+ DDSite string
+)
+
+// SetupDatadog initializes the Datadog forwarding once.
+func SetupDatadog(service string) {
+ ddOnce.Do(func() {
+ initDatadog(service)
+ })
+}
+
+// ForwardDatadog enqueues a log line for forwarding (non-blocking).
+func ForwardDatadog(level zapcore.Level, ctx context.Context, msg string, fields Fields) {
+ ddForward(level, ctx, msg, fields)
+}
+
+// SetDatadogService allows setting the Datadog service (e.g., to the node IP)
+func SetDatadogService(service string) {
+ if s := strings.TrimSpace(service); s != "" {
+ ddConfig.Service = s
+ }
+}
+
+// SetDatadogHost sets the Datadog host field (use the supernode identity)
+func SetDatadogHost(host string) {
+ if h := strings.TrimSpace(host); h != "" {
+ ddConfig.Host = h
+ }
+}
+
+func initDatadog(service string) {
+ // Base defaults (site default chosen based on earlier validation)
+ ddConfig = ddCfg{Site: "us5.datadoghq.com", Service: service, Host: ""}
+
+ // Resolve from env and build flags
+ apiKey := strings.TrimSpace(os.Getenv("DD_API_KEY"))
+ if apiKey == "" {
+ apiKey = strings.TrimSpace(DDAPIKey)
+ }
+
+ site := strings.TrimSpace(os.Getenv("DD_SITE"))
+ if site == "" {
+ site = strings.TrimSpace(DDSite)
+ if site == "" {
+ site = ddConfig.Site
+ }
+ }
+
+ ddConfig.APIKey = apiKey
+ ddConfig.Site = site
+
+ // Only enable forwarding when a real key is present
+ if ddConfig.APIKey == "" {
+ return
+ }
+
+ ddQueue = make(chan map[string]any, 256)
+ go ddLoop()
+}
+
+// ddForward enqueues a single log entry for Datadog intake.
+func ddForward(level zapcore.Level, ctx context.Context, msg string, fields Fields) {
+ if ddQueue == nil {
+ return
+ }
+
+ // Map zap level to Datadog status
+ status := "info"
+ switch level {
+ case zapcore.DebugLevel:
+ status = "debug"
+ case zapcore.InfoLevel:
+ status = "info"
+ case zapcore.WarnLevel:
+ status = "warn"
+ case zapcore.ErrorLevel:
+ status = "error"
+ case zapcore.FatalLevel:
+ status = "critical"
+ }
+
+ // Build a compact attributes map
+ attrs := map[string]any{}
+ for k, v := range fields {
+ attrs[k] = v
+ }
+ // Attach correlation ID if present
+ if cid := extractCorrelationID(ctx); cid != "unknown" {
+ attrs["correlation_id"] = cid
+ }
+
+ entry := map[string]any{
+ "message": msg,
+ "status": status,
+ "service": ddConfig.Service,
+ "host": ddConfig.Host,
+ "attributes": attrs, // avoid collisions with top-level fields
+ }
+
+ select {
+ case ddQueue <- entry:
+ default:
+ // drop if queue is full to avoid blocking critical paths
+ }
+}
+
+// ddLoop batches log entries and sends to Datadog intake.
+func ddLoop() {
+ ticker := time.NewTicker(2 * time.Second)
+ defer ticker.Stop()
+
+ batch := make([]map[string]any, 0, 32)
+ flush := func() {
+ if len(batch) == 0 {
+ return
+ }
+ // Marshal batch
+ buf := &bytes.Buffer{}
+ if err := json.NewEncoder(buf).Encode(batch); err != nil {
+ batch = batch[:0]
+ return
+ }
+ _ = ddPost(buf.Bytes())
+ batch = batch[:0]
+ }
+
+ for {
+ select {
+ case e, ok := <-ddQueue:
+ if !ok {
+ flush()
+ return
+ }
+ batch = append(batch, e)
+ if len(batch) >= 32 {
+ flush()
+ }
+ case <-ticker.C:
+ flush()
+ }
+ }
+}
+
+func ddPost(payload []byte) error {
+ url := "https://http-intake.logs." + strings.TrimSpace(ddConfig.Site) + "/api/v2/logs"
+
+ // gzip the JSON payload; fall back to the raw bytes if compression fails
+ var gzBuf bytes.Buffer
+ gzipped := true
+ gw := gzip.NewWriter(&gzBuf)
+ _, werr := gw.Write(payload)
+ if cerr := gw.Close(); werr != nil || cerr != nil {
+ gzBuf = *bytes.NewBuffer(payload)
+ gzipped = false
+ }
+
+ req, err := http.NewRequest(http.MethodPost, url, &gzBuf)
+ if err != nil {
+ return err
+ }
+ req.Header.Set("Content-Type", "application/json")
+ req.Header.Set("Accept", "application/json")
+ if gzipped {
+ req.Header.Set("Content-Encoding", "gzip")
+ }
+ req.Header.Set("DD-API-KEY", ddConfig.APIKey)
+
+ resp, err := ddClient.Do(req)
+ if err != nil {
+ return err
+ }
+ _ = resp.Body.Close()
+ return nil
+}
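
Startup wiring is deliberately minimal; a sketch (nodeIP and identity are placeholders for values the caller already holds):

```go
logtrace.SetupDatadog("supernode") // no-op unless DD_API_KEY is set or injected via -ldflags
logtrace.SetDatadogService(nodeIP) // per the comments above, 'service' carries the node IP
logtrace.SetDatadogHost(identity)  // and 'host' carries the supernode identity
```
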
diff --git a/pkg/logtrace/log.go b/pkg/logtrace/log.go
index 02b8f36e..d1bcf169 100644
--- a/pkg/logtrace/log.go
+++ b/pkg/logtrace/log.go
@@ -45,6 +45,9 @@ func Setup(serviceName string) {
if err != nil {
panic(err)
}
+
+ // Initialize Datadog forwarding (minimal integration in separate file)
+ SetupDatadog(serviceName)
}
// getLogLevel returns the log level from environment variable LOG_LEVEL
@@ -129,6 +132,9 @@ func logWithLevel(level zapcore.Level, ctx context.Context, message string, fiel
case zapcore.FatalLevel:
logger.Fatal(message, zapFields...)
}
+
+ // Forward to Datadog (non-blocking, best-effort)
+ ForwardDatadog(level, ctx, message, fields)
}
// Error logs an error message with structured fields
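
Because this change set demotes most routine Info logs to Debug, those messages now surface only when the debug level is enabled, e.g. by exporting LOG_LEVEL=debug per the getLogLevel comment above. A minimal sketch:

```go
// Run the process with LOG_LEVEL=debug to keep seeing the demoted messages.
logtrace.Setup("supernode") // per the hunk above, this also initializes Datadog forwarding
logtrace.Debug(context.Background(), "p2p service is started", logtrace.Fields{})
```
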
diff --git a/pkg/lumera/connection.go b/pkg/lumera/connection.go
index ab28702c..06c39748 100644
--- a/pkg/lumera/connection.go
+++ b/pkg/lumera/connection.go
@@ -127,7 +127,7 @@ func newGRPCConnection(ctx context.Context, rawAddr string) (Connection, error)
if firstCand.useTLS {
scheme = "tls"
}
- logtrace.Info(ctx, "gRPC connection established", logtrace.Fields{
+ logtrace.Debug(ctx, "gRPC connection established", logtrace.Fields{
"target": firstCand.target,
"scheme": scheme,
})
diff --git a/pkg/lumera/modules/auth/impl.go b/pkg/lumera/modules/auth/impl.go
index 3597d7a9..a3ad3bca 100644
--- a/pkg/lumera/modules/auth/impl.go
+++ b/pkg/lumera/modules/auth/impl.go
@@ -46,7 +46,7 @@ func (m *module) Verify(ctx context.Context, accAddress string, data, signature
return fmt.Errorf("invalid address: %w", err)
}
- logtrace.Info(ctx, "Verifying signature", logtrace.Fields{"address": addr.String()})
+ logtrace.Debug(ctx, "Verifying signature", logtrace.Fields{"address": addr.String()})
// Use Account RPC instead of AccountInfo to get the full account with public key
accResp, err := m.client.Account(ctx, &authtypes.QueryAccountRequest{
@@ -66,7 +66,7 @@ func (m *module) Verify(ctx context.Context, accAddress string, data, signature
if pubKey == nil {
return fmt.Errorf("public key is nil")
}
- logtrace.Info(ctx, "Public key retrieved", logtrace.Fields{"pubKey": pubKey.String()})
+ logtrace.Debug(ctx, "Public key retrieved", logtrace.Fields{"pubKey": pubKey.String()})
if !pubKey.VerifySignature(data, signature) {
return fmt.Errorf("invalid signature")
}
diff --git a/pkg/lumera/modules/tx/impl.go b/pkg/lumera/modules/tx/impl.go
index d342601b..2f80ac58 100644
--- a/pkg/lumera/modules/tx/impl.go
+++ b/pkg/lumera/modules/tx/impl.go
@@ -103,7 +103,7 @@ func (m *module) SimulateTransaction(ctx context.Context, msgs []types.Msg, acco
return nil, fmt.Errorf("simulation error: %w", err)
}
- logtrace.Info(ctx, fmt.Sprintf("simulation complete | gasUsed=%d", simRes.GasInfo.GasUsed), nil)
+ logtrace.Debug(ctx, fmt.Sprintf("simulation complete | gasUsed=%d", simRes.GasInfo.GasUsed), nil)
return simRes, nil
}
@@ -143,7 +143,7 @@ func (m *module) BuildAndSignTransaction(ctx context.Context, msgs []types.Msg,
return nil, fmt.Errorf("failed to sign transaction: %w", err)
}
- logtrace.Info(ctx, "transaction signed successfully", nil)
+ logtrace.Debug(ctx, "transaction signed successfully", nil)
// Encode signed transaction
txBytes, err := clientCtx.TxConfig.TxEncoder()(txBuilder.GetTx())
@@ -273,7 +273,7 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou
// Step 3: Calculate fee based on adjusted gas
fee := m.CalculateFee(gasToUse, config)
- logtrace.Info(ctx, fmt.Sprintf("using simulated gas and calculated fee | simulatedGas=%d adjustedGas=%d fee=%s", simulatedGasUsed, gasToUse, fee), nil)
+ logtrace.Debug(ctx, fmt.Sprintf("using simulated gas and calculated fee | simulatedGas=%d adjustedGas=%d fee=%s", simulatedGasUsed, gasToUse, fee), nil)
// Step 4: Build and sign transaction
txBytes, err := m.BuildAndSignTransaction(ctx, msgs, accountInfo, gasToUse, fee, config)
@@ -288,7 +288,7 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou
}
if result != nil && result.TxResponse != nil && result.TxResponse.Code == 0 && len(result.TxResponse.Events) == 0 {
- logtrace.Info(ctx, "Transaction broadcast successful, waiting for inclusion to get events...", nil)
+ logtrace.Debug(ctx, "Transaction broadcast successful, waiting for inclusion to get events...", nil)
// Retry 5 times with 1 second intervals
var txResp *sdktx.GetTxResponse
@@ -298,7 +298,7 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou
txResp, err = m.GetTransaction(ctx, result.TxResponse.TxHash)
if err == nil && txResp != nil && txResp.TxResponse != nil {
// Successfully got the transaction with events
- logtrace.Info(ctx, fmt.Sprintf("Retrieved transaction with %d events", len(txResp.TxResponse.Events)), nil)
+ logtrace.Debug(ctx, fmt.Sprintf("Retrieved transaction with %d events", len(txResp.TxResponse.Events)), nil)
result.TxResponse = txResp.TxResponse
break
}
@@ -310,7 +310,9 @@ func (m *module) ProcessTransaction(ctx context.Context, msgs []types.Msg, accou
}
if len(result.TxResponse.Events) == 0 {
- logtrace.Error(ctx, "Failed to retrieve transaction events after 5 attempts", nil)
+ logtrace.Error(ctx, "Failed to retrieve transaction events after 5 attempts", logtrace.Fields{
+ "response": result.TxResponse.String,
+ })
}
return result, nil
diff --git a/pkg/net/grpc/client/client.go b/pkg/net/grpc/client/client.go
index dc4f45de..907c5b58 100644
--- a/pkg/net/grpc/client/client.go
+++ b/pkg/net/grpc/client/client.go
@@ -112,9 +112,9 @@ var defaultBackoffConfig = backoff.Config{
func DefaultClientOptions() *ClientOptions {
return &ClientOptions{
MaxRecvMsgSize: 100 * MB,
- MaxSendMsgSize: 100 * MB, // 100MB
- InitialWindowSize: (int32)(1 * MB), // 1MB - controls initial frame size for streams
- InitialConnWindowSize: (int32)(1 * MB), // 1MB - controls initial frame size for connection
+ MaxSendMsgSize: 100 * MB, // 100MB
+ InitialWindowSize: (int32)(32 * MB), // 32MB - controls initial frame size for streams
+ InitialConnWindowSize: (int32)(128 * MB), // 128MB - controls initial frame size for connection
ConnWaitTime: defaultConnWaitTime,
KeepAliveTime: 30 * time.Minute,
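
These options presumably translate to grpc-go's standard dial options; a raw-API sketch (not this repo's client wrapper) of the same flow-control tuning:

```go
conn, err := grpc.Dial(addr,
	grpc.WithTransportCredentials(insecure.NewCredentials()),
	grpc.WithInitialWindowSize(32*1024*1024),      // per-stream HTTP/2 flow-control window
	grpc.WithInitialConnWindowSize(128*1024*1024), // connection-level window
	grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(100*1024*1024)),
)
```

The larger windows let 4 MB chunks stream without stalling on WINDOW_UPDATE round-trips, at the cost of more buffered memory per connection.
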
diff --git a/pkg/net/grpc/client/client_test.go b/pkg/net/grpc/client/client_test.go
index 12b196fa..0b4a10e1 100644
--- a/pkg/net/grpc/client/client_test.go
+++ b/pkg/net/grpc/client/client_test.go
@@ -86,8 +86,8 @@ func TestDefaultClientOptions(t *testing.T) {
assert.NotNil(t, opts, "ClientOptions should be initialized")
assert.Equal(t, 100*MB, opts.MaxRecvMsgSize, "MaxRecvMsgSize should be 100 MB")
assert.Equal(t, 100*MB, opts.MaxSendMsgSize, "MaxSendMsgSize should be 100 MB")
- assert.Equal(t, int32(1*MB), opts.InitialWindowSize, "InitialWindowSize should be 1 MB")
- assert.Equal(t, int32(1*MB), opts.InitialConnWindowSize, "InitialConnWindowSize should be 1 MB")
+ assert.Equal(t, int32(32*MB), opts.InitialWindowSize, "InitialWindowSize should be 32 MB")
+ assert.Equal(t, int32(128*MB), opts.InitialConnWindowSize, "InitialConnWindowSize should be 128 MB")
assert.Equal(t, defaultConnWaitTime, opts.ConnWaitTime, "ConnWaitTime should be 10 seconds")
assert.True(t, opts.EnableRetries, "EnableRetries should be true")
assert.Equal(t, maxRetries, opts.MaxRetries, "MaxRetries should be 5")
diff --git a/pkg/net/grpc/server/server.go b/pkg/net/grpc/server/server.go
index 10a2452c..ae1a3524 100644
--- a/pkg/net/grpc/server/server.go
+++ b/pkg/net/grpc/server/server.go
@@ -94,7 +94,7 @@ func DefaultServerOptions() *ServerOptions {
return &ServerOptions{
MaxRecvMsgSize: 100 * MB,
MaxSendMsgSize: 100 * MB,
- InitialWindowSize: (int32)(1 * MB),
+ InitialWindowSize: (int32)(32 * MB),
InitialConnWindowSize: (int32)(1 * MB),
MaxConcurrentStreams: 1000,
GracefulShutdownTime: defaultGracefulShutdownTimeout,
@@ -102,13 +102,13 @@ func DefaultServerOptions() *ServerOptions {
MaxConnectionIdle: 2 * time.Hour,
MaxConnectionAge: 2 * time.Hour,
MaxConnectionAgeGrace: 1 * time.Hour,
- Time: 1 * time.Hour,
+ Time: 30 * time.Minute,
Timeout: 30 * time.Minute,
MinTime: 5 * time.Minute,
PermitWithoutStream: true,
- WriteBufferSize: 32 * KB,
- ReadBufferSize: 32 * KB,
+ WriteBufferSize: 512 * KB,
+ ReadBufferSize: 512 * KB,
}
}
@@ -203,7 +203,7 @@ func (s *Server) createListener(ctx context.Context, address string) (net.Listen
if err != nil {
return nil, errors.Errorf("failed to create listener: %w", err).WithField("address", address)
}
- logtrace.Info(ctx, "gRPC server listening", logtrace.Fields{"address": address})
+ logtrace.Debug(ctx, "gRPC server listening", logtrace.Fields{"address": address})
return lis, nil
}
@@ -256,7 +256,7 @@ func (s *Server) Serve(ctx context.Context, address string, opts *ServerOptions)
// Wait for context cancellation or error
select {
case <-ctx.Done():
- logtrace.Info(ctx, "Shutting down gRPC server", logtrace.Fields{"address": address})
+ logtrace.Debug(ctx, "Shutting down gRPC server", logtrace.Fields{"address": address})
return s.Stop(opts.GracefulShutdownTime)
case err := <-serveErr:
return err
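
On the server side these fields presumably feed grpc-go's ServerOptions and keepalive parameters; a raw-API sketch of the new defaults:

```go
srv := grpc.NewServer(
	grpc.InitialWindowSize(32*1024*1024), // match the raised per-stream window
	grpc.WriteBufferSize(512*1024),
	grpc.ReadBufferSize(512*1024),
	grpc.KeepaliveParams(keepalive.ServerParameters{
		Time:    30 * time.Minute, // ping idle clients twice as often as before
		Timeout: 30 * time.Minute,
	}),
)
```
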
diff --git a/pkg/net/grpc/server/server_test.go b/pkg/net/grpc/server/server_test.go
index 4c6067e0..f2308436 100644
--- a/pkg/net/grpc/server/server_test.go
+++ b/pkg/net/grpc/server/server_test.go
@@ -55,18 +55,18 @@ func TestDefaultServerOptions(t *testing.T) {
assert.NotNil(t, opts, "Server options should be initialized")
assert.Equal(t, 100*MB, opts.MaxRecvMsgSize, "MaxRecvMsgSize should be 100 MB")
assert.Equal(t, 100*MB, opts.MaxSendMsgSize, "MaxSendMsgSize should be 100 MB")
- assert.Equal(t, int32(1*MB), opts.InitialWindowSize, "InitialWindowSize should be 1 MB")
+ assert.Equal(t, int32(32*MB), opts.InitialWindowSize, "InitialWindowSize should be 32 MB")
assert.Equal(t, int32(1*MB), opts.InitialConnWindowSize, "InitialConnWindowSize should be 1 MB")
assert.Equal(t, uint32(1000), opts.MaxConcurrentStreams, "MaxConcurrentStreams should be 1000")
assert.Equal(t, defaultGracefulShutdownTimeout, opts.GracefulShutdownTime,
fmt.Sprintf("GracefulShutdownTimeout should be %v", defaultGracefulShutdownTimeout))
assert.Equal(t, uint32(0), opts.NumServerWorkers, "NumServerWorkers should be 0")
- assert.Equal(t, 32*KB, opts.WriteBufferSize, "WriteBufferSize should be 32 KB")
- assert.Equal(t, 32*KB, opts.ReadBufferSize, "ReadBufferSize should be 32 KB")
+ assert.Equal(t, 512*KB, opts.WriteBufferSize, "WriteBufferSize should be 512 KB")
+ assert.Equal(t, 512*KB, opts.ReadBufferSize, "ReadBufferSize should be 512 KB")
assert.Equal(t, 2*time.Hour, opts.MaxConnectionIdle, "MaxConnectionIdle should be 2 hours")
assert.Equal(t, 2*time.Hour, opts.MaxConnectionAge, "MaxConnectionAge should be 2 hours")
assert.Equal(t, 1*time.Hour, opts.MaxConnectionAgeGrace, "MaxConnectionAgeGrace should be 1 hour")
- assert.Equal(t, 1*time.Hour, opts.Time, "Time should be 1 hour")
+ assert.Equal(t, 30*time.Minute, opts.Time, "Time should be 30 minutes")
assert.Equal(t, 30*time.Minute, opts.Timeout, "Timeout should be 30 minutes")
assert.Equal(t, 5*time.Minute, opts.MinTime, "MinTime should be 5 minutes")
assert.True(t, opts.PermitWithoutStream, "PermitWithoutStream should be true")
diff --git a/pkg/net/interceptor.go b/pkg/net/interceptor.go
index f29d88a1..b33aadcf 100644
--- a/pkg/net/interceptor.go
+++ b/pkg/net/interceptor.go
@@ -34,7 +34,7 @@ func UnaryServerInterceptor() grpc.UnaryServerInterceptor {
logtrace.FieldMethod: info.FullMethod,
logtrace.FieldCorrelationID: correlationID,
}
- logtrace.Info(ctx, "received gRPC request", fields)
+ logtrace.Debug(ctx, "received gRPC request", fields)
resp, err := handler(ctx, req)
@@ -42,7 +42,7 @@ func UnaryServerInterceptor() grpc.UnaryServerInterceptor {
fields[logtrace.FieldError] = err.Error()
logtrace.Error(ctx, "gRPC request failed", fields)
} else {
- logtrace.Info(ctx, "gRPC request processed successfully", fields)
+ logtrace.Debug(ctx, "gRPC request processed successfully", fields)
}
return resp, err
diff --git a/pkg/p2pmetrics/metrics.go b/pkg/p2pmetrics/metrics.go
index 945225db..165f0eaa 100644
--- a/pkg/p2pmetrics/metrics.go
+++ b/pkg/p2pmetrics/metrics.go
@@ -197,6 +197,7 @@ func BuildStoreEventPayloadFromCollector(taskID string) map[string]any {
// Retrieve session
type retrieveSession struct {
+ mu sync.RWMutex
CallsByIP map[string][]Call
FoundLocal int
FoundNet int
@@ -208,33 +209,94 @@ type retrieveSession struct {
var retrieveSessions = struct{ m map[string]*retrieveSession }{m: map[string]*retrieveSession{}}
-// RegisterRetrieveBridge hooks retrieve callbacks into the retrieve collector.
+// internal event channel for retrieve metrics (per task)
+type retrieveEvent struct {
+ typ int // 0: per-node call, 1: found-local update
+ call Call
+ n int
+}
+
+var retrieveEventChans = struct {
+ mu sync.Mutex
+ m map[string]chan retrieveEvent
+}{m: map[string]chan retrieveEvent{}}
+
+// StartRetrieveCapture hooks retrieve callbacks into a buffered channel and a
+// single goroutine that serializes updates to avoid concurrent map writes.
func StartRetrieveCapture(taskID string) {
- RegisterRetrieveHook(taskID, func(c Call) {
- s := retrieveSessions.m[taskID]
- if s == nil {
- s = &retrieveSession{CallsByIP: map[string][]Call{}}
- retrieveSessions.m[taskID] = s
+ // Create or get session upfront
+ s := retrieveSessions.m[taskID]
+ if s == nil {
+ s = &retrieveSession{CallsByIP: map[string][]Call{}}
+ retrieveSessions.m[taskID] = s
+ }
+
+ // Per-task buffered channel
+ ch := make(chan retrieveEvent, 4096)
+ retrieveEventChans.mu.Lock()
+ retrieveEventChans.m[taskID] = ch
+ retrieveEventChans.mu.Unlock()
+
+ // Worker goroutine to serialize writes
+ go func(taskID string, ch <-chan retrieveEvent) {
+ for ev := range ch {
+ sess := retrieveSessions.m[taskID]
+ if sess == nil {
+ sess = &retrieveSession{CallsByIP: map[string][]Call{}}
+ retrieveSessions.m[taskID] = sess
+ }
+ switch ev.typ {
+ case 0: // per-node call
+ key := ev.call.IP
+ if key == "" {
+ key = ev.call.Address
+ }
+ sess.mu.Lock()
+ if sess.CallsByIP == nil {
+ sess.CallsByIP = map[string][]Call{}
+ }
+ sess.CallsByIP[key] = append(sess.CallsByIP[key], ev.call)
+ sess.mu.Unlock()
+ case 1: // found-local update
+ sess.FoundLocal = ev.n
+ }
}
- key := c.IP
- if key == "" {
- key = c.Address
+ }(taskID, ch)
+
+ // Register hooks that enqueue events (non-blocking)
+ RegisterRetrieveHook(taskID, func(c Call) {
+ retrieveEventChans.mu.Lock()
+ ch, ok := retrieveEventChans.m[taskID]
+ retrieveEventChans.mu.Unlock()
+ if ok {
+ select {
+ case ch <- retrieveEvent{typ: 0, call: c}:
+ default: // drop if buffer is full
+ }
}
- s.CallsByIP[key] = append(s.CallsByIP[key], c)
})
RegisterFoundLocalHook(taskID, func(n int) {
- s := retrieveSessions.m[taskID]
- if s == nil {
- s = &retrieveSession{CallsByIP: map[string][]Call{}}
- retrieveSessions.m[taskID] = s
+ retrieveEventChans.mu.Lock()
+ ch, ok := retrieveEventChans.m[taskID]
+ retrieveEventChans.mu.Unlock()
+ if ok {
+ select {
+ case ch <- retrieveEvent{typ: 1, n: n}:
+ default:
+ }
}
- s.FoundLocal = n
})
}
func StopRetrieveCapture(taskID string) {
UnregisterRetrieveHook(taskID)
UnregisterFoundLocalHook(taskID)
+ retrieveEventChans.mu.Lock()
+ if ch, ok := retrieveEventChans.m[taskID]; ok {
+ delete(retrieveEventChans.m, taskID)
+ close(ch)
+ }
+ retrieveEventChans.mu.Unlock()
}
// SetRetrieveBatchSummary sets counts for a retrieval attempt.
@@ -284,6 +346,16 @@ func BuildDownloadEventPayloadFromCollector(taskID string) map[string]any {
},
}
}
+ // Create a snapshot copy of CallsByIP to avoid concurrent map access
+ s.mu.RLock()
+ callsCopy := make(map[string][]Call, len(s.CallsByIP))
+ for k, v := range s.CallsByIP {
+ vv := make([]Call, len(v))
+ copy(vv, v)
+ callsCopy[k] = vv
+ }
+ s.mu.RUnlock()
+
return map[string]any{
"retrieve": map[string]any{
"keys": s.Keys,
@@ -292,7 +364,7 @@ func BuildDownloadEventPayloadFromCollector(taskID string) map[string]any {
"found_net": s.FoundNet,
"retrieve_ms": s.RetrieveMS,
"decode_ms": s.DecodeMS,
- "calls_by_ip": s.CallsByIP,
+ "calls_by_ip": callsCopy,
},
}
}
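
The underlying pattern is a single-writer event loop: concurrent callbacks enqueue into a buffered channel, one goroutine owns the map, and producers drop rather than block when the buffer is full. A self-contained sketch:

```go
package main

type call struct{ ip string }

func main() {
	byIP := map[string][]call{} // owned by the worker goroutine only
	events := make(chan call, 4096)

	go func() {
		for c := range events {
			byIP[c.ip] = append(byIP[c.ip], c) // single writer: no races
		}
	}()

	// Producer side: never block the hot path.
	select {
	case events <- call{ip: "10.0.0.1"}:
	default: // drop when the buffer is full
	}
	close(events)
	// Readers must still synchronize with the worker, e.g. via the RWMutex
	// snapshot taken in BuildDownloadEventPayloadFromCollector above.
}
```
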
diff --git a/pkg/storage/queries/health_check.go b/pkg/storage/queries/health_check.go
index e76799da..96802dd8 100644
--- a/pkg/storage/queries/health_check.go
+++ b/pkg/storage/queries/health_check.go
@@ -98,10 +98,10 @@ func (s *SQLiteStore) GetHCSummaryStats(from time.Time) (hcMetrics metrics.HCMet
if err != nil {
return hcMetrics, err
}
- logtrace.Info(context.Background(), "observer evaluations retrieved", logtrace.Fields{"observer_evaluations": len(hcObserversEvaluations), "from": from})
+ logtrace.Debug(context.Background(), "observer evaluations retrieved", logtrace.Fields{"observer_evaluations": len(hcObserversEvaluations), "from": from})
observerEvaluationMetrics := processHCObserverEvaluations(hcObserversEvaluations)
- logtrace.Info(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{"observer_evaluation_metrics": len(observerEvaluationMetrics), "from": from})
+ logtrace.Debug(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{"observer_evaluation_metrics": len(observerEvaluationMetrics), "from": from})
for _, obMetrics := range observerEvaluationMetrics {
if obMetrics.ChallengesVerified >= 3 {
@@ -154,7 +154,7 @@ func (s *SQLiteStore) GetMetricsDataByHealthCheckChallengeID(ctx context.Context
if err != nil {
return healthCheckChallengeMessages, err
}
- logtrace.Info(ctx, "health-check-challenge metrics row count", logtrace.Fields{"rows": len(hcMetrics), "challenge_id": challengeID})
+ logtrace.Debug(ctx, "health-check-challenge metrics row count", logtrace.Fields{"rows": len(hcMetrics), "challenge_id": challengeID})
for _, hcMetric := range hcMetrics {
msg := types.HealthCheckMessageData{}
diff --git a/pkg/storage/queries/self_healing.go b/pkg/storage/queries/self_healing.go
index 47145a0b..61e7c63c 100644
--- a/pkg/storage/queries/self_healing.go
+++ b/pkg/storage/queries/self_healing.go
@@ -257,7 +257,7 @@ func (s *SQLiteStore) GetSHExecutionMetrics(ctx context.Context, from time.Time)
if err != nil {
return m, err
}
- logtrace.Info(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)})
+ logtrace.Debug(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)})
challenges := make(map[string]SHChallengeMetric)
for _, row := range rows {
@@ -361,11 +361,11 @@ func (s *SQLiteStore) GetSHExecutionMetrics(ctx context.Context, from time.Time)
}
}
- logtrace.Info(ctx, "self-healing execution metrics challenges count", logtrace.Fields{"challenges": len(challenges)})
+ logtrace.Debug(ctx, "self-healing execution metrics challenges count", logtrace.Fields{"challenges": len(challenges)})
for _, challenge := range challenges {
- logtrace.Info(ctx, "self-healing challenge metric", logtrace.Fields{
+ logtrace.Debug(ctx, "self-healing challenge metric", logtrace.Fields{
"challenge-id": challenge.ChallengeID,
"is-accepted": challenge.IsAccepted,
"is-verified": challenge.IsVerified,
@@ -475,7 +475,7 @@ func (s *SQLiteStore) GetLastNSHChallenges(ctx context.Context, n int) (types.Se
if err != nil {
return challenges, err
}
- logtrace.Info(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)})
+ logtrace.Debug(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)})
challengesInserted := 0
for _, row := range rows {
@@ -507,7 +507,7 @@ func (s *SQLiteStore) GetSHChallengeReport(ctx context.Context, challengeID stri
if err != nil {
return challenges, err
}
- logtrace.Info(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)})
+ logtrace.Debug(ctx, "self-healing execution metrics row count", logtrace.Fields{"rows": len(rows)})
for _, row := range rows {
if row.ChallengeID == challengeID {
diff --git a/pkg/storage/queries/storage_challenge.go b/pkg/storage/queries/storage_challenge.go
index 574e7f4f..164ed2be 100644
--- a/pkg/storage/queries/storage_challenge.go
+++ b/pkg/storage/queries/storage_challenge.go
@@ -97,7 +97,7 @@ func (s *SQLiteStore) GetMetricsDataByStorageChallengeID(ctx context.Context, ch
return storageChallengeMessages, err
}
// log.WithContext(ctx).WithField("rows", len(scMetrics)).Info("storage-challenge metrics row count")
- logtrace.Info(ctx, "storage-challenge metrics row count", logtrace.Fields{
+ logtrace.Debug(ctx, "storage-challenge metrics row count", logtrace.Fields{
"rows": len(scMetrics),
})
@@ -210,13 +210,13 @@ func (s *SQLiteStore) GetSCSummaryStats(from time.Time) (scMetrics metrics.SCMet
return scMetrics, err
}
// log.WithField("observer_evaluations", len(observersEvaluations)).Info("observer evaluations retrieved")
- logtrace.Info(context.Background(), "observer evaluations retrieved", logtrace.Fields{
+ logtrace.Debug(context.Background(), "observer evaluations retrieved", logtrace.Fields{
"observer_evaluations": len(observersEvaluations),
})
observerEvaluationMetrics := processObserverEvaluations(observersEvaluations)
// log.WithField("observer_evaluation_metrics", len(observerEvaluationMetrics)).Info("observer evaluation metrics retrieved")
- logtrace.Info(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{
+ logtrace.Debug(context.Background(), "observer evaluation metrics retrieved", logtrace.Fields{
"observer_evaluation_metrics": len(observerEvaluationMetrics),
})
diff --git a/pkg/storage/queries/task_history.go b/pkg/storage/queries/task_history.go
index 73a55ef8..29539a49 100644
--- a/pkg/storage/queries/task_history.go
+++ b/pkg/storage/queries/task_history.go
@@ -59,7 +59,7 @@ func (s *SQLiteStore) QueryTaskHistory(taskID string) (history []types.TaskHisto
err = json.Unmarshal([]byte(details), &i.Details)
if err != nil {
- logtrace.Info(context.Background(), "Detals", logtrace.Fields{"details": details})
+ logtrace.Debug(context.Background(), "Detals", logtrace.Fields{"details": details})
logtrace.Error(context.Background(), fmt.Sprintf("cannot unmarshal task history details: %s", details), logtrace.Fields{"error": err})
i.Details = nil
}
diff --git a/profile_cascade.sh b/profile_cascade.sh
new file mode 100755
index 00000000..7fe0af5e
--- /dev/null
+++ b/profile_cascade.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+# Cascade Download Heap Profiling Script
+# Samples heap every 30 seconds during cascade downloads
+
+# Configuration - modify these as needed
+PROFILE_URL="http://localhost:6062/debug/pprof/heap"
+INTERVAL=30
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+PROFILE_DIR="profiles_${TIMESTAMP}"
+
+# Allow override via command line
+if [ "$1" != "" ]; then
+ PROFILE_URL="$1"
+fi
+
+echo "=== Cascade Heap Profiling ==="
+echo "Profile URL: $PROFILE_URL"
+echo "Interval: ${INTERVAL}s"
+echo "Output Dir: $PROFILE_DIR"
+echo
+
+# Create profile directory
+mkdir -p "$PROFILE_DIR"
+cd "$PROFILE_DIR"
+
+# Test connection first
+echo "Testing connection to profiling server..."
+if ! curl -s --fail "$PROFILE_URL" > /dev/null; then
+ echo "ERROR: Cannot connect to profiling server at $PROFILE_URL"
+ echo "Make sure your supernode is running on testnet!"
+ exit 1
+fi
+
+echo "✓ Connected to profiling server"
+echo
+
+# Take baseline
+echo "Taking baseline heap snapshot..."
+curl -s -o "heap_00s.prof" "$PROFILE_URL"
+echo "✓ Baseline saved: heap_00s.prof"
+echo
+
+echo "*** NOW START YOUR CASCADE DOWNLOAD ***"
+echo "Press ENTER when download has started..."
+read
+
+echo "Starting heap profiling every ${INTERVAL}s..."
+echo "Press Ctrl+C to stop"
+echo
+
+# Counter for snapshots
+counter=1
+
+# Function to handle cleanup on exit
+cleanup() {
+ echo
+ echo "Profiling stopped. Taking final snapshot..."
+ final_elapsed=$((counter * INTERVAL))
+ curl -s -o "heap_${final_elapsed}s_final.prof" "$PROFILE_URL"
+
+ echo
+ echo "=== Profiling Complete ==="
+ echo "Location: $(pwd)"
+ echo "Files created:"
+ ls -la *.prof
+ echo
+ echo "Analysis commands:"
+ echo "# Compare baseline to final:"
+ echo "go tool pprof -http=:8080 -base heap_00s.prof heap_${final_elapsed}s_final.prof"
+ exit 0
+}
+
+# Set up signal handler
+trap cleanup INT TERM
+
+# Main profiling loop
+while true; do
+ sleep $INTERVAL
+
+ elapsed=$((counter * INTERVAL))
+ minutes=$((elapsed / 60))
+ seconds=$((elapsed % 60))
+
+ timestamp=$(date +%H:%M:%S)
+ filename="heap_${elapsed}s.prof"
+
+ echo "[$timestamp] Taking snapshot $counter (${minutes}m ${seconds}s elapsed)..."
+
+ if curl -s -o "$filename" "$PROFILE_URL"; then
+ size=$(ls -lh "$filename" | awk '{print $5}')
+ echo "✓ Saved: $filename ($size)"
+ else
+ echo "✗ Failed to get snapshot $counter"
+ fi
+
+ ((counter++))
+done
\ No newline at end of file
diff --git a/sdk/README.md b/sdk/README.md
index b0aecb20..cf2501cf 100644
--- a/sdk/README.md
+++ b/sdk/README.md
@@ -221,11 +221,13 @@ if err != nil {
// taskID can be used to track the download progress
```
+Note: If the action's cascade metadata sets `public: true`, the signature may be left empty to allow anonymous download.
+
**Parameters:**
- `ctx context.Context`: Context for the operation
- `actionID string`: ID of the action to download
- `outputDir string`: Directory where the downloaded file will be saved
-- `signature string`: Base64-encoded signature for download authorization
+- `signature string`: Base64-encoded signature for download authorization (leave empty for public cascades)
**Signature Creation for Download:**
The download signature is created by combining the action ID with the creator's address, signing it, and base64 encoding the result.
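+
+For illustration, a minimal sketch (the `signWithCreatorKey` helper is hypothetical; substitute your keyring's signing call, and note the snippet assumes `encoding/base64` is imported):
+
+```go
+// Combine the action ID with the creator's address
+payload := []byte(actionID + creatorAddress)
+
+// Sign with the creator's key (hypothetical helper)
+sigBytes, err := signWithCreatorKey(payload)
+if err != nil {
+    return err
+}
+
+// Base64-encode the raw signature bytes for use as the download signature
+signature := base64.StdEncoding.EncodeToString(sigBytes)
+```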
diff --git a/sdk/adapters/supernodeservice/adapter.go b/sdk/adapters/supernodeservice/adapter.go
index f9e9e6da..0d326a17 100644
--- a/sdk/adapters/supernodeservice/adapter.go
+++ b/sdk/adapters/supernodeservice/adapter.go
@@ -446,6 +446,7 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
bytesWritten int64
chunkIndex int
startedEmitted bool
+ downloadStart time.Time
)
// 3. Receive streamed responses
@@ -509,7 +510,11 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
}
}
}
- in.EventLogger(ctx, toSdkEvent(x.Event.EventType), x.Event.Message, edata)
+ // Avoid blocking Recv loop on event handling; dispatch asynchronously
+ evtType := toSdkEvent(x.Event.EventType)
+ go func(ed event.EventData, et event.EventType, msg string) {
+ in.EventLogger(ctx, et, msg, ed)
+ }(edata, evtType, x.Event.Message)
}
// 3b. Actual data chunk
@@ -520,7 +525,10 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
}
if !startedEmitted {
if in.EventLogger != nil {
- in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID})
+ // mark start to compute throughput at completion
+ downloadStart = time.Now()
+ // Emit started asynchronously to avoid blocking
+ go in.EventLogger(ctx, event.SDKDownloadStarted, "Download started", event.EventData{event.KeyActionID: in.ActionID})
}
startedEmitted = true
}
@@ -538,7 +546,25 @@ func (a *cascadeAdapter) CascadeSupernodeDownload(
a.logger.Info(ctx, "download complete", "bytes_written", bytesWritten, "path", in.OutputPath, "action_id", in.ActionID)
if in.EventLogger != nil {
- in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{event.KeyActionID: in.ActionID, event.KeyOutputPath: in.OutputPath})
+ // Compute metrics if we marked a start
+ var elapsed float64
+ var throughput float64
+ if !downloadStart.IsZero() {
+ elapsed = time.Since(downloadStart).Seconds()
+ mb := float64(bytesWritten) / (1024.0 * 1024.0)
+ if elapsed > 0 {
+ throughput = mb / elapsed
+ }
+ }
+ // Emit completion asynchronously with metrics
+ go in.EventLogger(ctx, event.SDKDownloadCompleted, "Download completed", event.EventData{
+ event.KeyActionID: in.ActionID,
+ event.KeyOutputPath: in.OutputPath,
+ event.KeyBytesTotal: bytesWritten,
+ event.KeyChunks: chunkIndex,
+ event.KeyElapsedSeconds: elapsed,
+ event.KeyThroughputMBS: throughput,
+ })
}
return &CascadeSupernodeDownloadResponse{
Success: true,
diff --git a/sdk/net/factory.go b/sdk/net/factory.go
index b9fad9fd..f3486780 100644
--- a/sdk/net/factory.go
+++ b/sdk/net/factory.go
@@ -39,9 +39,10 @@ func NewClientFactory(ctx context.Context, logger log.Logger, keyring keyring.Ke
// Tuned for 1GB max files with 4MB chunks
// Reduce in-flight memory by aligning windows and msg sizes to chunk size.
opts := client.DefaultClientOptions()
- opts.MaxRecvMsgSize = 8 * 1024 * 1024 // 8MB: supports 4MB chunks + overhead
- opts.MaxSendMsgSize = 8 * 1024 * 1024 // 8MB: supports 4MB chunks + overhead
- opts.InitialWindowSize = 4 * 1024 * 1024 // 4MB per-stream window ≈ chunk size
+ opts.MaxRecvMsgSize = 12 * 1024 * 1024 // 12MB: supports 4MB chunks + overhead
+ opts.MaxSendMsgSize = 12 * 1024 * 1024 // 12MB: supports 4MB chunks + overhead
+ // Increase per-stream window to provide headroom for first data chunk + events
+ opts.InitialWindowSize = 12 * 1024 * 1024 // 12MB per-stream window
opts.InitialConnWindowSize = 64 * 1024 * 1024 // 64MB per-connection window
return &ClientFactory{
diff --git a/sdk/task/download.go b/sdk/task/download.go
index 3e85007a..2c727ae9 100644
--- a/sdk/task/download.go
+++ b/sdk/task/download.go
@@ -4,7 +4,6 @@ import (
"context"
"fmt"
"os"
- "sort"
"time"
"github.com/LumeraProtocol/supernode/v2/sdk/adapters/lumera"
@@ -77,51 +76,6 @@ func (t *CascadeDownloadTask) downloadFromSupernodes(ctx context.Context, supern
}
}
- // Optionally rank supernodes by available memory to improve success for large files
- // We keep a short timeout per status fetch to avoid delaying downloads.
- type rankedSN struct {
- sn lumera.Supernode
- availGB float64
- hasStatus bool
- }
- ranked := make([]rankedSN, 0, len(supernodes))
- for _, sn := range supernodes {
- ranked = append(ranked, rankedSN{sn: sn})
- }
-
- // Probe supernode status with short timeouts and close clients promptly
- for i := range ranked {
- sn := ranked[i].sn
- // 2s status timeout to keep this pass fast
- stx, cancel := context.WithTimeout(ctx, 2*time.Second)
- client, err := clientFactory.CreateClient(stx, sn)
- if err != nil {
- cancel()
- continue
- }
- status, err := client.GetSupernodeStatus(stx)
- _ = client.Close(stx)
- cancel()
- if err != nil {
- continue
- }
- ranked[i].hasStatus = true
- ranked[i].availGB = status.Resources.Memory.AvailableGB
- }
-
- // Sort: nodes with status first, higher available memory first
- sort.Slice(ranked, func(i, j int) bool {
- if ranked[i].hasStatus != ranked[j].hasStatus {
- return ranked[i].hasStatus && !ranked[j].hasStatus
- }
- return ranked[i].availGB > ranked[j].availGB
- })
-
- // Rebuild the supernodes list in the sorted order
- for i := range ranked {
- supernodes[i] = ranked[i].sn
- }
-
- // Try supernodes sequentially, one by one (now sorted)
+ // Try supernodes sequentially, one by one
var lastErr error
for idx, sn := range supernodes {
@@ -146,8 +100,8 @@ func (t *CascadeDownloadTask) downloadFromSupernodes(ctx context.Context, supern
continue
}
- // Success; return to caller
- return nil
+ // Success; return to caller
+ return nil
}
if lastErr != nil {
@@ -176,15 +130,15 @@ func (t *CascadeDownloadTask) attemptDownload(
t.LogEvent(ctx, evt, msg, data)
}
- resp, err := client.Download(ctx, req)
- if err != nil {
- return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err)
- }
- if !resp.Success {
- return fmt.Errorf("download rejected by %s: %s", sn.CosmosAddress, resp.Message)
- }
+ resp, err := client.Download(ctx, req)
+ if err != nil {
+ return fmt.Errorf("download from %s: %w", sn.CosmosAddress, err)
+ }
+ if !resp.Success {
+ return fmt.Errorf("download rejected by %s: %s", sn.CosmosAddress, resp.Message)
+ }
- return nil
+ return nil
}
// downloadResult holds the result of a successful download attempt
diff --git a/sdk/task/task.go b/sdk/task/task.go
index e359c907..976725a0 100644
--- a/sdk/task/task.go
+++ b/sdk/task/task.go
@@ -126,7 +126,7 @@ func (t *BaseTask) isServing(parent context.Context, sn lumera.Supernode) bool {
PeerType: t.config.Account.PeerType,
}).CreateClient(ctx, sn)
if err != nil {
- logtrace.Info(ctx, "Failed to create client for supernode", logtrace.Fields{logtrace.FieldMethod: "isServing"})
+ logtrace.Debug(ctx, "Failed to create client for supernode", logtrace.Fields{logtrace.FieldMethod: "isServing"})
return false
}
defer client.Close(ctx)
diff --git a/supernode/cmd/service.go b/supernode/cmd/service.go
index d4af1269..8cd8708f 100644
--- a/supernode/cmd/service.go
+++ b/supernode/cmd/service.go
@@ -23,7 +23,7 @@ func RunServices(ctx context.Context, services ...service) error {
if err != nil {
logtrace.Error(ctx, "service stopped with an error", logtrace.Fields{"service": reflect.TypeOf(service).String(), "error": err})
} else {
- logtrace.Info(ctx, "service stopped", logtrace.Fields{"service": reflect.TypeOf(service).String()})
+ logtrace.Debug(ctx, "service stopped", logtrace.Fields{"service": reflect.TypeOf(service).String()})
}
return err
})
diff --git a/supernode/cmd/start.go b/supernode/cmd/start.go
index befaf85d..4529aec2 100644
--- a/supernode/cmd/start.go
+++ b/supernode/cmd/start.go
@@ -3,9 +3,12 @@ package cmd
import (
"context"
"fmt"
+ "net/http"
+ _ "net/http/pprof"
"os"
"os/signal"
"path/filepath"
+ "strings"
"syscall"
"github.com/LumeraProtocol/supernode/v2/p2p"
@@ -43,7 +46,7 @@ The supernode will connect to the Lumera network and begin participating in the
// Log configuration info
cfgFile := filepath.Join(baseDir, DefaultConfigFile)
- logtrace.Info(ctx, "Starting supernode with configuration", logtrace.Fields{"config_file": cfgFile, "keyring_dir": appConfig.GetKeyringDir(), "key_name": appConfig.SupernodeConfig.KeyName})
+ logtrace.Debug(ctx, "Starting supernode with configuration", logtrace.Fields{"config_file": cfgFile, "keyring_dir": appConfig.GetKeyringDir(), "key_name": appConfig.SupernodeConfig.KeyName})
// Initialize keyring
kr, err := initKeyringFromConfig(appConfig)
@@ -58,7 +61,7 @@ The supernode will connect to the Lumera network and begin participating in the
}
// Verify config matches chain registration before starting services
- logtrace.Info(ctx, "Verifying configuration against chain registration", logtrace.Fields{})
+ logtrace.Debug(ctx, "Verifying configuration against chain registration", logtrace.Fields{})
configVerifier := verifier.NewConfigVerifier(appConfig, lumeraClient, kr)
verificationResult, err := configVerifier.VerifyConfig(ctx)
if err != nil {
@@ -73,7 +76,15 @@ The supernode will connect to the Lumera network and begin participating in the
logtrace.Warn(ctx, "Config verification warnings", logtrace.Fields{"summary": verificationResult.Summary()})
}
- logtrace.Info(ctx, "Configuration verification successful", logtrace.Fields{})
+ logtrace.Debug(ctx, "Configuration verification successful", logtrace.Fields{})
+
+ // Set Datadog host to identity and service to latest IP address from chain
+ logtrace.SetDatadogHost(appConfig.SupernodeConfig.Identity)
+ if snInfo, err := lumeraClient.SuperNode().GetSupernodeWithLatestAddress(ctx, appConfig.SupernodeConfig.Identity); err == nil && snInfo != nil {
+ if ip := strings.TrimSpace(snInfo.LatestAddress); ip != "" {
+ logtrace.SetDatadogService(ip)
+ }
+ }
// Initialize RaptorQ store for Cascade processing
rqStore, err := initRQStore(ctx, appConfig)
@@ -81,8 +92,11 @@ The supernode will connect to the Lumera network and begin participating in the
logtrace.Fatal(ctx, "Failed to initialize RaptorQ store", logtrace.Fields{"error": err.Error()})
}
- // Initialize P2P service
- p2pService, err := initP2PService(ctx, appConfig, lumeraClient, kr, rqStore, nil, nil)
+ // Force-disable metrics at the top level for now; the flag is plumbed into the P2P service and cascade config
+ disableMetrics := true
+
+ // Initialize P2P service with explicit disable flag
+ p2pService, err := initP2PService(ctx, appConfig, lumeraClient, kr, rqStore, nil, nil, disableMetrics)
if err != nil {
logtrace.Fatal(ctx, "Failed to initialize P2P service", logtrace.Fields{"error": err.Error()})
}
@@ -99,7 +113,8 @@ The supernode will connect to the Lumera network and begin participating in the
Config: common.Config{
SupernodeAccountAddress: appConfig.SupernodeConfig.Identity,
},
- RqFilesDir: appConfig.GetRaptorQFilesDir(),
+ RqFilesDir: appConfig.GetRaptorQFilesDir(),
+ MetricsDisabled: disableMetrics,
},
lumeraClient,
*p2pService,
@@ -139,6 +154,25 @@ The supernode will connect to the Lumera network and begin participating in the
return fmt.Errorf("failed to create gateway server: %w", err)
}
+ // Start profiling server on testnet only
+ isTestnet := strings.Contains(strings.ToLower(appConfig.LumeraClientConfig.ChainID), "testnet")
+
+ if isTestnet {
+ profilingAddr := "0.0.0.0:8082"
+
+ logtrace.Debug(ctx, "Starting profiling server", logtrace.Fields{
+ "address": profilingAddr,
+ "chain_id": appConfig.LumeraClientConfig.ChainID,
+ "is_testnet": isTestnet,
+ })
+
+ go func() {
+ if err := http.ListenAndServe(profilingAddr, nil); err != nil {
+ logtrace.Error(ctx, "Profiling server error", logtrace.Fields{"error": err.Error()})
+ }
+ }()
+ }
+
// Start the services
go func() {
if err := RunServices(ctx, grpcServer, cService, *p2pService, gatewayServer); err != nil {
@@ -152,7 +186,7 @@ The supernode will connect to the Lumera network and begin participating in the
// Wait for termination signal
sig := <-sigCh
- logtrace.Info(ctx, "Received signal, shutting down", logtrace.Fields{"signal": sig.String()})
+ logtrace.Debug(ctx, "Received signal, shutting down", logtrace.Fields{"signal": sig.String()})
// Graceful shutdown
if err := supernodeInstance.Stop(ctx); err != nil {
@@ -168,7 +202,7 @@ func init() {
}
// initP2PService initializes the P2P service
-func initP2PService(ctx context.Context, config *config.Config, lumeraClient lumera.Client, kr cKeyring.Keyring, rqStore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore) (*p2p.P2P, error) {
+func initP2PService(ctx context.Context, config *config.Config, lumeraClient lumera.Client, kr cKeyring.Keyring, rqStore rqstore.Store, cloud cloud.Storage, mst *sqlite.MigrationMetaStore, metricsDisabled bool) (*p2p.P2P, error) {
// Get the supernode address from the keyring
keyInfo, err := kr.Key(config.SupernodeConfig.KeyName)
if err != nil {
@@ -182,9 +216,9 @@ func initP2PService(ctx context.Context, config *config.Config, lumeraClient lum
// Create P2P config using helper function
p2pConfig := createP2PConfig(config, address.String())
- logtrace.Info(ctx, "Initializing P2P service", logtrace.Fields{"address": p2pConfig.ListenAddress, "port": p2pConfig.Port, "data_dir": p2pConfig.DataDir, "supernode_id": address.String()})
+ logtrace.Debug(ctx, "Initializing P2P service", logtrace.Fields{"address": p2pConfig.ListenAddress, "port": p2pConfig.Port, "data_dir": p2pConfig.DataDir, "supernode_id": address.String()})
- p2pService, err := p2p.New(ctx, p2pConfig, lumeraClient, kr, rqStore, cloud, mst)
+ p2pService, err := p2p.New(ctx, p2pConfig, lumeraClient, kr, rqStore, cloud, mst, metricsDisabled)
if err != nil {
return nil, fmt.Errorf("failed to initialize p2p service: %w", err)
}
diff --git a/supernode/cmd/supernode.go b/supernode/cmd/supernode.go
index 19a65718..c0740fd0 100644
--- a/supernode/cmd/supernode.go
+++ b/supernode/cmd/supernode.go
@@ -71,13 +71,13 @@ func (s *Supernode) Start(ctx context.Context) error {
return err
}
- logtrace.Info(ctx, "Found valid key in keyring", logtrace.Fields{
+ logtrace.Debug(ctx, "Found valid key in keyring", logtrace.Fields{
"key_name": s.config.SupernodeConfig.KeyName,
"address": address.String(),
})
// Use the P2P service that was passed in via constructor
- logtrace.Info(ctx, "Starting P2P service", logtrace.Fields{})
+ logtrace.Debug(ctx, "Starting P2P service", logtrace.Fields{})
if err := s.p2pService.Run(ctx); err != nil {
return fmt.Errorf("p2p service error: %w", err)
}
@@ -89,7 +89,7 @@ func (s *Supernode) Start(ctx context.Context) error {
func (s *Supernode) Stop(ctx context.Context) error {
// Close the Lumera client connection
if s.lumeraClient != nil {
- logtrace.Info(ctx, "Closing Lumera client", logtrace.Fields{})
+ logtrace.Debug(ctx, "Closing Lumera client", logtrace.Fields{})
if err := s.lumeraClient.Close(); err != nil {
logtrace.Error(ctx, "Error closing Lumera client", logtrace.Fields{
"error": err.Error(),
@@ -131,7 +131,7 @@ func initRQStore(ctx context.Context, config *config.Config) (rqstore.Store, err
// Create the SQLite file path
rqStoreFile := rqDir + "/rqstore.db"
- logtrace.Info(ctx, "Initializing RaptorQ store", logtrace.Fields{
+ logtrace.Debug(ctx, "Initializing RaptorQ store", logtrace.Fields{
"file_path": rqStoreFile,
})
diff --git a/supernode/node/action/server/cascade/cascade_action_server.go b/supernode/node/action/server/cascade/cascade_action_server.go
index a99fbf0a..6a38b750 100644
--- a/supernode/node/action/server/cascade/cascade_action_server.go
+++ b/supernode/node/action/server/cascade/cascade_action_server.go
@@ -74,7 +74,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er
}
ctx := stream.Context()
- logtrace.Info(ctx, "client streaming request to upload cascade input data received", fields)
+ logtrace.Debug(ctx, "client streaming request to upload cascade input data received", fields)
const maxFileSize = 1 * 1024 * 1024 * 1024 // 1GB limit
@@ -140,7 +140,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er
return fmt.Errorf("file size %d exceeds maximum allowed size of 1GB", totalSize)
}
- logtrace.Info(ctx, "received data chunk", logtrace.Fields{
+ logtrace.Debug(ctx, "received data chunk", logtrace.Fields{
"chunk_size": len(x.Chunk.Data),
"total_size_so_far": totalSize,
})
@@ -148,7 +148,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er
case *pb.RegisterRequest_Metadata:
// Store metadata - this should be the final message
metadata = x.Metadata
- logtrace.Info(ctx, "received metadata", logtrace.Fields{
+ logtrace.Debug(ctx, "received metadata", logtrace.Fields{
"task_id": metadata.TaskId,
"action_id": metadata.ActionId,
})
@@ -162,7 +162,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er
}
fields[logtrace.FieldTaskID] = metadata.GetTaskId()
fields[logtrace.FieldActionID] = metadata.GetActionId()
- logtrace.Info(ctx, "metadata received from action-sdk", fields)
+ logtrace.Debug(ctx, "metadata received from action-sdk", fields)
// Ensure all data is written to disk before calculating hash
if err := tempFile.Sync(); err != nil {
@@ -174,7 +174,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er
hash := hasher.Sum(nil)
hashHex := hex.EncodeToString(hash)
fields[logtrace.FieldHashHex] = hashHex
- logtrace.Info(ctx, "final BLAKE3 hash generated", fields)
+ logtrace.Debug(ctx, "final BLAKE3 hash generated", fields)
targetPath, err := replaceTempDirWithTaskDir(metadata.GetTaskId(), tempFilePath, tempFile)
if err != nil {
@@ -213,7 +213,7 @@ func (server *ActionServer) Register(stream pb.CascadeService_RegisterServer) er
return fmt.Errorf("registration failed: %w", err)
}
- logtrace.Info(ctx, "cascade registration completed successfully", fields)
+ logtrace.Debug(ctx, "cascade registration completed successfully", fields)
return nil
}
@@ -225,25 +225,12 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
}
ctx := stream.Context()
- logtrace.Info(ctx, "download request received from client", fields)
+ logtrace.Debug(ctx, "download request received from client", fields)
task := server.factory.NewCascadeRegistrationTask()
- // Verify signature if provided
- if req.GetSignature() != "" {
- // Cast to concrete type to access helper method
- if cascadeTask, ok := task.(*cascadeService.CascadeRegistrationTask); ok {
- err := cascadeTask.VerifyDownloadSignature(ctx, req.GetActionId(), req.GetSignature())
- if err != nil {
- fields[logtrace.FieldError] = err.Error()
- logtrace.Error(ctx, "signature verification failed", fields)
- return fmt.Errorf("signature verification failed: %w", err)
- }
- } else {
- logtrace.Error(ctx, "unable to cast task to CascadeRegistrationTask", fields)
- return fmt.Errorf("unable to verify signature: task type assertion failed")
- }
- }
+ // Authorization is enforced inside the task based on metadata.Public.
+ // If public, signature is skipped; if private, signature is required.
var restoredFilePath string
var tmpDir string
@@ -254,13 +241,14 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
if err := task.CleanupDownload(ctx, tmpDir); err != nil {
logtrace.Error(ctx, "error cleaning up the tmp dir", logtrace.Fields{logtrace.FieldError: err.Error()})
} else {
- logtrace.Info(ctx, "tmp dir has been cleaned up", logtrace.Fields{"tmp_dir": tmpDir})
+ logtrace.Debug(ctx, "tmp dir has been cleaned up", logtrace.Fields{"tmp_dir": tmpDir})
}
}
}()
err := task.Download(ctx, &cascadeService.DownloadRequest{
- ActionID: req.GetActionId(),
+ ActionID: req.GetActionId(),
+ Signature: req.GetSignature(),
}, func(resp *cascadeService.DownloadResponse) error {
grpcResp := &pb.DownloadResponse{
ResponseType: &pb.DownloadResponse_Event{
@@ -290,7 +278,7 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
logtrace.Error(ctx, "no artefact file retrieved", fields)
return fmt.Errorf("no artefact to stream")
}
- logtrace.Info(ctx, "streaming artefact file in chunks", fields)
+ logtrace.Debug(ctx, "streaming artefact file in chunks", fields)
// Open the restored file and stream directly from disk to avoid buffering entire file in memory
f, err := os.Open(restoredFilePath)
@@ -308,12 +296,19 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
// Calculate optimal chunk size based on file size
chunkSize := calculateOptimalChunkSize(fi.Size())
- logtrace.Info(ctx, "calculated optimal chunk size for download", logtrace.Fields{
+ logtrace.Debug(ctx, "calculated optimal chunk size for download", logtrace.Fields{
"file_size": fi.Size(),
"chunk_size": chunkSize,
})
- // Announce: file is ready to be served to the client
+ // Pre-read first chunk to avoid any delay between SERVE_READY and first data
+ buf := make([]byte, chunkSize)
+ n, readErr := f.Read(buf)
+ if readErr != nil && readErr != io.EOF {
+ return fmt.Errorf("chunked read failed: %w", readErr)
+ }
+
+ // Announce: file is ready to be served to the client (right before first data)
if err := stream.Send(&pb.DownloadResponse{
ResponseType: &pb.DownloadResponse_Event{
Event: &pb.DownloadEvent{
@@ -326,10 +321,27 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
return err
}
- // Stream the file in fixed-size chunks
- buf := make([]byte, chunkSize)
+ // Send pre-read first chunk if available
+ if n > 0 {
+ if err := stream.Send(&pb.DownloadResponse{
+ ResponseType: &pb.DownloadResponse_Chunk{
+ Chunk: &pb.DataChunk{Data: buf[:n]},
+ },
+ }); err != nil {
+ logtrace.Error(ctx, "failed to stream first chunk", logtrace.Fields{logtrace.FieldError: err.Error()})
+ return err
+ }
+ }
+
+ // If EOF after first read, we're done
+ if readErr == io.EOF {
+ logtrace.Debug(ctx, "completed streaming all chunks", fields)
+ return nil
+ }
+
+ // Continue streaming remaining chunks
for {
- n, readErr := f.Read(buf)
+ n, readErr = f.Read(buf)
if n > 0 {
if err := stream.Send(&pb.DownloadResponse{
ResponseType: &pb.DownloadResponse_Chunk{
@@ -350,6 +362,6 @@ func (server *ActionServer) Download(req *pb.DownloadRequest, stream pb.CascadeS
// Cleanup is handled in deferred block above
- logtrace.Info(ctx, "completed streaming all chunks", fields)
+ logtrace.Debug(ctx, "completed streaming all chunks", fields)
return nil
}
diff --git a/supernode/node/action/server/cascade/cascade_action_server_test.go b/supernode/node/action/server/cascade/cascade_action_server_test.go
index eca121d8..ff2738b3 100644
--- a/supernode/node/action/server/cascade/cascade_action_server_test.go
+++ b/supernode/node/action/server/cascade/cascade_action_server_test.go
@@ -10,7 +10,7 @@ import (
cascademocks "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/mocks"
"github.com/stretchr/testify/assert"
- "go.uber.org/mock/gomock"
+ "github.com/golang/mock/gomock"
)
func TestRegister_Success(t *testing.T) {
diff --git a/supernode/node/supernode/gateway/server.go b/supernode/node/supernode/gateway/server.go
index 5440a7f4..7e17e238 100644
--- a/supernode/node/supernode/gateway/server.go
+++ b/supernode/node/supernode/gateway/server.go
@@ -86,7 +86,7 @@ func (s *Server) Run(ctx context.Context) error {
IdleTimeout: 60 * time.Second,
}
- logtrace.Info(ctx, "Starting HTTP gateway server", logtrace.Fields{
+ logtrace.Debug(ctx, "Starting HTTP gateway server", logtrace.Fields{
"address": s.ipAddress,
"port": s.port,
})
@@ -105,7 +105,7 @@ func (s *Server) Stop(ctx context.Context) error {
return nil
}
- logtrace.Info(ctx, "Shutting down HTTP gateway server", nil)
+ logtrace.Debug(ctx, "Shutting down HTTP gateway server", nil)
return s.server.Shutdown(ctx)
}
diff --git a/supernode/node/supernode/server/server.go b/supernode/node/supernode/server/server.go
index 37e8f4dd..774be094 100644
--- a/supernode/node/supernode/server/server.go
+++ b/supernode/node/supernode/server/server.go
@@ -48,8 +48,8 @@ func (server *Server) Run(ctx context.Context) error {
// Set up gRPC logging
logtrace.SetGRPCLogger()
- logtrace.Info(ctx, "Server identity configured", logtrace.Fields{logtrace.FieldModule: "server", "identity": server.config.Identity})
- logtrace.Info(ctx, "Server listening", logtrace.Fields{logtrace.FieldModule: "server", "addresses": server.config.ListenAddresses})
+ logtrace.Debug(ctx, "Server identity configured", logtrace.Fields{logtrace.FieldModule: "server", "identity": server.config.Identity})
+ logtrace.Debug(ctx, "Server listening", logtrace.Fields{logtrace.FieldModule: "server", "addresses": server.config.ListenAddresses})
group, ctx := errgroup.WithContext(ctx)
@@ -74,7 +74,7 @@ func (server *Server) Run(ctx context.Context) error {
address := addr // Create a new variable to avoid closure issues
group.Go(func() error {
- logtrace.Info(ctx, "Starting gRPC server", logtrace.Fields{logtrace.FieldModule: "server", "address": address})
+ logtrace.Debug(ctx, "Starting gRPC server", logtrace.Fields{logtrace.FieldModule: "server", "address": address})
return server.grpcServer.Serve(ctx, address, opts)
})
}
diff --git a/supernode/services/cascade/adaptors/mocks/lumera_mock.go b/supernode/services/cascade/adaptors/mocks/lumera_mock.go
index 15a6c901..29cdd48f 100644
--- a/supernode/services/cascade/adaptors/mocks/lumera_mock.go
+++ b/supernode/services/cascade/adaptors/mocks/lumera_mock.go
@@ -1,10 +1,5 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: lumera.go
-//
-// Generated by this command:
-//
-// mockgen -destination=mocks/lumera_mock.go -package=cascadeadaptormocks -source=lumera.go
-//
// Package cascadeadaptormocks is a generated GoMock package.
package cascadeadaptormocks
@@ -16,14 +11,13 @@ import (
types "github.com/LumeraProtocol/lumera/x/action/v1/types"
types0 "github.com/LumeraProtocol/lumera/x/supernode/v1/types"
tx "github.com/cosmos/cosmos-sdk/types/tx"
- gomock "go.uber.org/mock/gomock"
+ gomock "github.com/golang/mock/gomock"
)
// MockLumeraClient is a mock of LumeraClient interface.
type MockLumeraClient struct {
ctrl *gomock.Controller
recorder *MockLumeraClientMockRecorder
- isgomock struct{}
}
// MockLumeraClientMockRecorder is the mock recorder for MockLumeraClient.
@@ -53,26 +47,11 @@ func (m *MockLumeraClient) FinalizeAction(ctx context.Context, actionID string,
}
// FinalizeAction indicates an expected call of FinalizeAction.
-func (mr *MockLumeraClientMockRecorder) FinalizeAction(ctx, actionID, rqids any) *gomock.Call {
+func (mr *MockLumeraClientMockRecorder) FinalizeAction(ctx, actionID, rqids interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FinalizeAction", reflect.TypeOf((*MockLumeraClient)(nil).FinalizeAction), ctx, actionID, rqids)
}
-// SimulateFinalizeAction mocks base method.
-func (m *MockLumeraClient) SimulateFinalizeAction(ctx context.Context, actionID string, rqids []string) (*tx.SimulateResponse, error) {
- m.ctrl.T.Helper()
- ret := m.ctrl.Call(m, "SimulateFinalizeAction", ctx, actionID, rqids)
- ret0, _ := ret[0].(*tx.SimulateResponse)
- ret1, _ := ret[1].(error)
- return ret0, ret1
-}
-
-// SimulateFinalizeAction indicates an expected call of SimulateFinalizeAction.
-func (mr *MockLumeraClientMockRecorder) SimulateFinalizeAction(ctx, actionID, rqids any) *gomock.Call {
- mr.mock.ctrl.T.Helper()
- return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SimulateFinalizeAction", reflect.TypeOf((*MockLumeraClient)(nil).SimulateFinalizeAction), ctx, actionID, rqids)
-}
-
// GetAction mocks base method.
func (m *MockLumeraClient) GetAction(ctx context.Context, actionID string) (*types.QueryGetActionResponse, error) {
m.ctrl.T.Helper()
@@ -83,7 +62,7 @@ func (m *MockLumeraClient) GetAction(ctx context.Context, actionID string) (*typ
}
// GetAction indicates an expected call of GetAction.
-func (mr *MockLumeraClientMockRecorder) GetAction(ctx, actionID any) *gomock.Call {
+func (mr *MockLumeraClientMockRecorder) GetAction(ctx, actionID interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetAction", reflect.TypeOf((*MockLumeraClient)(nil).GetAction), ctx, actionID)
}
@@ -98,7 +77,7 @@ func (m *MockLumeraClient) GetActionFee(ctx context.Context, dataSize string) (*
}
// GetActionFee indicates an expected call of GetActionFee.
-func (mr *MockLumeraClientMockRecorder) GetActionFee(ctx, dataSize any) *gomock.Call {
+func (mr *MockLumeraClientMockRecorder) GetActionFee(ctx, dataSize interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetActionFee", reflect.TypeOf((*MockLumeraClient)(nil).GetActionFee), ctx, dataSize)
}
@@ -113,11 +92,26 @@ func (m *MockLumeraClient) GetTopSupernodes(ctx context.Context, height uint64)
}
// GetTopSupernodes indicates an expected call of GetTopSupernodes.
-func (mr *MockLumeraClientMockRecorder) GetTopSupernodes(ctx, height any) *gomock.Call {
+func (mr *MockLumeraClientMockRecorder) GetTopSupernodes(ctx, height interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTopSupernodes", reflect.TypeOf((*MockLumeraClient)(nil).GetTopSupernodes), ctx, height)
}
+// SimulateFinalizeAction mocks base method.
+func (m *MockLumeraClient) SimulateFinalizeAction(ctx context.Context, actionID string, rqids []string) (*tx.SimulateResponse, error) {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "SimulateFinalizeAction", ctx, actionID, rqids)
+ ret0, _ := ret[0].(*tx.SimulateResponse)
+ ret1, _ := ret[1].(error)
+ return ret0, ret1
+}
+
+// SimulateFinalizeAction indicates an expected call of SimulateFinalizeAction.
+func (mr *MockLumeraClientMockRecorder) SimulateFinalizeAction(ctx, actionID, rqids interface{}) *gomock.Call {
+ mr.mock.ctrl.T.Helper()
+ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SimulateFinalizeAction", reflect.TypeOf((*MockLumeraClient)(nil).SimulateFinalizeAction), ctx, actionID, rqids)
+}
+
// Verify mocks base method.
func (m *MockLumeraClient) Verify(ctx context.Context, creator string, file, sigBytes []byte) error {
m.ctrl.T.Helper()
@@ -127,7 +121,7 @@ func (m *MockLumeraClient) Verify(ctx context.Context, creator string, file, sig
}
// Verify indicates an expected call of Verify.
-func (mr *MockLumeraClientMockRecorder) Verify(ctx, creator, file, sigBytes any) *gomock.Call {
+func (mr *MockLumeraClientMockRecorder) Verify(ctx, creator, file, sigBytes interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Verify", reflect.TypeOf((*MockLumeraClient)(nil).Verify), ctx, creator, file, sigBytes)
}
diff --git a/supernode/services/cascade/adaptors/mocks/p2p_mock.go b/supernode/services/cascade/adaptors/mocks/p2p_mock.go
index 4f62a440..ec99d92a 100644
--- a/supernode/services/cascade/adaptors/mocks/p2p_mock.go
+++ b/supernode/services/cascade/adaptors/mocks/p2p_mock.go
@@ -1,10 +1,5 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: p2p.go
-//
-// Generated by this command:
-//
-// mockgen -destination=mocks/p2p_mock.go -package=cascadeadaptormocks -source=p2p.go
-//
// Package cascadeadaptormocks is a generated GoMock package.
package cascadeadaptormocks
@@ -15,14 +10,13 @@ import (
logtrace "github.com/LumeraProtocol/supernode/v2/pkg/logtrace"
adaptors "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors"
- gomock "go.uber.org/mock/gomock"
+ gomock "github.com/golang/mock/gomock"
)
// MockP2PService is a mock of P2PService interface.
type MockP2PService struct {
ctrl *gomock.Controller
recorder *MockP2PServiceMockRecorder
- isgomock struct{}
}
// MockP2PServiceMockRecorder is the mock recorder for MockP2PService.
@@ -51,7 +45,7 @@ func (m *MockP2PService) StoreArtefacts(ctx context.Context, req adaptors.StoreA
}
// StoreArtefacts indicates an expected call of StoreArtefacts.
-func (mr *MockP2PServiceMockRecorder) StoreArtefacts(ctx, req, f any) *gomock.Call {
+func (mr *MockP2PServiceMockRecorder) StoreArtefacts(ctx, req, f interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "StoreArtefacts", reflect.TypeOf((*MockP2PService)(nil).StoreArtefacts), ctx, req, f)
}
diff --git a/supernode/services/cascade/adaptors/mocks/rq_mock.go b/supernode/services/cascade/adaptors/mocks/rq_mock.go
index 4c53c1dd..f45f2eb5 100644
--- a/supernode/services/cascade/adaptors/mocks/rq_mock.go
+++ b/supernode/services/cascade/adaptors/mocks/rq_mock.go
@@ -1,10 +1,5 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: rq.go
-//
-// Generated by this command:
-//
-// mockgen -destination=mocks/rq_mock.go -package=cascadeadaptormocks -source=rq.go
-//
// Package cascadeadaptormocks is a generated GoMock package.
package cascadeadaptormocks
@@ -13,15 +8,15 @@ import (
context "context"
reflect "reflect"
+ codec "github.com/LumeraProtocol/supernode/v2/pkg/codec"
adaptors "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors"
- gomock "go.uber.org/mock/gomock"
+ gomock "github.com/golang/mock/gomock"
)
// MockCodecService is a mock of CodecService interface.
type MockCodecService struct {
ctrl *gomock.Controller
recorder *MockCodecServiceMockRecorder
- isgomock struct{}
}
// MockCodecServiceMockRecorder is the mock recorder for MockCodecService.
@@ -51,7 +46,7 @@ func (m *MockCodecService) Decode(ctx context.Context, req adaptors.DecodeReques
}
// Decode indicates an expected call of Decode.
-func (mr *MockCodecServiceMockRecorder) Decode(ctx, req any) *gomock.Call {
+func (mr *MockCodecServiceMockRecorder) Decode(ctx, req interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Decode", reflect.TypeOf((*MockCodecService)(nil).Decode), ctx, req)
}
@@ -66,7 +61,25 @@ func (m *MockCodecService) EncodeInput(ctx context.Context, taskID, path string,
}
// EncodeInput indicates an expected call of EncodeInput.
-func (mr *MockCodecServiceMockRecorder) EncodeInput(ctx, taskID, path, dataSize any) *gomock.Call {
+func (mr *MockCodecServiceMockRecorder) EncodeInput(ctx, taskID, path, dataSize interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EncodeInput", reflect.TypeOf((*MockCodecService)(nil).EncodeInput), ctx, taskID, path, dataSize)
}
+
+// PrepareDecode mocks base method.
+func (m *MockCodecService) PrepareDecode(ctx context.Context, actionID string, layout codec.Layout) ([]string, func(int, string, []byte) (string, error), func() error, *codec.Workspace, error) {
+ m.ctrl.T.Helper()
+ ret := m.ctrl.Call(m, "PrepareDecode", ctx, actionID, layout)
+ ret0, _ := ret[0].([]string)
+ ret1, _ := ret[1].(func(int, string, []byte) (string, error))
+ ret2, _ := ret[2].(func() error)
+ ret3, _ := ret[3].(*codec.Workspace)
+ ret4, _ := ret[4].(error)
+ return ret0, ret1, ret2, ret3, ret4
+}
+
+// PrepareDecode indicates an expected call of PrepareDecode.
+func (mr *MockCodecServiceMockRecorder) PrepareDecode(ctx, actionID, layout interface{}) *gomock.Call {
+ mr.mock.ctrl.T.Helper()
+ return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PrepareDecode", reflect.TypeOf((*MockCodecService)(nil).PrepareDecode), ctx, actionID, layout)
+}
diff --git a/supernode/services/cascade/adaptors/p2p.go b/supernode/services/cascade/adaptors/p2p.go
index 116d6810..944b9b50 100644
--- a/supernode/services/cascade/adaptors/p2p.go
+++ b/supernode/services/cascade/adaptors/p2p.go
@@ -39,13 +39,14 @@ type P2PService interface {
// p2pImpl is the default implementation of the P2PService interface.
type p2pImpl struct {
- p2p p2p.Client
- rqStore rqstore.Store
+ p2p p2p.Client
+ rqStore rqstore.Store
+ metricsDisabled bool
}
// NewP2PService returns a concrete implementation of P2PService.
-func NewP2PService(client p2p.Client, store rqstore.Store) P2PService {
- return &p2pImpl{p2p: client, rqStore: store}
+func NewP2PService(client p2p.Client, store rqstore.Store, metricsDisabled bool) P2PService {
+ return &p2pImpl{p2p: client, rqStore: store, metricsDisabled: metricsDisabled}
}
type StoreArtefactsRequest struct {
@@ -56,11 +57,13 @@ type StoreArtefactsRequest struct {
}
func (p *p2pImpl) StoreArtefacts(ctx context.Context, req StoreArtefactsRequest, f logtrace.Fields) error {
- logtrace.Info(ctx, "About to store artefacts (metadata + symbols)", logtrace.Fields{"taskID": req.TaskID, "id_files": len(req.IDFiles)})
+ logtrace.Info(ctx, "StoreArtefacts start", logtrace.Fields{"taskID": req.TaskID, "actionID": req.ActionID, "id_files": len(req.IDFiles), "symbols_dir": req.SymbolsDir})
- // Enable per-node store RPC capture for this task
- cm.StartStoreCapture(req.TaskID)
- defer cm.StopStoreCapture(req.TaskID)
+ // Optionally enable per-node store RPC capture for this task
+ if !p.metricsDisabled {
+ cm.StartStoreCapture(req.TaskID)
+ defer cm.StopStoreCapture(req.TaskID)
+ }
start := time.Now()
firstPassSymbols, totalSymbols, err := p.storeCascadeSymbolsAndData(ctx, req.TaskID, req.ActionID, req.SymbolsDir, req.IDFiles)
@@ -68,7 +71,17 @@ func (p *p2pImpl) StoreArtefacts(ctx context.Context, req StoreArtefactsRequest,
return errors.Wrap(err, "error storing artefacts")
}
dur := time.Since(start).Milliseconds()
- logtrace.Info(ctx, "artefacts have been stored", logtrace.Fields{"taskID": req.TaskID, "symbols_first_pass": firstPassSymbols, "symbols_total": totalSymbols, "id_files_count": len(req.IDFiles)})
+ // After first-pass, log how many symbols remain on disk
+ remaining := 0
+ if req.SymbolsDir != "" {
+ if keys, werr := walkSymbolTree(req.SymbolsDir); werr == nil {
+ remaining = len(keys)
+ }
+ }
+ logtrace.Info(ctx, "StoreArtefacts completed", logtrace.Fields{"taskID": req.TaskID, "symbols_first_pass": firstPassSymbols, "symbols_total_available": totalSymbols, "id_files_count": len(req.IDFiles), "symbols_left_on_disk": remaining, "ms": dur})
+ if remaining == 0 {
+ logtrace.Info(ctx, "Symbols directory is empty after first-pass", logtrace.Fields{"taskID": req.TaskID, "dir": req.SymbolsDir})
+ }
// Record store summary for later event emission
cm.SetStoreSummary(req.TaskID, firstPassSymbols, totalSymbols, len(req.IDFiles), dur)
return nil
@@ -98,6 +111,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action
if targetCount < 1 && totalAvailable > 0 {
targetCount = 1
}
+ logtrace.Info(ctx, "Symbols discovered in directory", logtrace.Fields{"total_symbols": totalAvailable, "dir": symbolsDir})
logtrace.Info(ctx, "first-pass target coverage (symbols)", logtrace.Fields{
"total_symbols": totalAvailable,
"target_percent": storeSymbolsPercent,
@@ -113,7 +127,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action
}
sort.Strings(keys) // deterministic order inside the sample
}
-
+ logtrace.Info(ctx, "first-pass selected symbols", logtrace.Fields{"selected": len(keys), "of_total": totalAvailable, "dir": symbolsDir})
logtrace.Info(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)})
/* stream in fixed-size batches -------------------------------------- */
@@ -150,6 +164,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action
payload = append(payload, symBytes...)
// Send as the same data type you use for symbols
+ logtrace.Info(ctx, "RPC StoreBatch (first-batch): metadata + symbols", logtrace.Fields{"taskID": taskID, "metadata_count": len(metadataFiles), "symbols_in_batch": len(symBytes), "payload_total": len(payload)})
bctx, cancel := context.WithTimeout(ctx, storeBatchContextTimeout)
bctx = cm.WithTaskID(bctx, taskID)
err = p.p2p.StoreBatch(bctx, payload, storage.P2PDataRaptorQSymbol, taskID)
@@ -157,6 +172,7 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action
if err != nil {
return totalSymbols, totalAvailable, fmt.Errorf("p2p store batch (first): %w", err)
}
+ logtrace.Info(ctx, "RPC StoreBatch completed (first-batch)", logtrace.Fields{"taskID": taskID, "symbols_stored": len(symBytes)})
totalSymbols += len(symBytes)
// No per-RPC metrics propagated from p2p
@@ -167,6 +183,14 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action
return totalSymbols, totalAvailable, fmt.Errorf("delete symbols: %w", err)
}
}
+ // Log remaining symbols in directory after deletion
+ if rem, werr := walkSymbolTree(symbolsDir); werr == nil {
+ if left := len(rem); left > 0 {
+ logtrace.Info(ctx, "symbols left after first-batch", logtrace.Fields{"taskID": taskID, "left": left})
+ } else {
+ logtrace.Info(ctx, "Symbols directory is empty after first-batch", logtrace.Fields{"taskID": taskID, "dir": symbolsDir})
+ }
+ }
firstBatchProcessed = true
} else {
@@ -191,6 +215,14 @@ func (p *p2pImpl) storeCascadeSymbolsAndData(ctx context.Context, taskID, action
if err := p.rqStore.UpdateIsFirstBatchStored(actionID); err != nil {
return totalSymbols, totalAvailable, fmt.Errorf("update first-batch flag: %w", err)
}
+ // Final remaining count after first pass flagged
+ if rem, werr := walkSymbolTree(symbolsDir); werr == nil {
+ if left := len(rem); left > 0 {
+ logtrace.Info(ctx, "first-pass completed; symbols remaining on disk", logtrace.Fields{"taskID": taskID, "left": left, "dir": symbolsDir})
+ } else {
+ logtrace.Info(ctx, "first-pass completed; directory empty", logtrace.Fields{"taskID": taskID, "dir": symbolsDir})
+ }
+ }
return totalSymbols, totalAvailable, nil
@@ -225,7 +257,7 @@ func walkSymbolTree(root string) ([]string, error) {
// storeSymbolsInP2P loads a batch of symbols and stores them via P2P.
-// Returns (ratePct, requests, count, error) where `count` is the number of symbols in this batch.
+// Returns (count, error) where `count` is the number of symbols in this batch.
func (c *p2pImpl) storeSymbolsInP2P(ctx context.Context, taskID, root string, fileKeys []string) (int, error) {
- logtrace.Info(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)})
+ logtrace.Info(ctx, "loading batch symbols", logtrace.Fields{"taskID": taskID, "count": len(fileKeys)})
symbols, err := utils.LoadSymbols(root, fileKeys)
if err != nil {
@@ -236,15 +268,21 @@ func (c *p2pImpl) storeSymbolsInP2P(ctx context.Context, taskID, root string, fi
symCtx = cm.WithTaskID(symCtx, taskID)
defer cancel()
+ logtrace.Info(ctx, "RPC StoreBatch (symbols batch)", logtrace.Fields{"taskID": taskID, "symbols_in_batch": len(symbols)})
if err := c.p2p.StoreBatch(symCtx, symbols, storage.P2PDataRaptorQSymbol, taskID); err != nil {
return len(symbols), fmt.Errorf("p2p store batch: %w", err)
}
- logtrace.Info(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)})
+ logtrace.Info(ctx, "RPC StoreBatch completed (symbols batch)", logtrace.Fields{"taskID": taskID, "symbols_stored": len(symbols)})
if err := utils.DeleteSymbols(ctx, root, fileKeys); err != nil {
return len(symbols), fmt.Errorf("delete symbols: %w", err)
}
- logtrace.Info(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)})
+ // After deletion, log remaining count in directory
+ left := -1
+ if rem, werr := walkSymbolTree(root); werr == nil {
+ left = len(rem)
+ }
+ logtrace.Info(ctx, "deleted batch symbols", logtrace.Fields{"taskID": taskID, "count": len(symbols), "symbols_left_on_disk": left})
// No per-RPC metrics propagated from p2p
return len(symbols), nil
diff --git a/supernode/services/cascade/adaptors/rq.go b/supernode/services/cascade/adaptors/rq.go
index 5f4443cf..92e89819 100644
--- a/supernode/services/cascade/adaptors/rq.go
+++ b/supernode/services/cascade/adaptors/rq.go
@@ -11,6 +11,7 @@ import (
//go:generate mockgen -destination=mocks/rq_mock.go -package=cascadeadaptormocks -source=rq.go
type CodecService interface {
EncodeInput(ctx context.Context, taskID string, path string, dataSize int) (EncodeResult, error)
+ PrepareDecode(ctx context.Context, actionID string, layout codec.Layout) (blockPaths []string, Write func(block int, symbolID string, data []byte) (string, error), Cleanup func() error, ws *codec.Workspace, err error)
Decode(ctx context.Context, req DecodeRequest) (DecodeResponse, error)
}
@@ -70,7 +71,11 @@ func (c *codecImpl) Decode(ctx context.Context, req DecodeRequest) (DecodeRespon
}
return DecodeResponse{
- FilePath: resp.Path,
+ FilePath: resp.FilePath,
DecodeTmpDir: resp.DecodeTmpDir,
}, nil
}
+
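+// PrepareDecode is currently a stub: the bare return yields zero values for
+// all named results, including a nil error.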
+func (c *codecImpl) PrepareDecode(ctx context.Context, actionID string, layout codec.Layout) (blockPaths []string, Write func(block int, symbolID string, data []byte) (string, error), Cleanup func() error, ws *codec.Workspace, err error) {
+ return
+}
diff --git a/supernode/services/cascade/config.go b/supernode/services/cascade/config.go
index 7a0f1ef2..01401d41 100644
--- a/supernode/services/cascade/config.go
+++ b/supernode/services/cascade/config.go
@@ -8,6 +8,8 @@ import (
type Config struct {
common.Config `mapstructure:",squash" json:"-"`
- RaptorQServiceAddress string `mapstructure:"-" json:"-"`
- RqFilesDir string `mapstructure:"rq_files_dir" json:"rq_files_dir,omitempty"`
+ RaptorQServiceAddress string `mapstructure:"-" json:"-"`
+ RqFilesDir string `mapstructure:"rq_files_dir" json:"rq_files_dir,omitempty"`
+ // MetricsDisabled toggles upload/download metrics for cascade service
+ MetricsDisabled bool `mapstructure:"-" json:"-"`
}
diff --git a/supernode/services/cascade/download.go b/supernode/services/cascade/download.go
index b8220045..ede254a4 100644
--- a/supernode/services/cascade/download.go
+++ b/supernode/services/cascade/download.go
@@ -24,6 +24,9 @@ const targetRequiredPercent = 17
type DownloadRequest struct {
ActionID string
+ // Signature is required for private downloads. For public cascade
+ // actions (metadata.Public == true), this is ignored.
+ Signature string
}
type DownloadResponse struct {
@@ -33,6 +36,12 @@ type DownloadResponse struct {
DownloadedDir string
}
+// Download retrieves a cascade artefact by action ID.
+//
+// Authorization behavior:
+// - If the cascade metadata has Public = true, signature verification is skipped
+// and the file is downloadable by anyone.
+// - If Public = false, a valid download signature is required.
func (task *CascadeRegistrationTask) Download(
ctx context.Context,
req *DownloadRequest,
@@ -53,17 +62,19 @@ func (task *CascadeRegistrationTask) Download(
actionDetails, err := task.LumeraClient.GetAction(ctx, req.ActionID)
if err != nil {
- fields[logtrace.FieldError] = err
+ // Ensure error is logged as string for consistency
+ fields[logtrace.FieldError] = err.Error()
return task.wrapErr(ctx, "failed to get action", err, fields)
}
logtrace.Info(ctx, "Action retrieved", fields)
task.streamDownloadEvent(SupernodeEventTypeActionRetrieved, "Action retrieved", "", "", send)
if actionDetails.GetAction().State != actiontypes.ActionStateDone {
+ // Return a clearer error message when action is not yet finalized
err = errors.New("action is not in a valid state")
fields[logtrace.FieldError] = "action state is not done yet"
fields[logtrace.FieldActionState] = actionDetails.GetAction().State
- return task.wrapErr(ctx, "action not found", err, fields)
+ return task.wrapErr(ctx, "action not finalized yet", err, fields)
}
logtrace.Info(ctx, "Action state validated", fields)
@@ -75,12 +86,37 @@ func (task *CascadeRegistrationTask) Download(
logtrace.Info(ctx, "Cascade metadata decoded", fields)
task.streamDownloadEvent(SupernodeEventTypeMetadataDecoded, "Cascade metadata decoded", "", "", send)
+ // Enforce download authorization based on metadata.Public
+ // - If public: skip signature verification; allow anonymous downloads
+ // - If private: require a valid signature
+ if !metadata.Public {
+ if req.Signature == "" {
+ fields[logtrace.FieldError] = "missing signature for private download"
+ // Provide a descriptive message without a fabricated root error
+ return task.wrapErr(ctx, "private cascade requires a download signature", nil, fields)
+ }
+ if err := task.VerifyDownloadSignature(ctx, req.ActionID, req.Signature); err != nil {
+ fields[logtrace.FieldError] = err.Error()
+ return task.wrapErr(ctx, "failed to verify download signature", err, fields)
+ }
+ logtrace.Info(ctx, "Download signature verified for private cascade", fields)
+ } else {
+ logtrace.Info(ctx, "Public cascade: skipping download signature verification", fields)
+ }
+
// Notify: network retrieval phase begins
task.streamDownloadEvent(SupernodeEventTypeNetworkRetrieveStarted, "Network retrieval started", "", "", send)
+ logtrace.Info(ctx, "Starting network retrieval of artefacts", logtrace.Fields{logtrace.FieldActionID: actionDetails.GetAction().ActionID})
filePath, tmpDir, err := task.downloadArtifacts(ctx, actionDetails.GetAction().ActionID, metadata, fields, send)
if err != nil {
fields[logtrace.FieldError] = err.Error()
+ // Ensure temporary decode directory is cleaned if decode failed after being created
+ if tmpDir != "" {
+ if cerr := task.CleanupDownload(ctx, tmpDir); cerr != nil {
+ logtrace.Warn(ctx, "cleanup of tmp dir after error failed", logtrace.Fields{"tmp_dir": tmpDir, logtrace.FieldError: cerr.Error()})
+ }
+ }
return task.wrapErr(ctx, "failed to download artifacts", err, fields)
}
logtrace.Info(ctx, "File reconstructed and hash verified", fields)
@@ -101,15 +137,19 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, acti
)
for _, indexID := range metadata.RqIdsIds {
+ iStart := time.Now()
+ logtrace.Info(ctx, "RPC Retrieve index file", logtrace.Fields{"index_id": indexID})
indexFile, err := task.P2PClient.Retrieve(ctx, indexID)
if err != nil || len(indexFile) == 0 {
+ logtrace.Warn(ctx, "Retrieve index file failed or empty", logtrace.Fields{"index_id": indexID, logtrace.FieldError: fmt.Sprintf("%v", err)})
continue
}
+ logtrace.Info(ctx, "Retrieve index file completed", logtrace.Fields{"index_id": indexID, "bytes": len(indexFile), "ms": time.Since(iStart).Milliseconds()})
// Parse index file to get layout IDs
indexData, err := task.parseIndexFile(indexFile)
if err != nil {
- logtrace.Info(ctx, "failed to parse index file", fields)
+ logtrace.Warn(ctx, "failed to parse index file", logtrace.Fields{"index_id": indexID, logtrace.FieldError: err.Error()})
continue
}
@@ -117,14 +157,14 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, acti
var netMS, decMS int64
layout, netMS, decMS, layoutAttempts, err = task.retrieveLayoutFromIndex(ctx, indexData, fields)
if err != nil {
- logtrace.Info(ctx, "failed to retrieve layout from index", fields)
+ logtrace.Warn(ctx, "failed to retrieve layout from index", logtrace.Fields{"index_id": indexID, logtrace.FieldError: err.Error(), "attempts": layoutAttempts})
continue
}
layoutFetchMS = netMS
layoutDecodeMS = decMS
if len(layout.Blocks) > 0 {
- logtrace.Info(ctx, "layout file retrieved via index", fields)
+ logtrace.Info(ctx, "layout file retrieved via index", logtrace.Fields{"index_id": indexID, "attempts": layoutAttempts, "net_ms": layoutFetchMS, "decode_ms": layoutDecodeMS})
break
}
}
@@ -139,6 +179,10 @@ func (task *CascadeRegistrationTask) downloadArtifacts(ctx context.Context, acti
return task.restoreFileFromLayout(ctx, layout, metadata.DataHash, actionID, send)
}
+// restoreFileFromLayout reconstructs the original file from the provided layout
+// and a subset of retrieved symbols. The method deduplicates symbol identifiers
+// before network retrieval to avoid redundant requests and ensure the requested
+// count reflects unique symbols only.
func (task *CascadeRegistrationTask) restoreFileFromLayout(
ctx context.Context,
layout codec.Layout,
@@ -150,9 +194,16 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout(
fields := logtrace.Fields{
logtrace.FieldActionID: actionID,
}
- var allSymbols []string
+ // Deduplicate symbols across blocks to avoid redundant requests
+ symSet := make(map[string]struct{})
for _, block := range layout.Blocks {
- allSymbols = append(allSymbols, block.Symbols...)
+ for _, s := range block.Symbols {
+ symSet[s] = struct{}{}
+ }
+ }
+ allSymbols := make([]string, 0, len(symSet))
+ for s := range symSet {
+ allSymbols = append(allSymbols, s)
}
sort.Strings(allSymbols)
@@ -163,26 +214,38 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout(
if targetRequiredCount < 1 && totalSymbols > 0 {
targetRequiredCount = 1
}
- logtrace.Info(ctx, "Retrieving all symbols for decode", fields)
+ logtrace.Info(ctx, "Retrieving target-required symbols for decode", logtrace.Fields{"total_symbols": totalSymbols, "target_required_percent": targetRequiredPercent, "target_required_count": targetRequiredCount})
- // Enable retrieve metrics capture for this action
- cm.StartRetrieveCapture(actionID)
- defer cm.StopRetrieveCapture(actionID)
+ if !task.config.MetricsDisabled {
+ cm.StartRetrieveCapture(actionID)
+ defer cm.StopRetrieveCapture(actionID)
+ }
// Measure symbols batch retrieve duration
retrieveStart := time.Now()
// Tag context with metrics task ID (actionID)
ctxRetrieve := cm.WithTaskID(ctx, actionID)
- symbols, err := task.P2PClient.BatchRetrieve(ctxRetrieve, allSymbols, totalSymbols, actionID)
+ // Retrieve only a fraction of symbols (targetRequiredCount) based on redundancy
+ // The DHT will short-circuit once it finds the required number across the provided keys
+ reqCount := targetRequiredCount
+ if reqCount > totalSymbols {
+ reqCount = totalSymbols
+ }
+ logtrace.Info(ctx, "RPC BatchRetrieve symbols", logtrace.Fields{"action_id": actionID, "requested": reqCount, "total_candidates": totalSymbols})
+ symbols, err := task.P2PClient.BatchRetrieve(ctxRetrieve, allSymbols, reqCount, actionID)
if err != nil {
fields[logtrace.FieldError] = err.Error()
logtrace.Error(ctx, "batch retrieve failed", fields)
return "", "", fmt.Errorf("batch retrieve symbols: %w", err)
}
retrieveMS := time.Since(retrieveStart).Milliseconds()
+ logtrace.Info(ctx, "RPC BatchRetrieve completed", logtrace.Fields{"action_id": actionID, "received": len(symbols), "ms": time.Since(rStart).Milliseconds()})
// Measure decode duration
decodeStart := time.Now()
+ logtrace.Info(ctx, "RQ Decode start", logtrace.Fields{"action_id": actionID})
decodeInfo, err := task.RQ.Decode(ctx, adaptors.DecodeRequest{
ActionID: actionID,
Symbols: symbols,
@@ -194,17 +257,23 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout(
return "", "", fmt.Errorf("decode symbols using RaptorQ: %w", err)
}
decodeMS := time.Since(decodeStart).Milliseconds()
+ logtrace.Info(ctx, "RQ Decode completed", logtrace.Fields{"action_id": actionID, "ms": time.Since(dStart).Milliseconds(), "tmp_dir": decodeInfo.DecodeTmpDir, "file_path": decodeInfo.FilePath})
// Set minimal retrieve summary and emit event strictly from internal collector
- cm.SetRetrieveSummary(actionID, retrieveMS, decodeMS)
- payload := cm.BuildDownloadEventPayloadFromCollector(actionID)
- if retrieve, ok := payload["retrieve"].(map[string]any); ok {
- retrieve["target_required_percent"] = targetRequiredPercent
- retrieve["target_required_count"] = targetRequiredCount
- retrieve["total_symbols"] = totalSymbols
- }
- if b, err := json.MarshalIndent(payload, "", " "); err == nil {
- task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, string(b), "", "", send)
+ if !task.config.MetricsDisabled {
+ cm.SetRetrieveSummary(actionID, retrieveMS, decodeMS)
+ payload := cm.BuildDownloadEventPayloadFromCollector(actionID)
+ if retrieve, ok := payload["retrieve"].(map[string]any); ok {
+ retrieve["target_required_percent"] = targetRequiredPercent
+ retrieve["target_required_count"] = targetRequiredCount
+ retrieve["total_symbols"] = totalSymbols
+ }
+ if b, err := json.MarshalIndent(payload, "", " "); err == nil {
+ task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, string(b), "", "", send)
+ }
+ } else {
+ // Metrics disabled: emit a minimal static completion event
+ task.streamDownloadEvent(SupernodeEventTypeArtefactsDownloaded, "Download completed (metrics disabled)", "", "", send)
}
fileHash, err := crypto.HashFileIncrementally(decodeInfo.FilePath, 0)
@@ -225,6 +294,16 @@ func (task *CascadeRegistrationTask) restoreFileFromLayout(
fields[logtrace.FieldError] = err.Error()
return "", decodeInfo.DecodeTmpDir, err
}
+ // Log the state of the temporary decode directory
+ if decodeInfo.DecodeTmpDir != "" {
+ if set, derr := utils.ReadDirFilenames(decodeInfo.DecodeTmpDir); derr == nil {
+ if left := len(set); left > 0 {
+ logtrace.Info(ctx, "Decode tmp directory has files remaining", logtrace.Fields{"dir": decodeInfo.DecodeTmpDir, "left": left})
+ } else {
+ logtrace.Info(ctx, "Decode tmp directory is empty", logtrace.Fields{"dir": decodeInfo.DecodeTmpDir})
+ }
+ }
+ }
logtrace.Info(ctx, "File successfully restored and hash verified", fields)
return decodeInfo.FilePath, decodeInfo.DecodeTmpDir, nil
@@ -267,20 +346,26 @@ func (task *CascadeRegistrationTask) retrieveLayoutFromIndex(ctx context.Context
for _, layoutID := range indexData.LayoutIDs {
attempts++
t0 := time.Now()
+ logtrace.Info(ctx, "RPC Retrieve layout file", logtrace.Fields{"layout_id": layoutID, "attempt": attempts})
layoutFile, err := task.P2PClient.Retrieve(ctx, layoutID)
- totalFetchMS += time.Since(t0).Milliseconds()
+ took := time.Since(t0).Milliseconds()
+ totalFetchMS += took
if err != nil || len(layoutFile) == 0 {
+ logtrace.Warn(ctx, "Retrieve layout file failed or empty", logtrace.Fields{"layout_id": layoutID, "attempt": attempts, "ms": took, logtrace.FieldError: fmt.Sprintf("%v", err)})
continue
}
t1 := time.Now()
layout, _, _, err := parseRQMetadataFile(layoutFile)
- totalDecodeMS += time.Since(t1).Milliseconds()
+ decMS := time.Since(t1).Milliseconds()
+ totalDecodeMS += decMS
if err != nil {
+ logtrace.Warn(ctx, "Parse layout file failed", logtrace.Fields{"layout_id": layoutID, "attempt": attempts, "decode_ms": decMS, logtrace.FieldError: err.Error()})
continue
}
if len(layout.Blocks) > 0 {
+ logtrace.Info(ctx, "Layout file retrieved and parsed", logtrace.Fields{"layout_id": layoutID, "attempt": attempts, "net_ms": took, "decode_ms": decMS})
return layout, totalFetchMS, totalDecodeMS, attempts, nil
}
}
@@ -288,15 +373,20 @@ func (task *CascadeRegistrationTask) retrieveLayoutFromIndex(ctx context.Context
return codec.Layout{}, totalFetchMS, totalDecodeMS, attempts, errors.New("no valid layout found in index")
}
-func (task *CascadeRegistrationTask) CleanupDownload(ctx context.Context, actionID string) error {
- if actionID == "" {
- return errors.New("actionID is empty")
+// CleanupDownload removes the temporary directory created during decode.
+// The parameter is a directory path (not an action ID).
+func (task *CascadeRegistrationTask) CleanupDownload(ctx context.Context, dirPath string) error {
+ if dirPath == "" {
+ return errors.New("directory path is empty")
}
- // For now, we use actionID as the directory path to maintain compatibility
- if err := os.RemoveAll(actionID); err != nil {
- return errors.Errorf("failed to delete download directory: %s, :%s", actionID, err.Error())
+ // For now, we use the tmp directory path as provided by the decoder
+ logtrace.Info(ctx, "Cleanup download directory", logtrace.Fields{"dir": dirPath})
+ if err := os.RemoveAll(dirPath); err != nil {
+ logtrace.Warn(ctx, "Cleanup download directory failed", logtrace.Fields{"dir": dirPath, logtrace.FieldError: err.Error()})
+ return errors.Errorf("failed to delete download directory %s: %s", dirPath, err.Error())
}
+ logtrace.Info(ctx, "Cleanup download directory completed", logtrace.Fields{"dir": dirPath})
return nil
}
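
Aside on the fraction-based retrieval above: a minimal sketch, assuming a simple percent-to-count conversion, of how a value like targetRequiredCount could be derived and then clamped exactly the way reqCount is in restoreFileFromLayout. The helper name requiredSymbolCount and its math are illustrative, not the repository's API.

    package main

    import "fmt"

    // requiredSymbolCount turns a target percentage into an absolute symbol
    // count, capped at the number of candidate keys (hypothetical helper).
    func requiredSymbolCount(targetRequiredPercent, totalSymbols int) int {
        reqCount := (totalSymbols * targetRequiredPercent) / 100
        if reqCount > totalSymbols {
            reqCount = totalSymbols
        }
        if reqCount < 1 && totalSymbols > 0 {
            reqCount = 1 // always request at least one symbol
        }
        return reqCount
    }

    func main() {
        // e.g. 17% of 1000 candidate symbols -> 170 requested
        fmt.Println(requiredSymbolCount(17, 1000))
    }
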
diff --git a/supernode/services/cascade/helper.go b/supernode/services/cascade/helper.go
index fb8c7ef5..e6197b41 100644
--- a/supernode/services/cascade/helper.go
+++ b/supernode/services/cascade/helper.go
@@ -14,9 +14,9 @@ import (
"github.com/LumeraProtocol/supernode/v2/pkg/errors"
"github.com/LumeraProtocol/supernode/v2/pkg/logtrace"
"github.com/LumeraProtocol/supernode/v2/pkg/lumera/modules/supernode"
+ cm "github.com/LumeraProtocol/supernode/v2/pkg/p2pmetrics"
"github.com/LumeraProtocol/supernode/v2/pkg/utils"
"github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors"
- cm "github.com/LumeraProtocol/supernode/v2/pkg/p2pmetrics"
sdk "github.com/cosmos/cosmos-sdk/types"
"github.com/golang/protobuf/proto"
@@ -175,6 +175,20 @@ func (task *CascadeRegistrationTask) generateRQIDFiles(ctx context.Context, meta
// storeArtefacts persists cascade artefacts (ID files + RaptorQ symbols) via the
// P2P adaptor. P2P does not return metrics; cascade summarizes and emits them.
func (task *CascadeRegistrationTask) storeArtefacts(ctx context.Context, actionID string, idFiles [][]byte, symbolsDir string, f logtrace.Fields) error {
+ lf := logtrace.Fields{
+ logtrace.FieldActionID: actionID,
+ logtrace.FieldTaskID: task.ID(),
+ "id_files_count": len(idFiles),
+ "symbols_dir": symbolsDir,
+ }
+ for k, v := range f {
+ lf[k] = v
+ }
+ logtrace.Info(ctx, "storeArtefacts invoked", lf)
+
return task.P2P.StoreArtefacts(ctx, adaptors.StoreArtefactsRequest{
IDFiles: idFiles,
SymbolsDir: symbolsDir,
@@ -199,17 +213,17 @@ func (task *CascadeRegistrationTask) wrapErr(ctx context.Context, msg string, er
// emitArtefactsStored builds a single-line metrics summary and emits the
// SupernodeEventTypeArtefactsStored event while logging the metrics line.
func (task *CascadeRegistrationTask) emitArtefactsStored(
- ctx context.Context,
- fields logtrace.Fields,
- _ codec.Layout,
- send func(resp *RegisterResponse) error,
+ ctx context.Context,
+ fields logtrace.Fields,
+ _ codec.Layout,
+ send func(resp *RegisterResponse) error,
) {
if fields == nil {
fields = logtrace.Fields{}
}
- // Build payload strictly from internal collector (no P2P snapshots)
- payload := cm.BuildStoreEventPayloadFromCollector(task.ID())
+ // Build payload strictly from internal collector (no P2P snapshots)
+ payload := cm.BuildStoreEventPayloadFromCollector(task.ID())
b, _ := json.MarshalIndent(payload, "", " ")
msg := string(b)
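
The storeArtefacts change above builds a base field set (lf) and then copies the caller's fields over it, so caller-supplied keys win on collision; since ranging over a nil map is a no-op in Go, no nil check is needed. A self-contained sketch of that merge order, with Fields standing in for logtrace.Fields (whose exact definition is not part of this patch):

    package main

    import "fmt"

    // Fields stands in for logtrace.Fields (a string-keyed map).
    type Fields map[string]interface{}

    // mergeFields copies caller fields over base fields; on key collision
    // the caller's value wins, matching the loop in storeArtefacts.
    func mergeFields(base, caller Fields) Fields {
        out := Fields{}
        for k, v := range base {
            out[k] = v
        }
        for k, v := range caller { // safe even when caller is nil
            out[k] = v
        }
        return out
    }

    func main() {
        base := Fields{"action_id": "a1", "symbols_dir": "/tmp/sym"}
        caller := Fields{"symbols_dir": "/data/sym"} // overrides the base value
        fmt.Println(mergeFields(base, caller))
    }
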
diff --git a/supernode/services/cascade/mocks/cascade_interfaces_mock.go b/supernode/services/cascade/mocks/cascade_interfaces_mock.go
index 497497c3..44d3189c 100644
--- a/supernode/services/cascade/mocks/cascade_interfaces_mock.go
+++ b/supernode/services/cascade/mocks/cascade_interfaces_mock.go
@@ -1,10 +1,5 @@
// Code generated by MockGen. DO NOT EDIT.
// Source: interfaces.go
-//
-// Generated by this command:
-//
-// mockgen -destination=mocks/cascade_interfaces_mock.go -package=cascademocks -source=interfaces.go
-//
// Package cascademocks is a generated GoMock package.
package cascademocks
@@ -14,14 +9,13 @@ import (
reflect "reflect"
cascade "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade"
- gomock "go.uber.org/mock/gomock"
+ gomock "github.com/golang/mock/gomock"
)
// MockCascadeServiceFactory is a mock of CascadeServiceFactory interface.
type MockCascadeServiceFactory struct {
ctrl *gomock.Controller
recorder *MockCascadeServiceFactoryMockRecorder
- isgomock struct{}
}
// MockCascadeServiceFactoryMockRecorder is the mock recorder for MockCascadeServiceFactory.
@@ -59,7 +53,6 @@ func (mr *MockCascadeServiceFactoryMockRecorder) NewCascadeRegistrationTask() *g
type MockCascadeTask struct {
ctrl *gomock.Controller
recorder *MockCascadeTaskMockRecorder
- isgomock struct{}
}
// MockCascadeTaskMockRecorder is the mock recorder for MockCascadeTask.
@@ -88,7 +81,7 @@ func (m *MockCascadeTask) CleanupDownload(ctx context.Context, actionID string)
}
// CleanupDownload indicates an expected call of CleanupDownload.
-func (mr *MockCascadeTaskMockRecorder) CleanupDownload(ctx, actionID any) *gomock.Call {
+func (mr *MockCascadeTaskMockRecorder) CleanupDownload(ctx, actionID interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CleanupDownload", reflect.TypeOf((*MockCascadeTask)(nil).CleanupDownload), ctx, actionID)
}
@@ -102,7 +95,7 @@ func (m *MockCascadeTask) Download(ctx context.Context, req *cascade.DownloadReq
}
// Download indicates an expected call of Download.
-func (mr *MockCascadeTaskMockRecorder) Download(ctx, req, send any) *gomock.Call {
+func (mr *MockCascadeTaskMockRecorder) Download(ctx, req, send interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Download", reflect.TypeOf((*MockCascadeTask)(nil).Download), ctx, req, send)
}
@@ -116,7 +109,7 @@ func (m *MockCascadeTask) Register(ctx context.Context, req *cascade.RegisterReq
}
// Register indicates an expected call of Register.
-func (mr *MockCascadeTaskMockRecorder) Register(ctx, req, send any) *gomock.Call {
+func (mr *MockCascadeTaskMockRecorder) Register(ctx, req, send interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Register", reflect.TypeOf((*MockCascadeTask)(nil).Register), ctx, req, send)
}
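
The mocks above were regenerated against github.com/golang/mock (hence any reverting to interface{} in the recorder signatures). A minimal sketch of how such a mock is typically consumed in a test; NewMockCascadeTask is the constructor mockgen conventionally generates alongside the mock type, assumed here rather than shown in the diff.

    package cascade_test

    import (
        "context"
        "testing"

        cascademocks "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/mocks"
        "github.com/golang/mock/gomock"
    )

    func TestCleanupDownload_Mock(t *testing.T) {
        ctrl := gomock.NewController(t)
        defer ctrl.Finish() // fails the test if an expected call never happened

        task := cascademocks.NewMockCascadeTask(ctrl)
        task.EXPECT().
            CleanupDownload(gomock.Any(), gomock.Any()).
            Return(nil) // expect exactly one cleanup call

        if err := task.CleanupDownload(context.Background(), "/tmp/decode-xyz"); err != nil {
            t.Fatalf("unexpected error: %v", err)
        }
    }
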
diff --git a/supernode/services/cascade/register.go b/supernode/services/cascade/register.go
index dd6e1e77..8d6cfd07 100644
--- a/supernode/services/cascade/register.go
+++ b/supernode/services/cascade/register.go
@@ -64,7 +64,7 @@ func (task *CascadeRegistrationTask) Register(
if remErr := os.RemoveAll(req.FilePath); remErr != nil {
logtrace.Warn(ctx, "Failed to remove uploaded file", fields)
} else {
- logtrace.Info(ctx, "Uploaded file cleaned up", fields)
+ logtrace.Debug(ctx, "Uploaded file cleaned up", fields)
}
}
}()
@@ -147,7 +147,7 @@ func (task *CascadeRegistrationTask) Register(
/* 10. Simulate finalize to avoid storing artefacts if it would fail ---------- */
if _, err := task.LumeraClient.SimulateFinalizeAction(ctx, action.ActionID, rqidResp.RQIDs); err != nil {
fields[logtrace.FieldError] = err.Error()
- logtrace.Info(ctx, "Finalize simulation failed", fields)
+ logtrace.Debug(ctx, "Finalize simulation failed", fields)
// Emit explicit simulation failure event for client visibility
task.streamEvent(SupernodeEventTypeFinalizeSimulationFailed, "Finalize simulation failed", "", send)
return task.wrapErr(ctx, "finalize action simulation failed", err, fields)
@@ -157,18 +157,20 @@ func (task *CascadeRegistrationTask) Register(
task.streamEvent(SupernodeEventTypeFinalizeSimulated, "Finalize simulation passed", "", send)
/* 11. Persist artefacts -------------------------------------------------------- */
- // Persist artefacts to the P2P network. P2P interfaces return error only;
- // metrics are summarized at the cascade layer and emitted via event.
+ // Persist artefacts to the P2P network. P2P interfaces return error only;
+ // metrics are summarized at the cascade layer and emitted via event.
if err := task.storeArtefacts(ctx, action.ActionID, rqidResp.RedundantMetadataFiles, encResp.SymbolsDir, fields); err != nil {
return err
}
- // Emit compact analytics payload from centralized metrics collector
- task.emitArtefactsStored(ctx, fields, encResp.Metadata, send)
+ // Emit compact analytics payload from centralized metrics collector (optional)
+ if !task.config.MetricsDisabled {
+ task.emitArtefactsStored(ctx, fields, encResp.Metadata, send)
+ }
resp, err := task.LumeraClient.FinalizeAction(ctx, action.ActionID, rqidResp.RQIDs)
if err != nil {
fields[logtrace.FieldError] = err.Error()
- logtrace.Info(ctx, "Finalize action error", fields)
+ logtrace.Debug(ctx, "Finalize action error", fields)
return task.wrapErr(ctx, "failed to finalize action", err, fields)
}
txHash := resp.TxResponse.TxHash
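
A compact sketch of the MetricsDisabled gate that Register now applies above: the full collector payload is marshaled and sent when metrics are on, and a fixed message is sent otherwise. Config, buildPayload, and send are illustrative stand-ins for the task's real collaborators, not the cascade package's types.

    package main

    import (
        "encoding/json"
        "fmt"
    )

    type Config struct{ MetricsDisabled bool }

    // emitStored mirrors the branch in Register: full JSON payload when
    // metrics are enabled, a minimal hardcoded message otherwise.
    func emitStored(cfg Config, buildPayload func() map[string]any, send func(msg string)) {
        if cfg.MetricsDisabled {
            send("Artefacts stored (metrics disabled)")
            return
        }
        if b, err := json.MarshalIndent(buildPayload(), "", "  "); err == nil {
            send(string(b))
        }
    }

    func main() {
        payload := func() map[string]any { return map[string]any{"store_ms": 42} }
        emitStored(Config{}, payload, func(m string) { fmt.Println(m) })
        emitStored(Config{MetricsDisabled: true}, payload, func(m string) { fmt.Println(m) })
    }
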
diff --git a/supernode/services/cascade/register_test.go b/supernode/services/cascade/register_test.go
index c73b96b7..6f56791a 100644
--- a/supernode/services/cascade/register_test.go
+++ b/supernode/services/cascade/register_test.go
@@ -21,8 +21,8 @@ import (
"github.com/cosmos/gogoproto/proto"
"lukechampine.com/blake3"
+ "github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
- "go.uber.org/mock/gomock"
)
func TestCascadeRegistrationTask_Register(t *testing.T) {
@@ -104,10 +104,10 @@ func TestCascadeRegistrationTask_Register(t *testing.T) {
Metadata: codecpkg.Layout{Blocks: []codecpkg.Block{{BlockID: 1, Hash: "abc"}}},
}, nil)
- // 8. Store artefacts (no metrics returned; recorded centrally)
- p2p.EXPECT().
- StoreArtefacts(gomock.Any(), gomock.Any(), gomock.Any()).
- Return(nil)
+ // 8. Store artefacts (no metrics returned; recorded centrally)
+ p2p.EXPECT().
+ StoreArtefacts(gomock.Any(), gomock.Any(), gomock.Any()).
+ Return(nil)
},
expectedError: "",
expectedEvents: 12,
diff --git a/supernode/services/cascade/service.go b/supernode/services/cascade/service.go
index a1d9898b..b5b2870a 100644
--- a/supernode/services/cascade/service.go
+++ b/supernode/services/cascade/service.go
@@ -56,11 +56,11 @@ func (service *CascadeService) GetRunningTasks() []string {
// NewCascadeService returns a new CascadeService instance
func NewCascadeService(config *Config, lumera lumera.Client, p2pClient p2p.Client, codec codec.Codec, rqstore rqstore.Store) *CascadeService {
- return &CascadeService{
- config: config,
- SuperNodeService: base.NewSuperNodeService(p2pClient),
- LumeraClient: adaptors.NewLumeraClient(lumera),
- P2P: adaptors.NewP2PService(p2pClient, rqstore),
- RQ: adaptors.NewCodecService(codec),
- }
+ return &CascadeService{
+ config: config,
+ SuperNodeService: base.NewSuperNodeService(p2pClient),
+ LumeraClient: adaptors.NewLumeraClient(lumera),
+ P2P: adaptors.NewP2PService(p2pClient, rqstore, config.MetricsDisabled),
+ RQ: adaptors.NewCodecService(codec),
+ }
}
diff --git a/supernode/services/cascade/service_test.go b/supernode/services/cascade/service_test.go
index eaa7bf7f..bc2998ad 100644
--- a/supernode/services/cascade/service_test.go
+++ b/supernode/services/cascade/service_test.go
@@ -8,8 +8,8 @@ import (
"github.com/LumeraProtocol/supernode/v2/supernode/services/cascade"
cascadeadaptormocks "github.com/LumeraProtocol/supernode/v2/supernode/services/cascade/adaptors/mocks"
"github.com/LumeraProtocol/supernode/v2/supernode/services/common"
+ "github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
- "go.uber.org/mock/gomock"
)
func TestNewCascadeService(t *testing.T) {
diff --git a/supernode/services/common/base/supernode_service.go b/supernode/services/common/base/supernode_service.go
index 1d41715b..424556b0 100644
--- a/supernode/services/common/base/supernode_service.go
+++ b/supernode/services/common/base/supernode_service.go
@@ -52,7 +52,7 @@ func (service *SuperNodeService) RunHelper(ctx context.Context, nodeID string, p
service.Worker = task.NewWorker()
logtrace.Error(ctx, "Service run failed, retrying", logtrace.Fields{logtrace.FieldModule: "supernode", logtrace.FieldError: err.Error()})
} else {
- logtrace.Info(ctx, "Service run completed successfully - closing sn services", logtrace.Fields{logtrace.FieldModule: "supernode"})
+ logtrace.Debug(ctx, "Service run completed successfully - closing sn services", logtrace.Fields{logtrace.FieldModule: "supernode"})
return nil
}
}
diff --git a/supernode/services/common/base/supernode_task.go b/supernode/services/common/base/supernode_task.go
index 937e6013..2908558d 100644
--- a/supernode/services/common/base/supernode_task.go
+++ b/supernode/services/common/base/supernode_task.go
@@ -25,7 +25,7 @@ type SuperNodeTask struct {
func (task *SuperNodeTask) RunHelper(ctx context.Context, clean TaskCleanerFunc) error {
ctx = task.context(ctx)
logtrace.Debug(ctx, "Start task", logtrace.Fields{})
- defer logtrace.Info(ctx, "Task canceled", logtrace.Fields{})
+ defer logtrace.Debug(ctx, "Task canceled", logtrace.Fields{})
defer task.Cancel()
task.SetStatusNotifyFunc(func(status *state.Status) {
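
One subtlety worth keeping in mind about the deferred Debug log above: a deferred call's arguments are evaluated at the defer statement, while the call itself runs only when the function returns. A tiny self-contained sketch:

    package main

    import "fmt"

    func run() {
        status := "started"
        // The argument is captured now ("started"), but the call runs last.
        defer fmt.Println("deferred saw:", status)
        status = "finished"
        fmt.Println("current:", status)
    }

    func main() {
        run() // prints "current: finished", then "deferred saw: started"
    }
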
diff --git a/supernode/services/common/storage/handler.go b/supernode/services/common/storage/handler.go
index 210dab0f..3967fe2d 100644
--- a/supernode/services/common/storage/handler.go
+++ b/supernode/services/common/storage/handler.go
@@ -74,7 +74,7 @@ func (h *StorageHandler) StoreBatch(ctx context.Context, list [][]byte, typ int)
taskID = fmt.Sprintf("%v", val)
}
- logtrace.Info(ctx, "task_id in storeList", logtrace.Fields{logtrace.FieldTaskID: taskID})
+ logtrace.Debug(ctx, "task_id in storeList", logtrace.Fields{logtrace.FieldTaskID: taskID})
// Add taskID to context for metrics
ctx = p2pmetrics.WithTaskID(ctx, taskID)
return h.P2PClient.StoreBatch(ctx, list, typ, taskID)
@@ -110,7 +110,7 @@ func (h *StorageHandler) StoreRaptorQSymbolsIntoP2P(ctx context.Context, taskID,
sort.Strings(keys) // deterministic order inside the sample
}
- logtrace.Info(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)})
+ logtrace.Debug(ctx, "storing RaptorQ symbols", logtrace.Fields{"count": len(keys)})
/* stream in fixed-size batches -------------------------------------- */
for start := 0; start < len(keys); {
@@ -128,7 +128,7 @@ func (h *StorageHandler) StoreRaptorQSymbolsIntoP2P(ctx context.Context, taskID,
return fmt.Errorf("update first-batch flag: %w", err)
}
- logtrace.Info(ctx, "finished storing RaptorQ symbols", logtrace.Fields{"curr-time": time.Now().UTC(), "count": len(keys)})
+ logtrace.Debug(ctx, "finished storing RaptorQ symbols", logtrace.Fields{"curr-time": time.Now().UTC(), "count": len(keys)})
return nil
}
@@ -160,7 +160,7 @@ func walkSymbolTree(root string) ([]string, error) {
}
func (h *StorageHandler) storeSymbolsInP2P(ctx context.Context, taskID, root string, fileKeys []string) error {
- logtrace.Info(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)})
+ logtrace.Debug(ctx, "loading batch symbols", logtrace.Fields{"count": len(fileKeys)})
symbols, err := utils.LoadSymbols(root, fileKeys)
if err != nil {
@@ -173,13 +173,13 @@ func (h *StorageHandler) storeSymbolsInP2P(ctx context.Context, taskID, root str
return fmt.Errorf("p2p store batch: %w", err)
}
- logtrace.Info(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)})
+ logtrace.Debug(ctx, "stored batch symbols", logtrace.Fields{"count": len(symbols)})
if err := utils.DeleteSymbols(ctx, root, fileKeys); err != nil {
return fmt.Errorf("delete symbols: %w", err)
}
- logtrace.Info(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)})
+ logtrace.Debug(ctx, "deleted batch symbols", logtrace.Fields{"count": len(symbols)})
return nil
}
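
StoreRaptorQSymbolsIntoP2P above streams keys in fixed-size batches after sorting them for deterministic order. A standalone sketch of that loop shape; batchSize and storeBatch are assumptions for illustration, not the handler's actual names.

    package main

    import (
        "fmt"
        "sort"
    )

    const batchSize = 3 // illustrative; the real size lives in the handler

    // storeBatch stands in for the P2P batch-store call.
    func storeBatch(batch []string) error {
        fmt.Println("storing batch:", batch)
        return nil
    }

    func main() {
        keys := []string{"c", "a", "e", "b", "d", "f", "g"}
        sort.Strings(keys) // deterministic order, as in the handler

        // Stream keys in fixed-size batches; the final batch may be shorter.
        for start := 0; start < len(keys); {
            end := start + batchSize
            if end > len(keys) {
                end = len(keys)
            }
            if err := storeBatch(keys[start:end]); err != nil {
                fmt.Println("store batch:", err)
                return
            }
            start = end
        }
    }
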
diff --git a/supernode/services/common/supernode/service.go b/supernode/services/common/supernode/service.go
index 13d5efe4..81bac456 100644
--- a/supernode/services/common/supernode/service.go
+++ b/supernode/services/common/supernode/service.go
@@ -54,7 +54,7 @@ func (s *SupernodeStatusService) GetStatus(ctx context.Context, includeP2PMetric
logtrace.FieldMethod: "GetStatus",
logtrace.FieldModule: "SupernodeStatusService",
}
- logtrace.Info(ctx, "status request received", fields)
+ logtrace.Debug(ctx, "status request received", fields)
var resp StatusResponse
resp.Version = Version
diff --git a/supernode/services/verifier/verifier.go b/supernode/services/verifier/verifier.go
index 867bd966..68a2ae77 100644
--- a/supernode/services/verifier/verifier.go
+++ b/supernode/services/verifier/verifier.go
@@ -75,7 +75,7 @@ func (cv *ConfigVerifier) VerifyConfig(ctx context.Context) (*VerificationResult
// Check 5: Verify all required ports are available
cv.checkPortsAvailable(result)
- logtrace.Info(ctx, "Config verification completed", logtrace.Fields{
+ logtrace.Debug(ctx, "Config verification completed", logtrace.Fields{
"valid": result.IsValid(),
"errors": len(result.Errors),
"warnings": len(result.Warnings),
diff --git a/tests/integration/p2p/p2p_integration_test.go b/tests/integration/p2p/p2p_integration_test.go
index bce71f58..d5df6dc2 100644
--- a/tests/integration/p2p/p2p_integration_test.go
+++ b/tests/integration/p2p/p2p_integration_test.go
@@ -108,7 +108,7 @@ func TestP2PBasicIntegration(t *testing.T) {
// Add debug logging
log.Printf("Storing batch with keys: %v", expectedKeys)
- err := services[0].StoreBatch(ctx, batchData, 0, taskID)
+ err := services[0].StoreBatch(ctx, batchData, 0, taskID)
require.NoError(t, err)
// Add immediate verification
@@ -203,7 +203,8 @@ func SetupTestP2PNodes(t *testing.T, ctx context.Context) ([]p2p.Client, []*rqst
require.NoError(t, err, "failed to create rqstore for node %d: %v", i, err)
rqStores = append(rqStores, rqStore)
- service, err := p2p.New(ctx, p2pConfig, mockClient, kr, rqStore, nil, nil)
+ // Disable metrics in integration tests by default
+ service, err := p2p.New(ctx, p2pConfig, mockClient, kr, rqStore, nil, nil, true)
require.NoError(t, err, "failed to create p2p service for node %d: %v", i, err)
// Start P2P service
diff --git a/tests/system/go.mod b/tests/system/go.mod
index 15b8212d..e6eb3bba 100644
--- a/tests/system/go.mod
+++ b/tests/system/go.mod
@@ -95,6 +95,7 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/glog v1.2.4 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+ github.com/golang/mock v1.6.0 // indirect
github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/flatbuffers v1.12.1 // indirect
diff --git a/tests/system/go.sum b/tests/system/go.sum
index 9ff0158a..6e9c0112 100644
--- a/tests/system/go.sum
+++ b/tests/system/go.sum
@@ -803,6 +803,7 @@ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/zeebo/errs v1.4.0 h1:XNdoD/RRMKP7HD0UhJnIzUy74ISdGGxURlYG8HSWSfM=
github.com/zeebo/errs v1.4.0/go.mod h1:sgbWHsvVuTPHcqJJGQ1WhI5KbWlHYz+2+2C/LSEtCw4=
github.com/zondax/hid v0.9.2 h1:WCJFnEDMiqGF64nlZz28E9qLVZ0KSJ7xpc5DLEyma2U=
@@ -888,6 +889,7 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -933,6 +935,7 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
golang.org/x/sync v0.13.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -1025,6 +1028,7 @@ golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapK
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
+golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU=
golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=