Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
f387b89
feat: set block size to 512kb
dryajov Mar 18, 2026
d229be8
feat: disable nat in integration tests - speeds up startup significa…
dryajov Mar 18, 2026
83f9737
feat: regenerate circuits for 512kb blocks
dryajov Mar 18, 2026
a09fda1
feat: bump deps
dryajov Mar 18, 2026
7a3c456
feat: update circuits
dryajov Mar 18, 2026
c6b38cc
feat: allow setting namespace for kubernetes
dryajov Mar 19, 2026
bd7f260
fix: don't double del manifest and count against quota
dryajov Mar 19, 2026
b522a17
feat: adding quota checks
dryajov Mar 19, 2026
437a85f
fix: remove noisy trace
dryajov Mar 19, 2026
f519b43
fix: properly handle verifiable manifest cleanup
dryajov Mar 20, 2026
34d0e07
fix: race condition in concurrent put/drop test
dryajov Mar 20, 2026
9570bb2
feat: cleanup slots on failed purchases
dryajov Mar 20, 2026
2097a25
feat: adding tracked semaphore (keyed)
dryajov Mar 20, 2026
de865a8
feat: avoid interleaving deletes and puts by allowing deletes to run af…
dryajov Mar 20, 2026
b410009
fix: delete re-entrancy bug
dryajov Mar 20, 2026
29eb35d
feat: track cleanup future
dryajov Mar 20, 2026
2a91f7d
feat: check for file presence
dryajov Mar 20, 2026
cd75a75
fix: don't create a semaphore for `count`
dryajov Mar 20, 2026
48b1422
feat: add asyncbarrier
dryajov Mar 21, 2026
391b135
feat: move repo operations to `operations` to avoid put/del interleaving
dryajov Mar 21, 2026
1ce13ff
feat: add cancellation tests
dryajov Mar 21, 2026
ee18143
fix: properly store and cleanup all manifests
dryajov Mar 21, 2026
6e482ca
fix: remove un-used param
dryajov Mar 21, 2026
70dbc86
feat: test proper cleanup across all manifests
dryajov Mar 21, 2026
a5e55d4
fix: remove unused param
dryajov Mar 21, 2026
48632f1
fix: avoid mutating query iterator
dryajov Mar 22, 2026
9c92b06
feat: allow setting dataset params with storeVerifiableManifest
dryajov Mar 22, 2026
f30f964
bump archivist contracts
dryajov Mar 23, 2026
9829741
bump circom_witnessgen
dryajov Mar 23, 2026
35eb60a
feat: use fetchManifest instead of manually decoding from block
dryajov Mar 23, 2026
d63c2aa
feat: properly track manifest refcounts to avoid dangling manifests
dryajov Mar 24, 2026
fb1b337
fix: fire onBlockStored callback when storing manifest block
dryajov Mar 24, 2026
6237338
feat: add tests for manifest tracking
dryajov Mar 24, 2026
b51a402
feat: streamline manifest tracking
dryajov Mar 26, 2026
0f138c5
fix: skip non-manifest blocks
dryajov Mar 26, 2026
246ee63
feat: more tightening of manifest tracking
dryajov Mar 27, 2026
cdc58cc
feat: make sure slot proving times out eventually and add buffer to s…
dryajov Mar 30, 2026
8316dd0
fix: properly handle quota over limit uploads
dryajov Mar 31, 2026
1c000f6
fix: avoid double counting on update and set expiry
dryajov Apr 2, 2026
a7d1701
Update archivist/marketplace/sales/states/initialproving.nim
dryajov Apr 2, 2026
dd52bab
fix: potential race condition during delete
dryajov Apr 2, 2026
819c4a2
fix: minor review cleanup
dryajov Apr 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ docs
metrics
nimcache
tests
vendor/nimble/nat_traversal/vendor/miniupnp/miniupnpc/build
vendor/nimble/nat_traversal/vendor/libnatpmp-upstream/libnatpmp.a
6 changes: 6 additions & 0 deletions .github/workflows/docker-dist-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ on:
required: false
type: boolean
default: false
release_tests_namespace:
description: Kubernetes namespace for release tests
required: false
type: string
default: 'devnet'

jobs:
get-contracts-hash:
Expand Down Expand Up @@ -57,6 +62,7 @@ jobs:
tag_stable: ${{ startsWith(github.ref, 'refs/tags/') }}
contract_image: "durabilitylabs/archivist-contracts:sha-${{ needs.get-contracts-hash.outputs.hash }}-dist-tests"
run_release_tests: ${{ inputs.run_release_tests }}
release_tests_namespace: ${{ inputs.release_tests_namespace || 'devnet' }}
secrets: inherit

deploy-testnet:
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/docker-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ on:
required: false
type: string
default: false
release_tests_namespace:
description: Kubernetes namespace for release tests
required: false
type: string
default: 'devnet'
contract_image:
description: Specifies compatible smart contract image
required: false
Expand Down Expand Up @@ -291,5 +296,6 @@ jobs:
branch: ${{ needs.compute-tests-inputs.outputs.branch }}
archivistdockerimage: ${{ needs.compute-tests-inputs.outputs.archivistdockerimage }}
workflow_source: ${{ needs.compute-tests-inputs.outputs.workflow_source }}
namespace: ${{ inputs.release_tests_namespace || 'devnet' }}
secrets:
KUBE_CONFIG: ${{ secrets.DEVNET_KUBE_CONFIG }}
7 changes: 2 additions & 5 deletions archivist/archivist.nim
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,8 @@ proc new*(
overlayTtl = config.overlayTtl.seconds,
)

maintenance = BlockMaintainer.new(
repoStore,
interval = config.overlayMaintenanceInterval,
numberOfBlocksPerInterval = config.overlayMaintenanceNumberOfBlocks,
)
maintenance =
BlockMaintainer.new(repoStore, interval = config.overlayMaintenanceInterval)

peerStore = PeerCtxStore.new()
pendingBlocks = PendingBlocksManager.new()
Expand Down
7 changes: 4 additions & 3 deletions archivist/archivisttypes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

{.push raises: [].}

import std/math
import std/tables
import std/sugar

Expand All @@ -25,13 +26,13 @@ export tables

const
# Size of blocks for storage / network exchange,
DefaultBlockSize* = NBytes 1024 * 64
DefaultCellSize* = NBytes 2048
DefaultBlockSize* = 512.KiBs
DefaultCellSize* = 2.KiBs

# Proving defaults
DefaultMaxSlotDepth* = 32
DefaultMaxDatasetDepth* = 8
DefaultBlockDepth* = 5
DefaultBlockDepth* = int(log2(float(DefaultBlockSize div DefaultCellSize)))
DefaultCellElms* = 67
DefaultSamplesNum* = 5

Expand Down
16 changes: 12 additions & 4 deletions archivist/blockexchange/engine/engine.nim
Original file line number Diff line number Diff line change
Expand Up @@ -451,10 +451,18 @@ proc blocksDeliveryHandler*(
validatedBlocksDelivery.keepItIf(it.address.cid != bd.address.cid)
continue

# TODO: The putBlock here should be replace by something like -
# storeManifestBlock(...)
if err =? (await self.localStore.putBlock(bd.blk)).errorOption:
error "Unable to store block", err = err.msg
if not isManifest:
error "Skipping, blocks should either be part of a tree or a manifest",
cid = bd.blk.cid
continue

without manifest =? Manifest.decode(bd.blk), err:
error "Unable to decode manifest block", err = err.msg
validatedBlocksDelivery.keepItIf(it.address.cid != bd.address.cid)
continue

if err =? (await self.localStore.storeManifest(manifest)).errorOption:
error "Unable to store manifest", err = err.msg
validatedBlocksDelivery.keepItIf(it.address.cid != bd.address.cid)

archivist_block_exchange_blocks_received.inc(validatedBlocksDelivery.len.int64)
Expand Down
13 changes: 13 additions & 0 deletions archivist/clock.nim
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@ proc withTimeout*(
await future.cancelAndWait()
raise newException(Timeout, "Timed out")

proc withTimeout*[T](
future: Future[T], clock: Clock, expiry: SecondsSince1970
): Future[T] {.async.} =
let timeout = clock.waitUntil(expiry)
try:
await future or timeout
finally:
await timeout.cancelAndWait()
if not future.completed:
await future.cancelAndWait()
raise newException(Timeout, "Timed out")
return future.read()

proc toBytes*(i: SecondsSince1970): seq[byte] =
let asUint = cast[uint64](i)
@(asUint.toBytes)
Expand Down
19 changes: 17 additions & 2 deletions archivist/erasure/erasure.nim
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,12 @@ proc decode*(
## be recovered
##

logScope:
treeCid = encoded.treeCid
originalTreeCid = encoded.originalTreeCid

trace "Preparing to decode dataset"

let
emptyCid = ?emptyCid(encoded.version, encoded.hcodec, encoded.codec)
params = EncodingParams.initFromEncoded(encoded, emptyCid)
Expand Down Expand Up @@ -663,6 +669,7 @@ proc decode*(
if err =? (await self.repoStore.putSomeProofs(tree, idxIter)).errorOption:
return failure(err)

trace "Successfully decoded original dataset"
success(),
)

Expand All @@ -689,6 +696,12 @@ proc repair*(
## to avoid leaving garbage in the store
##

logScope:
treeCid = encoded.treeCid
originalTreeCid = encoded.originalTreeCid

trace "Preparing to repair dataset"

let
emptyCid = ?emptyCid(encoded.version, encoded.hcodec, encoded.codec)
params = EncodingParams.initFromEncoded(encoded, emptyCid)
Expand All @@ -708,8 +721,9 @@ proc repair*(
"Original tree root differs from the tree root computed out of recovered data"
)

await self.repoStore.putAllProofs(tree)
,
?await self.repoStore.putAllProofs(tree)
trace "Successfully repaired original dataset"
success(),
)

# TODO: We don't get valid parity data from leopard,
Expand All @@ -719,6 +733,7 @@ proc repair*(
let repaired =
?(await self.encode(encoded, encoded.ecK, encoded.ecM, encoded.protectedStrategy))

trace "Successfully re-encoded original dataset"
if repaired.treeCid != encoded.treeCid:
return failure(
"Original tree root differs from the repaired tree root encoded out of recovered data"
Expand Down
10 changes: 9 additions & 1 deletion archivist/marketplace/sales/states/filled.nim
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,18 @@ method run*(
if onFilled =? agent.onFilled:
onFilled(request, data.slotIndex)

# Add buffer past contract end so overlay data survives through the
# last proof window. Buffer covers period (last proof window) +
# proofTimeout (challenge window). Without this, maintenance can drop
# the overlay while the proving loop is still running its last proof.
Comment on lines +58 to +61
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that this is necessary. When the request has ended, submitting the proof will not work anyway. So it doesn't matter that generating the proof fails.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, as per our discussion, this is mostly harmless on the marketplace side, but it will trigger re-downloads of data that is potentially already gone, so it will either download something that is no longer needed, or get stuck in retries.

let expiry = await marketplace.getRequestEnd(data.requestId)
let periodicity = marketplace.periodicity()
let buffer = periodicity.seconds + marketplace.proofTimeout()
let expiryWithBuffer = expiry + buffer
let cid = request.content.cid
let slotIndex = data.slotIndex
if err =? (await storage.updateSlotExpiry(cid, slotIndex, expiry)).errorOption:
if err =?
(await storage.updateSlotExpiry(cid, slotIndex, expiryWithBuffer)).errorOption:
return some State(SaleErrored(error: err))

when defined(archivist_system_testing_options):
Expand Down
8 changes: 6 additions & 2 deletions archivist/marketplace/sales/states/initialproving.nim
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,14 @@ method run*(
let cid = request.content.cid
let slotIndex = data.slotIndex
let challenge = await context.marketplace.getChallenge(slot.id)
without proof =? await storage.proveSlot(cid, slotIndex, challenge), err:
# Deadline is periodEnd - proof must be generated before the period closes.
let periodEnd = periodicity.periodEnd(provingPeriod).toSecondsSince1970

without proof =?
(await storage.proveSlot(cid, slotIndex, challenge).withTimeout(clock, periodEnd)),
err:
error "Failed to generate initial proof", error = err.msg
return some State(SaleErrored(error: err))

let periodAtFinish = periodicity.periodOf(StorageTimestamp.init(clock.now()))
if periodAtFinish != provingPeriod:
warn "Failed to generate initial proof in time",
Expand Down
15 changes: 13 additions & 2 deletions archivist/marketplace/sales/states/proving.nim
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import std/options
import pkg/questionable/results
import pkg/chronos
import ../../../clock
import ../../../logutils
import ../../../utils/exceptions
Expand Down Expand Up @@ -85,7 +86,18 @@ proc proveLoop(
(await marketplace.willProofBeRequired(slotId)):
let challenge = await marketplace.getChallenge(slotId)
info "Generating required proof", challenge = challenge
await state.prove(slot, challenge, context, provingPeriod)
# Deadline is the end of the current proving period. Proof must be
# generated and submitted before periodEnd.
let periodicity = marketplace.periodicity()
let periodEnd = periodicity.periodEnd(provingPeriod).toSecondsSince1970
try:
await state.prove(slot, challenge, context, provingPeriod).withTimeout(
clock, periodEnd
)
except Timeout:
warn "Proof generation timed out", provingPeriod = provingPeriod
await waitUntilPeriod(provingPeriod + 1'u8)
continue
let periodAtFinish = getCurrentPeriod()
if periodAtFinish != provingPeriod:
warn "Failed to generate proof in time", periodAtFinish = periodAtFinish
Expand Down Expand Up @@ -123,7 +135,6 @@ method run*(
): Future[?State] {.async: (raises: []).} =
let data = SalesAgent(machine).data
let context = SalesAgent(machine).context
let clock = context.clock

without request =? data.request:
raiseAssert "no sale request"
Expand Down
3 changes: 1 addition & 2 deletions archivist/marketplacestorage.nim
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ method proveSlot*(
method updateSlotExpiry*(
storage: MarketplaceStorage, cid: Cid, slotIndex: uint64, expiry: StorageTimestamp
): Future[?!void] {.async: (raises: [CancelledError]).} =
# TODO: update only the slot expiry, not the expiry of the entiry dataset
await storage.node.updateExpiry(cid, expiry.toSecondsSince1970)
await storage.node.updateSlotExpiry(cid, slotIndex, expiry.toSecondsSince1970)

method deleteSlot*(
storage: MarketplaceStorage, cid: Cid, slotIndex: uint64
Expand Down
Loading
Loading