From 75aece790268ff6a26b4288a58716cfd61d43b48 Mon Sep 17 00:00:00 2001 From: Mateusz Galazyn Date: Wed, 15 Apr 2026 16:35:30 +0200 Subject: [PATCH 1/3] Remove watchEpochStateUpdate. Simplify retryUntilJustM. --- ...24922_mgalazyn_refactor_retry_functions.md | 13 + cardano-testnet/src/Cardano/Testnet.hs | 1 + .../src/Testnet/Components/Query.hs | 224 ++++++------------ .../src/Testnet/EpochStateProcessing.hs | 40 +++- .../src/Testnet/Process/Cli/DRep.hs | 4 +- .../Testnet/Test/Cli/Plutus/BuildRaw.hs | 64 ++--- .../Test/Cli/Plutus/CostCalculation.hs | 29 +-- .../Cardano/Testnet/Test/Cli/Query.hs | 16 +- .../Cli/Scripts/Simple/CostCalculation.hs | 16 +- .../Testnet/Test/Gov/CommitteeAddNew.hs | 7 +- .../Testnet/Test/Gov/GovActionTimeout.hs | 43 ++-- .../Cardano/Testnet/Test/Gov/InfoAction.hs | 4 +- .../Cardano/Testnet/Test/Gov/NoConfidence.hs | 8 +- .../Testnet/Test/Gov/PredefinedAbstainDRep.hs | 4 +- .../Test/Gov/ProposeNewConstitution.hs | 4 +- 15 files changed, 208 insertions(+), 269 deletions(-) create mode 100644 cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md diff --git a/cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md b/cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md new file mode 100644 index 00000000000..b7fcdcc714f --- /dev/null +++ b/cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md @@ -0,0 +1,13 @@ +### Maintenance + +- Unified retry/wait functions in `Testnet.Components.Query` by factoring out a common `retryUntilRightM` core. + `retryUntilJustM` and `retryUntilM` are now thin wrappers over this shared primitive, eliminating duplicated timeout/polling logic. +- Removed `watchEpochStateUpdate` and migrated all call sites to `retryUntilJustM`/`retryUntilM`. +- Simplified `waitForBlocks` (dropped `MonadCatch` constraint, eliminated `EpochInterval maxBound` hack). + Now mirrors `waitForEpochs`: relies solely on the shared retry loop's timeout instead of an outer block-count predicate, avoiding the drift between two independent snapshots of the starting block number. +- Simplified `checkDRepState` by replacing direct `foldEpochState` usage with `EpochStateView` polling. +- Simplified `assertNewEpochState` by replacing `watchEpochStateUpdate` with `retryUntilRightM`. +- Changed `EpochStateView` from a record with three fields to a newtype wrapping the `IORef`, removing unused `nodeConfigPath` and `socketPath` fields. +- Added `maybeExtractGovernanceActionExpiry` in `Testnet.EpochStateProcessing`, which reads a proposal's `gasExpiresAfter` epoch from the gov state. +- Rewrote the `Gov Action Timeout` integration test to derive its wait target from the proposal's actual expiry epoch, removing the race window caused by not knowing which epoch the proposal was recorded in. + The check now waits one full epoch past the removal boundary so the RATIFY-produced state is @k@-deep stable and cannot be invalidated by a chain rollback. diff --git a/cardano-testnet/src/Cardano/Testnet.hs b/cardano-testnet/src/Cardano/Testnet.hs index 66f8a0346d7..0a6928d923d 100644 --- a/cardano-testnet/src/Cardano/Testnet.hs +++ b/cardano-testnet/src/Cardano/Testnet.hs @@ -29,6 +29,7 @@ module Cardano.Testnet ( -- * EpochState processsing helper functions maybeExtractGovernanceActionIndex, + maybeExtractGovernanceActionExpiry, -- * Processes procChairman, diff --git a/cardano-testnet/src/Testnet/Components/Query.hs b/cardano-testnet/src/Testnet/Components/Query.hs index 3f310d5e201..fb8689a9f2d 100644 --- a/cardano-testnet/src/Testnet/Components/Query.hs +++ b/cardano-testnet/src/Testnet/Components/Query.hs @@ -14,7 +14,7 @@ module Testnet.Components.Query , getEpochState , getSlotNumber , getBlockNumber - , watchEpochStateUpdate + , getEpochStateDetails , getMinDRepDeposit , getMinGovActionDeposit @@ -60,10 +60,8 @@ import qualified Cardano.Ledger.State as L import Prelude -import Control.Exception.Safe (MonadCatch) import Control.Monad import Control.Monad.Trans.Resource -import Control.Monad.Trans.State.Strict (put) import Data.IORef import Data.List (sortOn) import qualified Data.Map as Map @@ -123,7 +121,7 @@ waitForEpochs -> EpochInterval -- ^ Number of epochs to wait -> m EpochNo -- ^ The epoch number reached waitForEpochs epochStateView interval = withFrozenCallStack $ do - void $ watchEpochStateUpdate epochStateView interval $ \_ -> pure Nothing + void . retryUntilRightM epochStateView (WaitForEpochs interval) . pure $ Left () getCurrentEpochNo epochStateView -- | Wait for the requested number of blocks @@ -132,7 +130,6 @@ waitForBlocks => MonadIO m => MonadTest m => MonadAssertion m - => MonadCatch m => EpochStateView -> Word64 -- ^ Number of blocks to wait -> m BlockNo -- ^ The block number reached @@ -140,12 +137,8 @@ waitForBlocks epochStateView numberOfBlocks = withFrozenCallStack $ do BlockNo startingBlockNumber <- getBlockNumber epochStateView H.note_ $ "Current block number: " <> show startingBlockNumber <> ". " <> "Waiting for " <> show numberOfBlocks <> " blocks" - H.noteShowM . H.nothingFailM . fmap (fmap BlockNo) $ - watchEpochStateUpdate epochStateView (EpochInterval maxBound) $ \(_, _, BlockNo blockNumber) -> - pure $ - if blockNumber >= startingBlockNumber + numberOfBlocks - then Just blockNumber - else Nothing + void . retryUntilRightM epochStateView (WaitForBlocks numberOfBlocks) . pure $ Left () + getBlockNumber epochStateView data TestnetWaitPeriod = WaitForEpochs EpochInterval @@ -159,6 +152,34 @@ instance Show TestnetWaitPeriod where WaitForBlocks n -> "WaitForBlocks " <> show n WaitForSlots n -> "WaitForSlots " <> show n +-- | Core retry loop. Repeats the action every 300ms until it returns 'Right' +-- or the timeout is reached, in which case the last 'Left' is returned. +retryUntilRightM + :: HasCallStack + => MonadIO m + => MonadTest m + => MonadAssertion m + => EpochStateView + -> TestnetWaitPeriod + -> m (Either e a) + -> m (Either e a) +retryUntilRightM esv timeout act = withFrozenCallStack $ do + startingValue <- getCurrentValue + go $ startingValue + timeoutW64 + where + go deadline = act >>= \case + r@(Right _) -> pure r + l@(Left _) -> do + cv <- getCurrentValue + if cv > deadline + then pure l + else H.threadDelay 300_000 >> go deadline + + (getCurrentValue, timeoutW64) = case timeout of + WaitForEpochs (EpochInterval n) -> (unEpochNo <$> getCurrentEpochNo esv, fromIntegral n) + WaitForSlots n -> (unSlotNo <$> getSlotNumber esv, n) + WaitForBlocks n -> (unBlockNo <$> getBlockNumber esv, n) + -- | Retries the action until it returns 'Just' or the timeout is reached retryUntilJustM :: HasCallStack @@ -169,32 +190,12 @@ retryUntilJustM -> TestnetWaitPeriod -- ^ timeout for an operation -> m (Maybe a) -> m a -retryUntilJustM esv timeout act = withFrozenCallStack $ do - startingValue <- getCurrentValue - go startingValue - where - go startingValue = withFrozenCallStack $ do - cv <- getCurrentValue - when (timeoutW64 + startingValue < cv) $ do - H.note_ $ "Action did not result in 'Just' - waited for: " <> show timeout - H.failure - act >>= \case - Just a -> pure a - Nothing -> do - H.threadDelay 300_000 - go startingValue - - getCurrentValue = withFrozenCallStack $ - case timeout of - WaitForEpochs _ -> unEpochNo <$> getCurrentEpochNo esv - WaitForSlots _ -> unSlotNo <$> getSlotNumber esv - WaitForBlocks _ -> unBlockNo <$> getBlockNumber esv - - timeoutW64 = - case timeout of - WaitForEpochs (EpochInterval n) -> fromIntegral n - WaitForSlots n -> n - WaitForBlocks n -> n +retryUntilJustM esv timeout act = withFrozenCallStack $ + retryUntilRightM esv timeout (maybe (Left ()) Right <$> act) >>= \case + Right a -> pure a + Left () -> do + H.note_ $ "Action did not result in 'Just' - waited for: " <> show timeout + H.failure -- | Like 'retryUntilJustM' but takes a plain action and a predicate instead of -- an action returning 'Maybe'. On timeout, annotates the last value that failed @@ -210,34 +211,13 @@ retryUntilM -> m a -- ^ action to retry -> (a -> Bool) -- ^ predicate that must hold -> m a -retryUntilM esv timeout act predicate = withFrozenCallStack $ do - startingValue <- getCurrentValue - go startingValue - where - go startingValue = withFrozenCallStack $ do - result <- act - if predicate result - then pure result - else do - cv <- getCurrentValue - if timeoutW64 + startingValue < cv - then do - H.noteShow_ result - H.note_ $ "Predicate not satisfied after: " <> show timeout - H.failure - else H.threadDelay 300_000 >> go startingValue - - getCurrentValue = withFrozenCallStack $ - case timeout of - WaitForEpochs _ -> unEpochNo <$> getCurrentEpochNo esv - WaitForSlots _ -> unSlotNo <$> getSlotNumber esv - WaitForBlocks _ -> unBlockNo <$> getBlockNumber esv - - timeoutW64 = - case timeout of - WaitForEpochs (EpochInterval n) -> fromIntegral n - WaitForSlots n -> n - WaitForBlocks n -> n +retryUntilM esv timeout act predicate = withFrozenCallStack $ + retryUntilRightM esv timeout ((\r -> if predicate r then Right r else Left r) <$> act) >>= \case + Right a -> pure a + Left r -> do + H.noteShow_ r + H.note_ $ "Predicate not satisfied after: " <> show timeout + H.failure -- | Status of the 'EpochStateView' background thread when epoch state is not yet available data EpochStateStatus @@ -247,12 +227,8 @@ data EpochStateStatus -- ^ The background thread encountered an error while folding blocks -- | A read-only mutable pointer to an epoch state, updated automatically -data EpochStateView = EpochStateView - { nodeConfigPath :: !(NodeConfigFile In) - -- ^ node configuration file path - , socketPath :: !SocketPath - -- ^ node socket path, to which foldEpochState is connected to - , epochStateView :: !(IORef (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo))) +newtype EpochStateView = EpochStateView + { epochStateView :: IORef (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo)) -- ^ Automatically updated current NewEpochState. 'Left' indicates the state is not yet available -- (either not initialised or an error occurred). 'Right' contains the latest epoch state. -- Use 'getEpochState', 'getBlockNumber', 'getSlotNumber' to access the values. @@ -344,34 +320,7 @@ getEpochStateView nodeConfigFile socketPath = withFrozenCallStack $ do case result of Left err -> writeIORef epochStateView $ Left $ EpochStateFoldError err Right _ -> pure () - pure $ EpochStateView nodeConfigFile socketPath epochStateView - --- | Watch the epoch state view until the guard function returns 'Just' or the timeout epoch is reached. --- Executes the guard function every 300ms. Waits for at most @maxWait@ epochs. --- The function will return the result of the guard function if it is met within the number of epochs, --- otherwise it will return @Nothing@. -watchEpochStateUpdate - :: forall m a. (HasCallStack, MonadIO m, MonadTest m, MonadAssertion m) - => EpochStateView -- ^ The info to access the epoch state - -> EpochInterval -- ^ The maximum number of epochs to wait - -> ((AnyNewEpochState, SlotNo, BlockNo) -> m (Maybe a)) -- ^ The guard function (@Just@ if the condition is met, @Nothing@ otherwise) - -> m (Maybe a) -watchEpochStateUpdate epochStateView (EpochInterval maxWait) f = withFrozenCallStack $ do - AnyNewEpochState _ newEpochState _ <- getEpochState epochStateView - let EpochNo currentEpoch = L.nesEL newEpochState - go $ currentEpoch + fromIntegral maxWait - where - go :: Word64 -> m (Maybe a) - go timeout = do - newEpochStateDetails@(AnyNewEpochState _ newEpochState' _, _, _) <- getEpochStateDetails epochStateView - let EpochNo currentEpoch = L.nesEL newEpochState' - f newEpochStateDetails >>= \case - Just result -> pure (Just result) - Nothing - | currentEpoch > timeout -> pure Nothing - | otherwise -> do - H.threadDelay 300_000 - go timeout + pure $ EpochStateView epochStateView -- | Retrieve all UTxOs map from the epoch state view. findAllUtxos @@ -505,42 +454,18 @@ checkDRepState -> Maybe a) -- ^ A function that checks whether the DRep state is correct or up to date -- and potentially inspects it. -> m a -checkDRepState epochStateView@EpochStateView{nodeConfigPath, socketPath} sbe f = withFrozenCallStack $ do - currentEpoch <- getCurrentEpochNo epochStateView - let terminationEpoch = succ . succ $ currentEpoch - result <- H.evalIO . runExceptT $ foldEpochState nodeConfigPath socketPath QuickValidation terminationEpoch Nothing - $ \(AnyNewEpochState actualEra newEpochState _) _slotNumber _blockNumber -> do - Refl <- either error pure $ assertErasEqual sbe actualEra - let dreps = - shelleyBasedEraConstraints sbe - $ SQ.queryDRepState newEpochState Set.empty - case f dreps of - Nothing -> pure ConditionNotMet - Just a -> do put $ Just a - pure ConditionMet - case result of - Left (FoldBlocksApplyBlockError (TerminationEpochReached epochNo)) -> do - H.note_ $ unlines - [ "checkDRepState: condition not met before termination epoch: " <> show epochNo - , "This is likely an error of this test." ] - H.failure - Left err -> do - H.note_ $ unlines - [ "checkDRepState: could not reach termination epoch: " <> docToString (prettyError err) - , "This is probably an error unrelated to this test." ] +checkDRepState epochStateView sbe f = withFrozenCallStack $ + retryUntilRightM epochStateView (WaitForEpochs $ EpochInterval 2) action >>= \case + Right a -> pure a + Left () -> do + H.note_ "checkDRepState: condition not met within 2 epochs. This is likely a test error." H.failure - Right (_, Nothing) -> do - H.note_ $ unlines - [ "checkDRepState: foldEpochState returned Nothing: " - , "This is probably an error related to foldEpochState." ] - H.failure - Right (ConditionNotMet, Just _) -> do - H.note_ $ unlines - [ "checkDRepState: foldEpochState returned Just and ConditionNotMet: " - , "This is probably an error related to foldEpochState." ] - H.failure - Right (ConditionMet, Just val) -> - return val + where + action = do + AnyNewEpochState actualEra newEpochState _ <- getEpochState epochStateView + Refl <- H.leftFail $ assertErasEqual sbe actualEra + pure . maybe (Left ()) Right . f $ shelleyBasedEraConstraints sbe + $ SQ.queryDRepState newEpochState Set.empty -- | Obtain governance state from node (CLI query) getGovState @@ -627,30 +552,21 @@ assertNewEpochState -- ^ The lens to access the specific value in the epoch state. -> value -- ^ The expected value to check in the epoch state. -> m () -assertNewEpochState epochStateView sbe maxWait lens expected = withFrozenCallStack $ do - mStateView <- watchEpochStateUpdate epochStateView maxWait (const checkEpochState) - when (isNothing mStateView) $ do - val <- getFromEpochStateForEra - -- there's a tiny tiny chance that the value has changed since 'watchEpochStateUpdate' - -- so check it again - if val == expected - then pure () - else H.failMessage callStack $ unlines - [ "assertNewEpochState: expected value not reached within the time frame." - , "Expected value: " <> show expected - , "Actual value: " <> show val - ] +assertNewEpochState epochStateView sbe maxWait lens expected = withFrozenCallStack $ + retryUntilRightM epochStateView (WaitForEpochs maxWait) checkEpochState >>= \case + Right () -> pure () + Left actual -> do + H.note_ $ unlines + [ "assertNewEpochState: expected value not reached within " <> show maxWait + , "Expected: " <> show expected + , "Actual: " <> show actual + ] + H.failure where - checkEpochState - :: HasCallStack - => m (Maybe ()) checkEpochState = withFrozenCallStack $ do val <- getFromEpochStateForEra - pure $ if val == expected then Just () else Nothing + pure $ if val == expected then Right () else Left val - getFromEpochStateForEra - :: HasCallStack - => m value getFromEpochStateForEra = withFrozenCallStack $ do (AnyNewEpochState actualEra newEpochState _, _, _) <- getEpochStateDetails epochStateView Refl <- H.leftFail $ assertErasEqual sbe actualEra diff --git a/cardano-testnet/src/Testnet/EpochStateProcessing.hs b/cardano-testnet/src/Testnet/EpochStateProcessing.hs index 77208e96c74..5d4da73c4fc 100644 --- a/cardano-testnet/src/Testnet/EpochStateProcessing.hs +++ b/cardano-testnet/src/Testnet/EpochStateProcessing.hs @@ -4,6 +4,7 @@ module Testnet.EpochStateProcessing ( maybeExtractGovernanceActionIndex + , maybeExtractGovernanceActionExpiry , waitForGovActionVotes ) where @@ -17,15 +18,15 @@ import qualified Cardano.Ledger.Shelley.LedgerState as L import Prelude -import Control.Monad +import Control.Monad (void) import qualified Data.Map as Map -import Data.Maybe import Data.Word (Word16) import GHC.Exts (IsList (toList), toList) import GHC.Stack import Lens.Micro (to, (^.)) -import Testnet.Components.Query (EpochStateView, watchEpochStateUpdate) +import Testnet.Components.Query (EpochStateView, TestnetWaitPeriod (..), + getEpochStateDetails, retryUntilJustM) import Hedgehog import Hedgehog.Extras (MonadAssertion) @@ -49,6 +50,32 @@ maybeExtractGovernanceActionIndex txid (AnyNewEpochState sbe newEpochState _) = | ti1 == L.extractHash ti2 = Just gai compareWithTxId _ x _ _ = x +-- | Look up the @gasExpiresAfter@ epoch for the governance action submitted +-- by the given transaction id. Returns 'Nothing' if the proposal is not +-- present in the current proposals map (either because it has not yet been +-- recorded or because it has already been removed). +-- +-- The ledger removes an expired proposal at the start of epoch +-- @gasExpiresAfter + 1@ (via the RATIFY rule), so callers that want to +-- observe the proposal gone should wait until @currentEpoch > expiresAfter@. +maybeExtractGovernanceActionExpiry + :: HasCallStack + => TxId -- ^ transaction id searched for + -> AnyNewEpochState + -> Maybe EpochNo +maybeExtractGovernanceActionExpiry txid (AnyNewEpochState sbe newEpochState _) = + caseShelleyToBabbageOrConwayEraOnwards + (const $ error "Governance actions only available in Conway era onwards") + (\ceo -> conwayEraOnwardsConstraints ceo $ do + let proposals = newEpochState ^. L.newEpochStateGovStateL . L.proposalsGovStateL + Map.foldlWithKey' (compareWithTxId txid) Nothing (L.proposalsActionsMap proposals) + ) + sbe + where + compareWithTxId (TxId ti1) Nothing (GovActionId (L.TxId ti2) _) gas + | ti1 == L.extractHash ti2 = Just (L.gasExpiresAfter gas) + compareWithTxId _ x _ _ = x + -- | Wait for the last gov action proposal in the list to have DRep or SPO votes. waitForGovActionVotes :: forall m. HasCallStack @@ -58,10 +85,9 @@ waitForGovActionVotes => EpochStateView -- ^ Current epoch state view. It can be obtained using the 'getEpochStateView' function. -> EpochInterval -- ^ The maximum wait time in epochs. -> m () -waitForGovActionVotes epochStateView maxWait = withFrozenCallStack $ do - mResult <- watchEpochStateUpdate epochStateView maxWait checkForVotes - when (isNothing mResult) $ - H.failMessage callStack "waitForGovActionVotes: No votes appeared before timeout." +waitForGovActionVotes epochStateView maxWait = withFrozenCallStack $ + void $ retryUntilJustM epochStateView (WaitForEpochs maxWait) $ + getEpochStateDetails epochStateView >>= checkForVotes where checkForVotes :: HasCallStack diff --git a/cardano-testnet/src/Testnet/Process/Cli/DRep.hs b/cardano-testnet/src/Testnet/Process/Cli/DRep.hs index 1cf7d570124..c7c4ac72ded 100644 --- a/cardano-testnet/src/Testnet/Process/Cli/DRep.hs +++ b/cardano-testnet/src/Testnet/Process/Cli/DRep.hs @@ -418,7 +418,7 @@ makeActivityChangeProposal execConfig epochStateView ceo work governanceActionTxId <- retrieveTransactionId execConfig signedProposalTx governanceActionIndex <- - H.nothingFailM $ watchEpochStateUpdate epochStateView timeout $ \(anyNewEpochState, _, _) -> - return $ maybeExtractGovernanceActionIndex governanceActionTxId anyNewEpochState + retryUntilJustM epochStateView (WaitForEpochs timeout) $ + maybeExtractGovernanceActionIndex governanceActionTxId <$> getEpochState epochStateView return (governanceActionTxId, governanceActionIndex) diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/BuildRaw.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/BuildRaw.hs index effc0d6300b..25d3c719d6c 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/BuildRaw.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/BuildRaw.hs @@ -8,41 +8,31 @@ module Cardano.Testnet.Test.Cli.Plutus.BuildRaw ( ) where -import Cardano.Api hiding (Value) -import Cardano.Api.Experimental (Some (Some)) -import Cardano.Api.Ledger (EpochInterval (..)) +import Cardano.Api hiding (Value) +import Cardano.Api.Experimental (Some (Some)) +import Cardano.Api.Ledger (EpochInterval (..)) -import Cardano.Testnet +import Cardano.Testnet -import Prelude +import Prelude -import Control.Monad (void) -import Data.Default.Class (Default (def)) +import Control.Monad (void) +import Data.Default.Class (Default (def)) import qualified Data.Text as Text -import System.FilePath (()) +import System.FilePath (()) import qualified System.Info as SYS -import Testnet.Components.Query ( - findLargestUtxoForPaymentKey, - getEpochStateView, - getTxIx, - watchEpochStateUpdate, - ) +import Testnet.Components.Query (TestnetWaitPeriod (..), findLargestUtxoForPaymentKey, + getEpochStateDetails, getEpochStateView, getTxIx, retryUntilJustM) import qualified Testnet.Defaults as Defaults -import Testnet.Process.Cli.Transaction ( - TxOutAddress (..), - mkSpendOutputsOnlyTx, - retrieveTransactionId, - signTx, - submitTx, - ) -import Testnet.Process.Run (execCli', mkExecConfig) -import Testnet.Property.Util (integrationRetryWorkspace) -import Testnet.Start.Types (eraToString) -import Testnet.Types - -import Hedgehog (Property) -import qualified Hedgehog as H +import Testnet.Process.Cli.Transaction (TxOutAddress (..), mkSpendOutputsOnlyTx, + retrieveTransactionId, signTx, submitTx) +import Testnet.Process.Run (execCli', mkExecConfig) +import Testnet.Property.Util (integrationRetryWorkspace) +import Testnet.Start.Types (eraToString) +import Testnet.Types + +import Hedgehog (Property) import qualified Hedgehog.Extras.Test.Base as H import qualified Hedgehog.Extras.Test.File as H import qualified Hedgehog.Extras.Test.TestWatchdog as H @@ -118,11 +108,8 @@ hprop_build_raw_ref_script_spend = integrationRetryWorkspace 2 "build-raw-ref-sc txIdPublishRefScript <- retrieveTransactionId execConfig signedTxPublishRefScript txIxPublishRefScript <- - H.evalMaybeM $ - watchEpochStateUpdate - epochStateView - (EpochInterval 2) - (getTxIx sbe txIdPublishRefScript scriptPublishUTxOAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdPublishRefScript scriptPublishUTxOAmount -- Step 2: Lock funds at script address refScriptLock <- H.createDirectoryIfMissing $ work "ref-script-lock" @@ -143,8 +130,8 @@ hprop_build_raw_ref_script_spend = integrationRetryWorkspace 2 "build-raw-ref-sc txIdLock <- retrieveTransactionId execConfig signedTxLock txIxLock <- - H.evalMaybeM $ - watchEpochStateUpdate epochStateView (EpochInterval 2) (getTxIx sbe txIdLock transferAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdLock transferAmount -- Step 3: Query protocol parameters void $ @@ -207,8 +194,5 @@ hprop_build_raw_ref_script_spend = integrationRetryWorkspace 2 "build-raw-ref-sc -- Verify the transaction landed on chain txIdUnlock <- retrieveTransactionId execConfig signedUnlockTx void $ - H.evalMaybeM $ - watchEpochStateUpdate - epochStateView - (EpochInterval 2) - (getTxIx sbe txIdUnlock (transferAmount - fee)) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdUnlock (transferAmount - fee) diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/CostCalculation.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/CostCalculation.hs index 82fe82e3b54..723fd8f6f2c 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/CostCalculation.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Plutus/CostCalculation.hs @@ -34,8 +34,8 @@ import System.Directory (makeAbsolute) import System.FilePath (()) import qualified System.Info as SYS -import Testnet.Components.Query (findLargestUtxoForPaymentKey, getEpochStateView, getTxIx, - watchEpochStateUpdate) +import Testnet.Components.Query (TestnetWaitPeriod (..), findLargestUtxoForPaymentKey, + getEpochStateDetails, getEpochStateView, getTxIx, retryUntilJustM) import qualified Testnet.Defaults as Defaults import Testnet.Process.Cli.Transaction (TxOutAddress (..), mkSpendOutputsOnlyTx, retrieveTransactionId, signTx, submitTx) @@ -110,11 +110,8 @@ hprop_ref_plutus_cost_calculation = integrationRetryWorkspace 2 "ref-plutus-scri -- Wait until transaction is on chain and obtain transaction identifier txIdPublishRefScript <- retrieveTransactionId execConfig signedTxPublishRefScript txIxPublishRefScript <- - H.evalMaybeM $ - watchEpochStateUpdate - epochStateView - (EpochInterval 2) - (getTxIx sbe txIdPublishRefScript scriptPublishUTxOAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdPublishRefScript scriptPublishUTxOAmount -- Submit a transaction to lock money in the reference script refScriptLock <- H.createDirectoryIfMissing $ work "ref-script-lock" @@ -137,8 +134,8 @@ hprop_ref_plutus_cost_calculation = integrationRetryWorkspace 2 "ref-plutus-scri -- Wait until transaction is on chain and obtain transaction identifier txIdLock <- retrieveTransactionId execConfig signedTxLock txIxLock <- - H.evalMaybeM $ - watchEpochStateUpdate epochStateView (EpochInterval 2) (getTxIx sbe txIdLock transferAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdLock transferAmount -- Create transaction that uses reference script refScriptUnlock <- H.createDirectoryIfMissing $ work "ref-script-unlock" @@ -274,11 +271,8 @@ hprop_included_plutus_cost_calculation = integrationRetryWorkspace 2 "included-p -- Wait until transaction is on chain and obtain transaction identifier txIdIncludedScriptLock <- retrieveTransactionId execConfig signedTxIncludedScriptLock txIxIncludedScriptLock <- - H.evalMaybeM $ - watchEpochStateUpdate - epochStateView - (EpochInterval 2) - (getTxIx sbe txIdIncludedScriptLock includedScriptLockAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdIncludedScriptLock includedScriptLockAmount -- Create transaction that uses reference script includedScriptUnlock <- H.createDirectoryIfMissing $ work "included-script-unlock" @@ -391,11 +385,8 @@ hprop_included_simple_script_cost_calculation = integrationRetryWorkspace 2 "inc -- Wait until transaction is on chain and obtain transaction identifier txIdSimpleScriptLock <- retrieveTransactionId execConfig signedTxSimpleScriptLock txIxSimpleScriptLock <- - H.evalMaybeM $ - watchEpochStateUpdate - epochStateView - (EpochInterval 2) - (getTxIx sbe txIdSimpleScriptLock lockedAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdSimpleScriptLock lockedAmount -- Create transaction that unlocks the simple script UTxO we just created simpleScriptUnlockWork <- H.createDirectoryIfMissing $ work "simple-script-unlock" diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Query.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Query.hs index d7118d281ca..da0b888e30c 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Query.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Query.hs @@ -51,8 +51,8 @@ import System.Directory (makeAbsolute) import System.FilePath (()) import Testnet.Components.Configuration (eraToString) -import Testnet.Components.Query (EpochStateView, checkDRepsNumber, getEpochStateView, - getTxIx, watchEpochStateUpdate) +import Testnet.Components.Query (EpochStateView, TestnetWaitPeriod (..), checkDRepsNumber, + getEpochStateDetails, getEpochStateView, getSlotNumber, getTxIx, retryUntilJustM) import qualified Testnet.Defaults as Defaults import Testnet.Process.Cli.Transaction (TxOutAddress (..), mkSimpleSpendOutputsOnlyTx, mkSpendOutputsOnlyTx, retrieveTransactionId, signTx, submitTx) @@ -341,7 +341,7 @@ hprop_cli_queries = integrationRetryWorkspace 2 "cli-queries" $ \tempAbsBasePath -- Wait until transaction is on chain and obtain transaction identifier txId <- retrieveTransactionId execConfig signedTx - txIx <- H.evalMaybeM $ watchEpochStateUpdate epochStateView (EpochInterval 2) (getTxIx sbe txId transferAmount) + txIx <- retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ getEpochStateDetails epochStateView >>= getTxIx sbe txId transferAmount -- Query the reference script size let protocolParametersOutFile = refScriptSizeWork "ref-script-size-out.json" H.noteM_ $ execCli' execConfig [ eraName, "query", "ref-script-size" @@ -459,11 +459,11 @@ hprop_cli_queries = integrationRetryWorkspace 2 "cli-queries" $ \tempAbsBasePath -> ShelleyGenesis -> m SlotNo -- ^ The block number reached waitForFuturePParamsToStabilise epochStateView shelleyGenesisConf = withFrozenCallStack $ - H.noteShowM . H.nothingFailM $ - watchEpochStateUpdate epochStateView (EpochInterval 2) $ \(_, slotNo, _) -> do - pure $ if areFuturePParamsStable shelleyGenesisConf slotNo - then Just slotNo - else Nothing + H.noteShowM $ retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ do + slotNo <- getSlotNumber epochStateView + pure $ if areFuturePParamsStable shelleyGenesisConf slotNo + then Just slotNo + else Nothing -- We wait till a slot after: 4 * securityParam / slotCoeff -- If we query 'govState' before that we get 'PotentialPParamsUpdate' diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Scripts/Simple/CostCalculation.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Scripts/Simple/CostCalculation.hs index 4fc7622c820..e48dd61b526 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Scripts/Simple/CostCalculation.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Cli/Scripts/Simple/CostCalculation.hs @@ -22,8 +22,8 @@ import qualified Data.Text as Text import System.FilePath (()) import qualified System.Info as SYS -import Testnet.Components.Query (TestnetWaitPeriod (..), findAllUtxos, getEpochStateView, - getTxIx, retryUntilM, watchEpochStateUpdate) +import Testnet.Components.Query (TestnetWaitPeriod (..), findAllUtxos, + getEpochStateDetails, getEpochStateView, getTxIx, retryUntilJustM, retryUntilM) import qualified Testnet.Defaults as Defaults import Testnet.Process.Cli.Transaction (TxOutAddress (..), mkSpendOutputsOnlyTx, retrieveTransactionId, signTx, submitTx) @@ -33,7 +33,6 @@ import Testnet.Start.Types (eraToString) import Testnet.Types import Hedgehog (Property) -import qualified Hedgehog as H import qualified Hedgehog.Extras.Test.Base as H import qualified Hedgehog.Extras.Test.File as H import qualified Hedgehog.Extras.Test.TestWatchdog as H @@ -103,11 +102,8 @@ hprop_ref_simple_script_mint = integrationRetryWorkspace 2 "ref-simple-script" $ -- Wait until transaction is on chain and obtain transaction identifier txIdPublishRefScript <- retrieveTransactionId execConfig signedTxPublishRefScript txIxPublishRefScript <- - H.evalMaybeM $ - watchEpochStateUpdate - epochStateView - (EpochInterval 2) - (getTxIx sbe txIdPublishRefScript scriptPublishUTxOAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdPublishRefScript scriptPublishUTxOAmount -- Submit a transaction to lock money in the reference script refScriptLock <- H.createDirectoryIfMissing $ work "ref-script-lock" @@ -130,8 +126,8 @@ hprop_ref_simple_script_mint = integrationRetryWorkspace 2 "ref-simple-script" $ -- Wait until transaction is on chain and obtain transaction identifier txIdLock <- retrieveTransactionId execConfig signedTxLock txIxLock <- - H.evalMaybeM $ - watchEpochStateUpdate epochStateView (EpochInterval 2) (getTxIx sbe txIdLock transferAmount) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 2) $ + getEpochStateDetails epochStateView >>= getTxIx sbe txIdLock transferAmount -- Create transaction that uses reference script refScriptUnlock <- H.createDirectoryIfMissing $ work "ref-script-unlock" diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/CommitteeAddNew.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/CommitteeAddNew.hs index da3b00bbcb2..bb3338d80ee 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/CommitteeAddNew.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/CommitteeAddNew.hs @@ -251,8 +251,8 @@ hprop_constitutional_committee_add_new = integrationRetryWorkspace 2 "constituti governanceActionTxId <- H.noteShowM $ retrieveTransactionId execConfig signedProposalTx governanceActionIx <- - H.nothingFailM . watchEpochStateUpdate epochStateView (L.EpochInterval 1) $ \(anyNewEpochState, _, _) -> - pure $ maybeExtractGovernanceActionIndex governanceActionTxId anyNewEpochState + retryUntilJustM epochStateView (WaitForEpochs $ L.EpochInterval 1) $ + maybeExtractGovernanceActionIndex governanceActionTxId <$> getEpochState epochStateView dRepVoteFiles <- DRep.generateVoteFiles @@ -296,7 +296,8 @@ hprop_constitutional_committee_add_new = integrationRetryWorkspace 2 "constituti length (filter ((== L.VoteYes) . snd) gaSpoVotes) === 1 length spoVotes === length gaSpoVotes - H.nothingFailM $ watchEpochStateUpdate epochStateView (L.EpochInterval 1) (return . committeeIsPresent) + retryUntilJustM epochStateView (WaitForEpochs $ L.EpochInterval 1) $ + committeeIsPresent <$> getEpochStateDetails epochStateView -- show proposed committee meembers H.noteShow_ ccCredentials diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/GovActionTimeout.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/GovActionTimeout.hs index c699a2cba9a..33424dfb8e4 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/GovActionTimeout.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/GovActionTimeout.hs @@ -31,8 +31,7 @@ import Testnet.Property.Util (integrationRetryWorkspace) import Testnet.Start.Types import Testnet.Types -import Hedgehog (Property) -import qualified Hedgehog as H +import Hedgehog import qualified Hedgehog.Extras as H -- | Test that SPOs cannot vote on a Protocol Parameter change @@ -80,11 +79,6 @@ hprop_check_gov_action_timeout = integrationRetryWorkspace 2 "gov-action-timeout baseDir <- H.createDirectoryIfMissing $ gov "output" - -- Figure out expiration time for proposals - - govActionLifetime <- getGovActionLifetime epochStateView ceo - H.note_ $ "govActionLifetime: " <> show govActionLifetime - -- Register stake address let stakeCertFp = gov "stake.regcert" stakeKeys = KeyPair { verificationKey = File $ gov "stake.vkey" @@ -132,12 +126,31 @@ hprop_check_gov_action_timeout = integrationRetryWorkspace 2 "gov-action-timeout makeActivityChangeProposal execConfig epochStateView ceo (baseDir "proposal") Nothing (EpochInterval 3) stakeKeys wallet0 (EpochInterval 2) - -- Wait for proposal to expire - void $ waitForEpochs epochStateView (EpochInterval $ unEpochInterval govActionLifetime + 1) - - -- Check proposal expired - mGovernanceActionTxIx <- watchEpochStateUpdate epochStateView (EpochInterval 2) $ \(anyNewEpochState, _, _) -> - return $ maybeExtractGovernanceActionIndex governanceActionTxId anyNewEpochState - - mGovernanceActionTxIx H.=== Nothing + -- Read the proposal's expiry epoch directly from the gov state. + -- The RATIFY rule removes expired proposals at the start of epoch + -- @expiresAfter + 1@, so once @currentEpoch > expiresAfter@ the proposal + -- must be gone. + expiresAfter@(EpochNo expiryE) <- H.nothingFailM $ + maybeExtractGovernanceActionExpiry governanceActionTxId <$> getEpochState epochStateView + H.note_ $ "Proposal expires after epoch: " <> show expiresAfter + + -- Wait until we are at least two epochs past @expiresAfter@, i.e. in + -- epoch @expiresAfter + 2@ or later. RATIFY removes the proposal at the + -- first block of epoch @expiresAfter + 1@, but the testnet security + -- parameter is @k = 5@ blocks while epochs average only ~10 blocks, so a + -- rollback within the @k@-window can cross the removal boundary. Waiting + -- a full extra epoch past the boundary makes the removal @k@-deep stable + -- and eliminates the rollback race. + EpochNo currentE <- getCurrentEpochNo epochStateView + let timeoutEpochs + | expiryE >= currentE = fromIntegral (expiryE - currentE) + 3 + | otherwise = 3 + void $ retryUntilM epochStateView (WaitForEpochs $ EpochInterval timeoutEpochs) + (getCurrentEpochNo epochStateView) + (> EpochNo (expiryE + 1)) + + -- At this point the proposal must be absent from the gov state. + mGovernanceActionTxIx <- + maybeExtractGovernanceActionIndex governanceActionTxId <$> getEpochState epochStateView + mGovernanceActionTxIx === Nothing diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/InfoAction.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/InfoAction.hs index 30f00ff3f6d..e4ecb653a6c 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/InfoAction.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/InfoAction.hs @@ -191,8 +191,8 @@ hprop_ledger_events_info_action = integrationRetryWorkspace 2 "info-hash" $ \tem txId <- H.noteShowM $ retrieveTransactionId execConfig (File txbodySignedFp) governanceActionIndex <- - H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 1) $ \(anyNewEpochState, _, _) -> - pure $ maybeExtractGovernanceActionIndex txId anyNewEpochState + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 1) + $ maybeExtractGovernanceActionIndex txId <$> getEpochState epochStateView let voteFp :: Int -> FilePath voteFp n = work gov "vote-" <> show n diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/NoConfidence.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/NoConfidence.hs index 9062afec4f8..bddda02c373 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/NoConfidence.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/NoConfidence.hs @@ -129,8 +129,7 @@ hprop_gov_no_confidence = integrationRetryWorkspace 2 "no-confidence" $ \tempAbs epochStateView <- getEpochStateView configurationFile (File socketPath) - H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 3) $ \anyNewEpochState-> - pure $ committeeIsPresent True anyNewEpochState + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 3) $ committeeIsPresent True <$> getEpochStateDetails epochStateView -- Step 2. Propose motion of no confidence. DRep and SPO voting thresholds must be met. @@ -189,8 +188,7 @@ hprop_gov_no_confidence = integrationRetryWorkspace 2 "no-confidence" $ \tempAbs governanceActionTxId <- retrieveTransactionId execConfig signedProposalTx governanceActionIndex <- - H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 10) $ \(anyNewEpochState, _, _) -> - pure $ maybeExtractGovernanceActionIndex governanceActionTxId anyNewEpochState + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 10) $ maybeExtractGovernanceActionIndex governanceActionTxId <$> getEpochState epochStateView let spoVotes :: [(String, Int)] spoVotes = [("yes", 1), ("yes", 2), ("no", 3)] @@ -238,7 +236,7 @@ hprop_gov_no_confidence = integrationRetryWorkspace 2 "no-confidence" $ \tempAbs -- Step 4. We confirm the no confidence motion has been ratified by checking -- for an empty constitutional committee. - H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 10) (return . committeeIsPresent False) + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 10) $ committeeIsPresent False <$> getEpochStateDetails epochStateView -- | Checks if the committee is empty or not. committeeIsPresent :: Bool -> (AnyNewEpochState, SlotNo, BlockNo) -> Maybe () diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/PredefinedAbstainDRep.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/PredefinedAbstainDRep.hs index ca5e5830ef2..34f282dbe99 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/PredefinedAbstainDRep.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/PredefinedAbstainDRep.hs @@ -287,8 +287,8 @@ makeDesiredPoolNumberChangeProposal execConfig epochStateView ceo work prefix governanceActionTxId <- retrieveTransactionId execConfig signedProposalTx governanceActionIndex <- - H.nothingFailM $ watchEpochStateUpdate epochStateView (EpochInterval 1) $ \(anyNewEpochState, _, _) -> - pure $ maybeExtractGovernanceActionIndex governanceActionTxId anyNewEpochState + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 1) + $ maybeExtractGovernanceActionIndex governanceActionTxId <$> getEpochState epochStateView pure (governanceActionTxId, governanceActionIndex) diff --git a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/ProposeNewConstitution.hs b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/ProposeNewConstitution.hs index d9385fe5db6..eca5e6fadd8 100644 --- a/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/ProposeNewConstitution.hs +++ b/cardano-testnet/test/cardano-testnet-test/Cardano/Testnet/Test/Gov/ProposeNewConstitution.hs @@ -229,8 +229,8 @@ hprop_ledger_events_propose_new_constitution = integrationRetryWorkspace 2 "prop governanceActionTxId <- retrieveTransactionId execConfig signedProposalTx governanceActionIndex <- - H.nothingFailM . watchEpochStateUpdate epochStateView (EpochInterval 1) $ \(anyNewEpochState, _, _) -> - pure $ maybeExtractGovernanceActionIndex governanceActionTxId anyNewEpochState + retryUntilJustM epochStateView (WaitForEpochs $ EpochInterval 1) + $ maybeExtractGovernanceActionIndex governanceActionTxId <$> getEpochState epochStateView -- Proposal was successfully submitted, now we vote on the proposal and confirm it was ratified voteFiles <- generateVoteFiles execConfig work "vote-files" From 44995047a12be739da0de542ce4c46e828510bd1 Mon Sep 17 00:00:00 2001 From: Mateusz Galazyn Date: Thu, 16 Apr 2026 17:28:06 +0200 Subject: [PATCH 2/3] cardano-testnet | Refactor EpochStateView to use TVar instead of IORef --- ...24922_mgalazyn_refactor_retry_functions.md | 2 +- ...16_160000_mgalazyn_stm_epoch_state_view.md | 4 + .../src/Testnet/Components/Query.hs | 148 +++++++++++++----- 3 files changed, 114 insertions(+), 40 deletions(-) create mode 100644 cardano-testnet/changelog.d/20260416_160000_mgalazyn_stm_epoch_state_view.md diff --git a/cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md b/cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md index b7fcdcc714f..175902e4112 100644 --- a/cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md +++ b/cardano-testnet/changelog.d/20260416_124922_mgalazyn_refactor_retry_functions.md @@ -7,7 +7,7 @@ Now mirrors `waitForEpochs`: relies solely on the shared retry loop's timeout instead of an outer block-count predicate, avoiding the drift between two independent snapshots of the starting block number. - Simplified `checkDRepState` by replacing direct `foldEpochState` usage with `EpochStateView` polling. - Simplified `assertNewEpochState` by replacing `watchEpochStateUpdate` with `retryUntilRightM`. -- Changed `EpochStateView` from a record with three fields to a newtype wrapping the `IORef`, removing unused `nodeConfigPath` and `socketPath` fields. +- Removed unused `nodeConfigPath` and `socketPath` fields from `EpochStateView`. - Added `maybeExtractGovernanceActionExpiry` in `Testnet.EpochStateProcessing`, which reads a proposal's `gasExpiresAfter` epoch from the gov state. - Rewrote the `Gov Action Timeout` integration test to derive its wait target from the proposal's actual expiry epoch, removing the race window caused by not knowing which epoch the proposal was recorded in. The check now waits one full epoch past the removal boundary so the RATIFY-produced state is @k@-deep stable and cannot be invalidated by a chain rollback. diff --git a/cardano-testnet/changelog.d/20260416_160000_mgalazyn_stm_epoch_state_view.md b/cardano-testnet/changelog.d/20260416_160000_mgalazyn_stm_epoch_state_view.md new file mode 100644 index 00000000000..bc2bd480dc6 --- /dev/null +++ b/cardano-testnet/changelog.d/20260416_160000_mgalazyn_stm_epoch_state_view.md @@ -0,0 +1,4 @@ +### Maintenance + +- Retries in `EpochStateView` wake immediately on each epoch state update instead of polling. + Multiple threads waiting on the same view wake up together. diff --git a/cardano-testnet/src/Testnet/Components/Query.hs b/cardano-testnet/src/Testnet/Components/Query.hs index fb8689a9f2d..361b03829c3 100644 --- a/cardano-testnet/src/Testnet/Components/Query.hs +++ b/cardano-testnet/src/Testnet/Components/Query.hs @@ -60,9 +60,12 @@ import qualified Cardano.Ledger.State as L import Prelude +import Control.Applicative ((<|>)) +import Control.Concurrent.STM (TVar, modifyTVar', newTVarIO, readTVar, writeTVar) +import qualified Control.Concurrent.STM as STM import Control.Monad +import Control.Monad.Trans.Maybe (MaybeT (..), mapMaybeT, runMaybeT) import Control.Monad.Trans.Resource -import Data.IORef import Data.List (sortOn) import qualified Data.Map as Map import Data.Map.Strict (Map) @@ -87,6 +90,8 @@ import qualified Hedgehog as H import Hedgehog.Extras (MonadAssertion) import qualified Hedgehog.Extras as H +import UnliftIO.STM (atomically, readTVarIO, registerDelay) + -- | Block and wait for the desired epoch. waitUntilEpoch :: HasCallStack @@ -152,8 +157,10 @@ instance Show TestnetWaitPeriod where WaitForBlocks n -> "WaitForBlocks " <> show n WaitForSlots n -> "WaitForSlots " <> show n --- | Core retry loop. Repeats the action every 300ms until it returns 'Right' --- or the timeout is reached, in which case the last 'Left' is returned. +-- | Core retry loop. Returns early on 'Right'; on 'Left', blocks via STM until +-- the 'EpochStateView' is updated (with a safety fallback timeout) and retries. +-- Gives up and returns the last 'Left' once the 'TestnetWaitPeriod' deadline is +-- exceeded. retryUntilRightM :: HasCallStack => MonadIO m @@ -167,13 +174,18 @@ retryUntilRightM esv timeout act = withFrozenCallStack $ do startingValue <- getCurrentValue go $ startingValue + timeoutW64 where - go deadline = act >>= \case - r@(Right _) -> pure r - l@(Left _) -> do - cv <- getCurrentValue - if cv > deadline - then pure l - else H.threadDelay 300_000 >> go deadline + go deadline = do + -- Sample the version before running 'act' so that any update landing during 'act' + -- makes 'awaitStateUpdateTimeout' return without blocking, rather than waiting for + -- the next update and adding a block/epoch of latency. + versionBeforeAct <- readTVarIO $ epochStateVersion esv + act >>= \case + r@(Right _) -> pure r + l@(Left _) -> do + cv <- getCurrentValue + if cv > deadline + then pure l + else awaitStateUpdateTimeout esv 300 versionBeforeAct *> go deadline (getCurrentValue, timeoutW64) = case timeout of WaitForEpochs (EpochInterval n) -> (unEpochNo <$> getCurrentEpochNo esv, fromIntegral n) @@ -227,13 +239,47 @@ data EpochStateStatus -- ^ The background thread encountered an error while folding blocks -- | A read-only mutable pointer to an epoch state, updated automatically -newtype EpochStateView = EpochStateView - { epochStateView :: IORef (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo)) +data EpochStateView = EpochStateView + { epochStateView :: !(TVar (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo))) -- ^ Automatically updated current NewEpochState. 'Left' indicates the state is not yet available -- (either not initialised or an error occurred). 'Right' contains the latest epoch state. -- Use 'getEpochState', 'getBlockNumber', 'getSlotNumber' to access the values. + , epochStateVersion :: !(TVar Word64) + -- ^ Monotonically increasing counter, bumped on every state write. + -- Used by 'awaitStateUpdateTimeout' to block until the next update. } +-- | Block until the epoch state version advances past the provided previously sampled +-- version, or until the fallback timeout expires. Returns immediately if the current +-- version already differs, so callers can sample before running an action and avoid +-- missing updates that land during the action. Returns 'Nothing' on timeout. +-- All threads blocked on the same 'EpochStateView' wake up on each update. +awaitStateUpdateTimeout + :: MonadIO m + => EpochStateView + -> DTC.NominalDiffTime -- ^ Fallback timeout + -> Word64 -- ^ Previously sampled version + -> m (Maybe (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo))) +awaitStateUpdateTimeout EpochStateView{epochStateVersion, epochStateView} timeout sinceVersion = runMaybeT $ fastResult <|> awaitedResult + where + -- Fast path: if the version already differs, read state and version atomically and return + -- without allocating a 'registerDelay' timer. This avoids accumulating timer-queue entries + -- when callers sample a stale version and an update has already landed. + fastResult = mapMaybeT atomically $ do + v <- lift $ readTVar epochStateVersion + guard $ v /= sinceVersion + lift $ readTVar epochStateView + + awaitedResult = MaybeT $ do + timedOutVar <- registerDelay . ceiling $ timeout * 1_000_000 + atomically $ do + v <- readTVar epochStateVersion + timedOut <- readTVar timedOutVar + case (v /= sinceVersion, timedOut) of + (True, _) -> Just <$> readTVar epochStateView + (_, True) -> pure Nothing + _ -> STM.retry + -- | Get epoch state from the view. If the state isn't available, retry waiting up to 25 seconds. Fails -- immediately if the background thread encountered an error, or after 25 seconds if not yet initialised. getEpochState @@ -266,9 +312,9 @@ getSlotNumber getSlotNumber epochStateView = withFrozenCallStack $ (\(_, slotNumber, _) -> slotNumber) <$> getEpochStateDetails epochStateView --- | Utility function for accessing epoch state in 'IORef'. --- Retries every 0.5s for up to 25 seconds while not initialised. --- Fails immediately if the background fold thread encountered an error. +-- | Access the current epoch state. Returns immediately if state is already available. +-- Blocks up to 25 seconds waiting for initialisation if the background thread has not yet +-- received any epoch state. Fails immediately if the background thread encountered an error. getEpochStateDetails :: HasCallStack => MonadAssertion m @@ -276,27 +322,46 @@ getEpochStateDetails => MonadIO m => EpochStateView -> m (AnyNewEpochState, SlotNo, BlockNo) -getEpochStateDetails EpochStateView{epochStateView} = - withFrozenCallStack $ do - deadline <- liftIO $ DTC.addUTCTime 25 <$> DTC.getCurrentTime - go deadline +getEpochStateDetails EpochStateView{epochStateView} = withFrozenCallStack $ + -- Fast path: read the TVar outside STM block so we don't register a pointless + -- 'initTimeoutSeconds' timer on every call. These getters run inside tight + -- retry loops, and the unused timer-queue entries would otherwise accumulate. + readTVarIO epochStateView + >>= awaitForState + >>= failEpochStateFoldError where - go deadline = do - result <- H.evalIO $ readIORef epochStateView - case result of - Left (EpochStateFoldError err) -> do - H.note_ $ "EpochStateView background thread failed: " <> docToString (prettyError err) - H.failure - Left EpochStateNotInitialised -> do - currentTime <- liftIO DTC.getCurrentTime - if currentTime < deadline - then do - H.threadDelay 500_000 - go deadline - else do - H.note_ "EpochStateView has not been initialised within 25 seconds" - H.failure - Right details -> pure details + initTimeoutSeconds :: Int + initTimeoutSeconds = 25 + + awaitForState + :: MonadIO n + => Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo) + -> n (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo)) + awaitForState = \case + Left EpochStateNotInitialised -> do + -- register delay only when we're starting to retry + timedOutVar <- registerDelay $ initTimeoutSeconds * 1_000_000 + atomically $ do + state' <- readTVar epochStateView + state' <$ case state' of + -- retry until timeout + Left EpochStateNotInitialised -> readTVar timedOutVar >>= guard + _ -> pure () + state -> pure state + + failEpochStateFoldError + :: (HasCallStack, MonadTest n) + => Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo) + -> n (AnyNewEpochState, SlotNo, BlockNo) + failEpochStateFoldError = \case + Right details -> pure details + Left (EpochStateFoldError err) -> do + H.note_ $ "EpochStateView background thread failed: " <> docToString (prettyError err) + H.failure + Left EpochStateNotInitialised -> do + H.note_ $ "EpochStateView has not been initialised within " <> show initTimeoutSeconds <> " seconds" + H.failure + -- | Create a background thread listening for new epoch states. New epoch states are available to access -- through 'EpochStateView', using query functions. @@ -311,16 +376,21 @@ getEpochStateView -> SocketPath -- ^ node socket path -> m EpochStateView getEpochStateView nodeConfigFile socketPath = withFrozenCallStack $ do - epochStateView <- H.evalIO $ newIORef $ Left EpochStateNotInitialised + epochStateView <- H.evalIO $ newTVarIO $ Left EpochStateNotInitialised + epochStateVersion <- H.evalIO $ newTVarIO 0 void . asyncRegister_ $ do result <- runExceptT $ foldEpochState nodeConfigFile socketPath QuickValidation (EpochNo maxBound) () $ \epochState slotNumber blockNumber -> do - liftIOAnnotated . writeIORef epochStateView $ Right (epochState, slotNumber, blockNumber) + liftIOAnnotated . atomically $ do + writeTVar epochStateView $ Right (epochState, slotNumber, blockNumber) + modifyTVar' epochStateVersion (+ 1) pure ConditionNotMet case result of - Left err -> writeIORef epochStateView $ Left $ EpochStateFoldError err + Left err -> atomically $ do + writeTVar epochStateView $ Left $ EpochStateFoldError err + modifyTVar' epochStateVersion (+ 1) Right _ -> pure () - pure $ EpochStateView epochStateView + pure $ EpochStateView epochStateView epochStateVersion -- | Retrieve all UTxOs map from the epoch state view. findAllUtxos From 8149f0927323895e6138de1b332374c46493fb21 Mon Sep 17 00:00:00 2001 From: Jordan Millar Date: Thu, 23 Apr 2026 11:10:28 -0400 Subject: [PATCH 3/3] cardano-testnet | Extract EpochStateView into its own module Move EpochStateView, its background-thread setup, the STM-based wait primitive, and the retry loops out of Testnet.Components.Query and into a new module Testnet.Components.EpochStateView. The new module carries a top-down haddock that explains how the writer thread, the version counter, and awaitStateUpdateTimeout cooperate so that future readers can learn the mechanism from one place rather than piecing it together across Query.hs. Testnet.Components.Query re-exports the public API so existing callers do not need to change their imports. --- cardano-testnet/cardano-testnet.cabal | 1 + ....millar_extract_epoch_state_view_module.md | 7 + .../src/Testnet/Components/EpochStateView.hs | 407 ++++++++++++++++++ .../src/Testnet/Components/Query.hs | 321 +------------- 4 files changed, 424 insertions(+), 312 deletions(-) create mode 100644 cardano-testnet/changelog.d/20260423_120000_jordan.millar_extract_epoch_state_view_module.md create mode 100644 cardano-testnet/src/Testnet/Components/EpochStateView.hs diff --git a/cardano-testnet/cardano-testnet.cabal b/cardano-testnet/cardano-testnet.cabal index b628fee5811..bef8cf3d58c 100644 --- a/cardano-testnet/cardano-testnet.cabal +++ b/cardano-testnet/cardano-testnet.cabal @@ -111,6 +111,7 @@ library Parsers.Run Testnet.Blockfrost Testnet.Components.Configuration + Testnet.Components.EpochStateView Testnet.Components.Query Testnet.Defaults Testnet.EpochStateProcessing diff --git a/cardano-testnet/changelog.d/20260423_120000_jordan.millar_extract_epoch_state_view_module.md b/cardano-testnet/changelog.d/20260423_120000_jordan.millar_extract_epoch_state_view_module.md new file mode 100644 index 00000000000..f8df1524a83 --- /dev/null +++ b/cardano-testnet/changelog.d/20260423_120000_jordan.millar_extract_epoch_state_view_module.md @@ -0,0 +1,7 @@ +### Maintenance + +- Extracted `EpochStateView` and its retry loops from `Testnet.Components.Query` + into a new module `Testnet.Components.EpochStateView` with a haddock overview + of how the writer thread, version counter, and STM-based wait primitive fit + together. `Testnet.Components.Query` re-exports the public API so existing + callers are unaffected. diff --git a/cardano-testnet/src/Testnet/Components/EpochStateView.hs b/cardano-testnet/src/Testnet/Components/EpochStateView.hs new file mode 100644 index 00000000000..6137d619c0c --- /dev/null +++ b/cardano-testnet/src/Testnet/Components/EpochStateView.hs @@ -0,0 +1,407 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE NumericUnderscores #-} + +{- | Shared epoch state view for integration tests. + +A background thread subscribes to the node via chain-sync (through 'foldEpochState') +and pushes the latest ledger state into a pair of 'TVar's each time a new block +arrives. Test code reads this view to inspect the chain state and polls for +conditions to become true. + +== How the pieces fit together + +'EpochStateView' holds two 'TVar's (private to this module): + +* The current @(AnyNewEpochState, SlotNo, BlockNo)@, or a status value while the + view is initialising or after the background thread has errored. +* A monotonically-increasing @Word64@ version counter, bumped on every write to + the state 'TVar'. This counter is the synchronisation channel between the + writer thread and any waiter: a caller records the current version, performs + its check, and then blocks on STM until the version differs. + +The writer is set up by 'getEpochStateView'. It launches a long-lived fold (via +'asyncRegister_') that runs the chain-sync client against the node. For every +block the node streams, it writes the derived @NewEpochState@ into the state +'TVar' and bumps the version. If the fold terminates with an error, it writes +the error status and bumps the version so any waiting threads can observe the +failure. + +== Retry loop + +'retryUntilRightM' is the core retry primitive. It takes an action that returns +@Either e a@ and a 'TestnetWaitPeriod' deadline expressed in chain units +(epochs\/blocks\/slots). Each iteration: + +1. Samples the current version /before/ running the action, so that updates + landing during the action are not missed. +2. Runs the action. On 'Right' it returns immediately. +3. On 'Left' it checks whether the chain-unit deadline has been exceeded; if so + it returns the last 'Left'. +4. Otherwise it blocks on STM until the version advances past the sampled + value, then loops. + +The STM block is performed by an internal helper that combines a fast path +(return immediately if the version already differs — common when the action +took long enough that a block landed during it) with an awaited path (register +a fallback timer and block on STM until either the version advances or the +timer fires). The fallback is a stall-detection heartbeat, not a wait duration +— its only job is to guarantee no single STM transaction blocks indefinitely. +What actually terminates the loop is either a successful action or the +chain-unit deadline. + +'retryUntilJustM', 'retryUntilM', 'waitForEpochs', and 'waitForBlocks' are thin +wrappers around 'retryUntilRightM'. +-} +module Testnet.Components.EpochStateView + ( -- * Shared epoch state + EpochStateView + , getEpochStateView + + -- * Reading the state + , getEpochState + , getEpochStateDetails + , getSlotNumber + , getBlockNumber + , getCurrentEpochNo + + -- * Waiting for state changes + , TestnetWaitPeriod (..) + , waitForEpochs + , waitForBlocks + , retryUntilRightM + , retryUntilJustM + , retryUntilM + ) where + +import Cardano.Api as Api +import Cardano.Api.Ledger (EpochInterval (..)) + +import qualified Cardano.Ledger.Shelley.LedgerState as L + +import Prelude + +import Control.Applicative ((<|>)) +import Control.Concurrent.STM (TVar, modifyTVar', newTVarIO, readTVar, writeTVar) +import qualified Control.Concurrent.STM as STM +import Control.Monad +import Control.Monad.Trans.Maybe (MaybeT (..), mapMaybeT, runMaybeT) +import Control.Monad.Trans.Resource +import qualified Data.Time.Clock as DTC +import Data.Word (Word64) +import GHC.Stack +import Lens.Micro ((^.)) + +import Testnet.Process.RunIO (liftIOAnnotated) +import Testnet.Runtime + +import Hedgehog +import qualified Hedgehog as H +import Hedgehog.Extras (MonadAssertion) +import qualified Hedgehog.Extras as H + +import UnliftIO.STM (atomically, readTVarIO, registerDelay) + +-- | Wait for the number of epochs +waitForEpochs + :: HasCallStack + => MonadTest m + => MonadAssertion m + => MonadIO m + => EpochStateView + -> EpochInterval -- ^ Number of epochs to wait + -> m EpochNo -- ^ The epoch number reached +waitForEpochs epochStateView interval = withFrozenCallStack $ do + void . retryUntilRightM epochStateView (WaitForEpochs interval) . pure $ Left () + getCurrentEpochNo epochStateView + +-- | Wait for the requested number of blocks +waitForBlocks + :: HasCallStack + => MonadIO m + => MonadTest m + => MonadAssertion m + => EpochStateView + -> Word64 -- ^ Number of blocks to wait + -> m BlockNo -- ^ The block number reached +waitForBlocks epochStateView numberOfBlocks = withFrozenCallStack $ do + BlockNo startingBlockNumber <- getBlockNumber epochStateView + H.note_ $ "Current block number: " <> show startingBlockNumber <> ". " + <> "Waiting for " <> show numberOfBlocks <> " blocks" + void . retryUntilRightM epochStateView (WaitForBlocks numberOfBlocks) . pure $ Left () + getBlockNumber epochStateView + +-- | Deadline for 'retryUntilRightM' and its wrappers, expressed in chain units +-- rather than wall-clock time. Each iteration of the loop only advances when the +-- chain advances, so the deadline measures how much chain progress the caller is +-- willing to wait for. +data TestnetWaitPeriod + = WaitForEpochs EpochInterval + | WaitForBlocks Word64 + | WaitForSlots Word64 + deriving Eq + +instance Show TestnetWaitPeriod where + show = \case + WaitForEpochs (EpochInterval n) -> "WaitForEpochs " <> show n + WaitForBlocks n -> "WaitForBlocks " <> show n + WaitForSlots n -> "WaitForSlots " <> show n + +-- | Core retry loop. Returns early on 'Right'; on 'Left', blocks via STM until +-- the 'EpochStateView' is updated (with a safety fallback timeout) and retries. +-- Gives up and returns the last 'Left' once the 'TestnetWaitPeriod' deadline is +-- exceeded. +retryUntilRightM + :: HasCallStack + => MonadIO m + => MonadTest m + => MonadAssertion m + => EpochStateView + -> TestnetWaitPeriod + -> m (Either e a) + -> m (Either e a) +retryUntilRightM esv timeout act = withFrozenCallStack $ do + startingValue <- getCurrentValue + go $ startingValue + timeoutW64 + where + go deadline = do + -- Sample the version before running 'act' so that any update landing during 'act' + -- makes 'awaitStateUpdateTimeout' return without blocking, rather than waiting for + -- the next update and adding a block/epoch of latency. + versionBeforeAct <- readTVarIO $ epochStateVersion esv + act >>= \case + r@(Right _) -> pure r + l@(Left _) -> do + cv <- getCurrentValue + if cv > deadline + then pure l + else awaitStateUpdateTimeout esv stallHeartbeatSeconds versionBeforeAct *> go deadline + + (getCurrentValue, timeoutW64) = case timeout of + WaitForEpochs (EpochInterval n) -> (unEpochNo <$> getCurrentEpochNo esv, fromIntegral n) + WaitForSlots n -> (unSlotNo <$> getSlotNumber esv, n) + WaitForBlocks n -> (unBlockNo <$> getBlockNumber esv, n) + + -- | Stall-detection heartbeat, not a wait duration. Releases the STM transaction at + -- most this often so the loop can re-check the chain-unit deadline; in normal + -- operation a block update lands first and the heartbeat never fires. + stallHeartbeatSeconds :: DTC.NominalDiffTime + stallHeartbeatSeconds = 300 + +-- | Retries the action until it returns 'Just' or the timeout is reached +retryUntilJustM + :: HasCallStack + => MonadIO m + => MonadTest m + => MonadAssertion m + => EpochStateView + -> TestnetWaitPeriod -- ^ timeout for an operation + -> m (Maybe a) + -> m a +retryUntilJustM esv timeout act = withFrozenCallStack $ + retryUntilRightM esv timeout (maybe (Left ()) Right <$> act) >>= \case + Right a -> pure a + Left () -> do + H.note_ $ "Action did not result in 'Just' - waited for: " <> show timeout + H.failure + +-- | Like 'retryUntilJustM' but takes a plain action and a predicate instead of +-- an action returning 'Maybe'. On timeout, annotates the last value that failed +-- the predicate. Intermediate attempts produce no annotations. +retryUntilM + :: HasCallStack + => MonadIO m + => MonadTest m + => MonadAssertion m + => Show a + => EpochStateView + -> TestnetWaitPeriod -- ^ timeout + -> m a -- ^ action to retry + -> (a -> Bool) -- ^ predicate that must hold + -> m a +retryUntilM esv timeout act predicate = withFrozenCallStack $ + retryUntilRightM esv timeout ((\r -> if predicate r then Right r else Left r) <$> act) >>= \case + Right a -> pure a + Left r -> do + H.noteShow_ r + H.note_ $ "Predicate not satisfied after: " <> show timeout + H.failure + +-- | Status of the 'EpochStateView' background thread when epoch state is not yet available +data EpochStateStatus + = EpochStateNotInitialised + -- ^ The background thread has not yet received any epoch state from the node + | EpochStateFoldError !FoldBlocksError + -- ^ The background thread encountered an error while folding blocks + +-- | A read-only handle to an epoch state that is kept fresh by a background thread. +-- +-- The constructor is private. Reads go through the accessor functions exported from +-- this module ('getEpochState', 'getBlockNumber', 'getSlotNumber', +-- 'getEpochStateDetails', 'getCurrentEpochNo') so that callers cannot accidentally +-- race against the version-counter synchronisation contract described in the module +-- header. +data EpochStateView = EpochStateView + { epochStateView :: !(TVar (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo))) + , epochStateVersion :: !(TVar Word64) + } + +-- | Block until the epoch state version advances past the provided previously sampled +-- version, or until the fallback timeout expires. Returns immediately if the current +-- version already differs, so callers can sample before running an action and avoid +-- missing updates that land during the action. Returns 'Nothing' on timeout. +-- All threads blocked on the same 'EpochStateView' wake up on each update. +awaitStateUpdateTimeout + :: MonadIO m + => EpochStateView + -> DTC.NominalDiffTime -- ^ Fallback timeout + -> Word64 -- ^ Previously sampled version + -> m (Maybe (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo))) +awaitStateUpdateTimeout EpochStateView{epochStateVersion, epochStateView} timeout sinceVersion = runMaybeT $ fastResult <|> awaitedResult + where + -- Fast path: if the version already differs, read state and version atomically and return + -- without allocating a 'registerDelay' timer. This avoids accumulating timer-queue entries + -- when callers sample a stale version and an update has already landed. + fastResult = mapMaybeT atomically $ do + v <- lift $ readTVar epochStateVersion + guard $ v /= sinceVersion + lift $ readTVar epochStateView + + awaitedResult = MaybeT $ do + timedOutVar <- registerDelay . ceiling $ timeout * 1_000_000 + atomically $ do + v <- readTVar epochStateVersion + timedOut <- readTVar timedOutVar + case (v /= sinceVersion, timedOut) of + (True, _) -> Just <$> readTVar epochStateView + (_, True) -> pure Nothing + _ -> STM.retry + +-- | Get epoch state from the view. If the state isn't available, retry waiting up to 25 seconds. Fails +-- immediately if the background thread encountered an error, or after 25 seconds if not yet initialised. +getEpochState + :: HasCallStack + => MonadTest m + => MonadAssertion m + => MonadIO m + => EpochStateView + -> m AnyNewEpochState +getEpochState epochStateView = + withFrozenCallStack $ (\(nes, _, _) -> nes) <$> getEpochStateDetails epochStateView + +getBlockNumber + :: HasCallStack + => MonadIO m + => MonadTest m + => MonadAssertion m + => EpochStateView + -> m BlockNo -- ^ The number of last produced block +getBlockNumber epochStateView = + withFrozenCallStack $ (\(_, _, blockNumber) -> blockNumber) <$> getEpochStateDetails epochStateView + +getSlotNumber + :: HasCallStack + => MonadIO m + => MonadTest m + => MonadAssertion m + => EpochStateView + -> m SlotNo -- ^ The current slot number +getSlotNumber epochStateView = + withFrozenCallStack $ (\(_, slotNumber, _) -> slotNumber) <$> getEpochStateDetails epochStateView + +-- | Access the current epoch state. Returns immediately if state is already available. +-- Blocks up to 25 seconds waiting for initialisation if the background thread has not yet +-- received any epoch state. Fails immediately if the background thread encountered an error. +getEpochStateDetails + :: HasCallStack + => MonadAssertion m + => MonadTest m + => MonadIO m + => EpochStateView + -> m (AnyNewEpochState, SlotNo, BlockNo) +getEpochStateDetails EpochStateView{epochStateView} = withFrozenCallStack $ + -- Fast path: read the TVar outside STM block so we don't register a pointless + -- 'initTimeoutSeconds' timer on every call. These getters run inside tight + -- retry loops, and the unused timer-queue entries would otherwise accumulate. + readTVarIO epochStateView + >>= awaitForState + >>= failEpochStateFoldError + where + initTimeoutSeconds :: Int + initTimeoutSeconds = 25 + + awaitForState + :: MonadIO n + => Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo) + -> n (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo)) + awaitForState = \case + Left EpochStateNotInitialised -> do + -- register delay only when we're starting to retry + timedOutVar <- registerDelay $ initTimeoutSeconds * 1_000_000 + atomically $ do + state' <- readTVar epochStateView + state' <$ case state' of + -- retry until timeout + Left EpochStateNotInitialised -> readTVar timedOutVar >>= guard + _ -> pure () + state -> pure state + + failEpochStateFoldError + :: (HasCallStack, MonadTest n) + => Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo) + -> n (AnyNewEpochState, SlotNo, BlockNo) + failEpochStateFoldError = \case + Right details -> pure details + Left (EpochStateFoldError err) -> do + H.note_ $ "EpochStateView background thread failed: " <> docToString (prettyError err) + H.failure + Left EpochStateNotInitialised -> do + H.note_ $ "EpochStateView has not been initialised within " <> show initTimeoutSeconds <> " seconds" + H.failure + + +-- | Create a background thread listening for new epoch states. New epoch states are available to access +-- through 'EpochStateView', using query functions. +-- The background thread captures any 'FoldBlocksError' into the shared state, so that consumers +-- (e.g. 'getEpochStateDetails') can fail immediately with a meaningful error message instead of +-- waiting for the full timeout. +getEpochStateView + :: HasCallStack + => MonadResource m + => MonadTest m + => NodeConfigFile In -- ^ node Yaml configuration file path + -> SocketPath -- ^ node socket path + -> m EpochStateView +getEpochStateView nodeConfigFile socketPath = withFrozenCallStack $ do + epochStateView <- H.evalIO $ newTVarIO $ Left EpochStateNotInitialised + epochStateVersion <- H.evalIO $ newTVarIO 0 + void . asyncRegister_ $ do + result <- runExceptT $ foldEpochState nodeConfigFile socketPath QuickValidation (EpochNo maxBound) () + $ \epochState slotNumber blockNumber -> do + liftIOAnnotated . atomically $ do + writeTVar epochStateView $ Right (epochState, slotNumber, blockNumber) + modifyTVar' epochStateVersion (+ 1) + pure ConditionNotMet + case result of + Left err -> atomically $ do + writeTVar epochStateView $ Left $ EpochStateFoldError err + modifyTVar' epochStateVersion (+ 1) + Right _ -> pure () + pure $ EpochStateView epochStateView epochStateVersion + +-- | Return current-ish epoch number. +-- Because we're using Ledger's 'NewEpochState', the returned epoch number won't be reflecting the current +-- epoch number during the transiontion between the epochs. In other cases it will be the true number of the +-- current epoch. +getCurrentEpochNo + :: HasCallStack + => MonadAssertion m + => MonadIO m + => MonadTest m + => EpochStateView + -> m EpochNo +getCurrentEpochNo epochStateView = withFrozenCallStack $ do + AnyNewEpochState _ newEpochState _ <- getEpochState epochStateView + pure $ newEpochState ^. L.nesELL diff --git a/cardano-testnet/src/Testnet/Components/Query.hs b/cardano-testnet/src/Testnet/Components/Query.hs index 361b03829c3..44979e62de2 100644 --- a/cardano-testnet/src/Testnet/Components/Query.hs +++ b/cardano-testnet/src/Testnet/Components/Query.hs @@ -2,32 +2,31 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE NamedFieldPuns #-} -{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} module Testnet.Components.Query - ( EpochStateView + ( -- * Epoch state view (re-exported from "Testnet.Components.EpochStateView") + EpochStateView , getEpochStateView , getEpochState , getSlotNumber , getBlockNumber , getEpochStateDetails + , getCurrentEpochNo + , TestnetWaitPeriod (..) + , waitForEpochs + , waitForBlocks + , retryUntilJustM + , retryUntilM , getMinDRepDeposit , getMinGovActionDeposit , getGovState - , getCurrentEpochNo , getTreasuryValue - , TestnetWaitPeriod (..) - , waitForEpochs , waitUntilEpoch - , waitForBlocks - , retryUntilJustM - , retryUntilM , findAllUtxos , findUtxosWithAddress @@ -60,12 +59,6 @@ import qualified Cardano.Ledger.State as L import Prelude -import Control.Applicative ((<|>)) -import Control.Concurrent.STM (TVar, modifyTVar', newTVarIO, readTVar, writeTVar) -import qualified Control.Concurrent.STM as STM -import Control.Monad -import Control.Monad.Trans.Maybe (MaybeT (..), mapMaybeT, runMaybeT) -import Control.Monad.Trans.Resource import Data.List (sortOn) import qualified Data.Map as Map import Data.Map.Strict (Map) @@ -73,16 +66,13 @@ import Data.Maybe import Data.Ord (Down (..)) import qualified Data.Set as Set import qualified Data.Text as T -import qualified Data.Time.Clock as DTC import Data.Type.Equality -import Data.Word (Word64) import GHC.Exts (IsList (..)) import GHC.Stack import Lens.Micro (Lens', to, (^.)) -import Testnet.Process.RunIO (liftIOAnnotated) +import Testnet.Components.EpochStateView import Testnet.Property.Assert -import Testnet.Runtime import Testnet.Types import Hedgehog @@ -90,8 +80,6 @@ import qualified Hedgehog as H import Hedgehog.Extras (MonadAssertion) import qualified Hedgehog.Extras as H -import UnliftIO.STM (atomically, readTVarIO, registerDelay) - -- | Block and wait for the desired epoch. waitUntilEpoch :: HasCallStack @@ -116,282 +104,6 @@ waitUntilEpoch nodeConfigFile socketPath desiredEpoch = withFrozenCallStack $ do <> "- invalid foldEpochState behaviour, result: " <> show res H.failure --- | Wait for the number of epochs -waitForEpochs - :: HasCallStack - => MonadTest m - => MonadAssertion m - => MonadIO m - => EpochStateView - -> EpochInterval -- ^ Number of epochs to wait - -> m EpochNo -- ^ The epoch number reached -waitForEpochs epochStateView interval = withFrozenCallStack $ do - void . retryUntilRightM epochStateView (WaitForEpochs interval) . pure $ Left () - getCurrentEpochNo epochStateView - --- | Wait for the requested number of blocks -waitForBlocks - :: HasCallStack - => MonadIO m - => MonadTest m - => MonadAssertion m - => EpochStateView - -> Word64 -- ^ Number of blocks to wait - -> m BlockNo -- ^ The block number reached -waitForBlocks epochStateView numberOfBlocks = withFrozenCallStack $ do - BlockNo startingBlockNumber <- getBlockNumber epochStateView - H.note_ $ "Current block number: " <> show startingBlockNumber <> ". " - <> "Waiting for " <> show numberOfBlocks <> " blocks" - void . retryUntilRightM epochStateView (WaitForBlocks numberOfBlocks) . pure $ Left () - getBlockNumber epochStateView - -data TestnetWaitPeriod - = WaitForEpochs EpochInterval - | WaitForBlocks Word64 - | WaitForSlots Word64 - deriving Eq - -instance Show TestnetWaitPeriod where - show = \case - WaitForEpochs (EpochInterval n) -> "WaitForEpochs " <> show n - WaitForBlocks n -> "WaitForBlocks " <> show n - WaitForSlots n -> "WaitForSlots " <> show n - --- | Core retry loop. Returns early on 'Right'; on 'Left', blocks via STM until --- the 'EpochStateView' is updated (with a safety fallback timeout) and retries. --- Gives up and returns the last 'Left' once the 'TestnetWaitPeriod' deadline is --- exceeded. -retryUntilRightM - :: HasCallStack - => MonadIO m - => MonadTest m - => MonadAssertion m - => EpochStateView - -> TestnetWaitPeriod - -> m (Either e a) - -> m (Either e a) -retryUntilRightM esv timeout act = withFrozenCallStack $ do - startingValue <- getCurrentValue - go $ startingValue + timeoutW64 - where - go deadline = do - -- Sample the version before running 'act' so that any update landing during 'act' - -- makes 'awaitStateUpdateTimeout' return without blocking, rather than waiting for - -- the next update and adding a block/epoch of latency. - versionBeforeAct <- readTVarIO $ epochStateVersion esv - act >>= \case - r@(Right _) -> pure r - l@(Left _) -> do - cv <- getCurrentValue - if cv > deadline - then pure l - else awaitStateUpdateTimeout esv 300 versionBeforeAct *> go deadline - - (getCurrentValue, timeoutW64) = case timeout of - WaitForEpochs (EpochInterval n) -> (unEpochNo <$> getCurrentEpochNo esv, fromIntegral n) - WaitForSlots n -> (unSlotNo <$> getSlotNumber esv, n) - WaitForBlocks n -> (unBlockNo <$> getBlockNumber esv, n) - --- | Retries the action until it returns 'Just' or the timeout is reached -retryUntilJustM - :: HasCallStack - => MonadIO m - => MonadTest m - => MonadAssertion m - => EpochStateView - -> TestnetWaitPeriod -- ^ timeout for an operation - -> m (Maybe a) - -> m a -retryUntilJustM esv timeout act = withFrozenCallStack $ - retryUntilRightM esv timeout (maybe (Left ()) Right <$> act) >>= \case - Right a -> pure a - Left () -> do - H.note_ $ "Action did not result in 'Just' - waited for: " <> show timeout - H.failure - --- | Like 'retryUntilJustM' but takes a plain action and a predicate instead of --- an action returning 'Maybe'. On timeout, annotates the last value that failed --- the predicate. Intermediate attempts produce no annotations. -retryUntilM - :: HasCallStack - => MonadIO m - => MonadTest m - => MonadAssertion m - => Show a - => EpochStateView - -> TestnetWaitPeriod -- ^ timeout - -> m a -- ^ action to retry - -> (a -> Bool) -- ^ predicate that must hold - -> m a -retryUntilM esv timeout act predicate = withFrozenCallStack $ - retryUntilRightM esv timeout ((\r -> if predicate r then Right r else Left r) <$> act) >>= \case - Right a -> pure a - Left r -> do - H.noteShow_ r - H.note_ $ "Predicate not satisfied after: " <> show timeout - H.failure - --- | Status of the 'EpochStateView' background thread when epoch state is not yet available -data EpochStateStatus - = EpochStateNotInitialised - -- ^ The background thread has not yet received any epoch state from the node - | EpochStateFoldError !FoldBlocksError - -- ^ The background thread encountered an error while folding blocks - --- | A read-only mutable pointer to an epoch state, updated automatically -data EpochStateView = EpochStateView - { epochStateView :: !(TVar (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo))) - -- ^ Automatically updated current NewEpochState. 'Left' indicates the state is not yet available - -- (either not initialised or an error occurred). 'Right' contains the latest epoch state. - -- Use 'getEpochState', 'getBlockNumber', 'getSlotNumber' to access the values. - , epochStateVersion :: !(TVar Word64) - -- ^ Monotonically increasing counter, bumped on every state write. - -- Used by 'awaitStateUpdateTimeout' to block until the next update. - } - --- | Block until the epoch state version advances past the provided previously sampled --- version, or until the fallback timeout expires. Returns immediately if the current --- version already differs, so callers can sample before running an action and avoid --- missing updates that land during the action. Returns 'Nothing' on timeout. --- All threads blocked on the same 'EpochStateView' wake up on each update. -awaitStateUpdateTimeout - :: MonadIO m - => EpochStateView - -> DTC.NominalDiffTime -- ^ Fallback timeout - -> Word64 -- ^ Previously sampled version - -> m (Maybe (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo))) -awaitStateUpdateTimeout EpochStateView{epochStateVersion, epochStateView} timeout sinceVersion = runMaybeT $ fastResult <|> awaitedResult - where - -- Fast path: if the version already differs, read state and version atomically and return - -- without allocating a 'registerDelay' timer. This avoids accumulating timer-queue entries - -- when callers sample a stale version and an update has already landed. - fastResult = mapMaybeT atomically $ do - v <- lift $ readTVar epochStateVersion - guard $ v /= sinceVersion - lift $ readTVar epochStateView - - awaitedResult = MaybeT $ do - timedOutVar <- registerDelay . ceiling $ timeout * 1_000_000 - atomically $ do - v <- readTVar epochStateVersion - timedOut <- readTVar timedOutVar - case (v /= sinceVersion, timedOut) of - (True, _) -> Just <$> readTVar epochStateView - (_, True) -> pure Nothing - _ -> STM.retry - --- | Get epoch state from the view. If the state isn't available, retry waiting up to 25 seconds. Fails --- immediately if the background thread encountered an error, or after 25 seconds if not yet initialised. -getEpochState - :: HasCallStack - => MonadTest m - => MonadAssertion m - => MonadIO m - => EpochStateView - -> m AnyNewEpochState -getEpochState epochStateView = - withFrozenCallStack $ (\(nes, _, _) -> nes) <$> getEpochStateDetails epochStateView - -getBlockNumber - :: HasCallStack - => MonadIO m - => MonadTest m - => MonadAssertion m - => EpochStateView - -> m BlockNo -- ^ The number of last produced block -getBlockNumber epochStateView = - withFrozenCallStack $ (\(_, _, blockNumber) -> blockNumber) <$> getEpochStateDetails epochStateView - -getSlotNumber - :: HasCallStack - => MonadIO m - => MonadTest m - => MonadAssertion m - => EpochStateView - -> m SlotNo -- ^ The current slot number -getSlotNumber epochStateView = - withFrozenCallStack $ (\(_, slotNumber, _) -> slotNumber) <$> getEpochStateDetails epochStateView - --- | Access the current epoch state. Returns immediately if state is already available. --- Blocks up to 25 seconds waiting for initialisation if the background thread has not yet --- received any epoch state. Fails immediately if the background thread encountered an error. -getEpochStateDetails - :: HasCallStack - => MonadAssertion m - => MonadTest m - => MonadIO m - => EpochStateView - -> m (AnyNewEpochState, SlotNo, BlockNo) -getEpochStateDetails EpochStateView{epochStateView} = withFrozenCallStack $ - -- Fast path: read the TVar outside STM block so we don't register a pointless - -- 'initTimeoutSeconds' timer on every call. These getters run inside tight - -- retry loops, and the unused timer-queue entries would otherwise accumulate. - readTVarIO epochStateView - >>= awaitForState - >>= failEpochStateFoldError - where - initTimeoutSeconds :: Int - initTimeoutSeconds = 25 - - awaitForState - :: MonadIO n - => Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo) - -> n (Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo)) - awaitForState = \case - Left EpochStateNotInitialised -> do - -- register delay only when we're starting to retry - timedOutVar <- registerDelay $ initTimeoutSeconds * 1_000_000 - atomically $ do - state' <- readTVar epochStateView - state' <$ case state' of - -- retry until timeout - Left EpochStateNotInitialised -> readTVar timedOutVar >>= guard - _ -> pure () - state -> pure state - - failEpochStateFoldError - :: (HasCallStack, MonadTest n) - => Either EpochStateStatus (AnyNewEpochState, SlotNo, BlockNo) - -> n (AnyNewEpochState, SlotNo, BlockNo) - failEpochStateFoldError = \case - Right details -> pure details - Left (EpochStateFoldError err) -> do - H.note_ $ "EpochStateView background thread failed: " <> docToString (prettyError err) - H.failure - Left EpochStateNotInitialised -> do - H.note_ $ "EpochStateView has not been initialised within " <> show initTimeoutSeconds <> " seconds" - H.failure - - --- | Create a background thread listening for new epoch states. New epoch states are available to access --- through 'EpochStateView', using query functions. --- The background thread captures any 'FoldBlocksError' into the shared state, so that consumers --- (e.g. 'getEpochStateDetails') can fail immediately with a meaningful error message instead of --- waiting for the full timeout. -getEpochStateView - :: HasCallStack - => MonadResource m - => MonadTest m - => NodeConfigFile In -- ^ node Yaml configuration file path - -> SocketPath -- ^ node socket path - -> m EpochStateView -getEpochStateView nodeConfigFile socketPath = withFrozenCallStack $ do - epochStateView <- H.evalIO $ newTVarIO $ Left EpochStateNotInitialised - epochStateVersion <- H.evalIO $ newTVarIO 0 - void . asyncRegister_ $ do - result <- runExceptT $ foldEpochState nodeConfigFile socketPath QuickValidation (EpochNo maxBound) () - $ \epochState slotNumber blockNumber -> do - liftIOAnnotated . atomically $ do - writeTVar epochStateView $ Right (epochState, slotNumber, blockNumber) - modifyTVar' epochStateVersion (+ 1) - pure ConditionNotMet - case result of - Left err -> atomically $ do - writeTVar epochStateView $ Left $ EpochStateFoldError err - modifyTVar' epochStateVersion (+ 1) - Right _ -> pure () - pure $ EpochStateView epochStateView epochStateVersion - -- | Retrieve all UTxOs map from the epoch state view. findAllUtxos :: forall era m. HasCallStack @@ -590,21 +302,6 @@ getMinDRepDeposit epochStateView ceo = withFrozenCallStack $ do govState <- getGovState epochStateView ceo pure $ conwayEraOnwardsConstraints ceo $ govState ^. L.cgsCurPParamsL . L.ppDRepDepositL . to L.unCoin --- | Return current-ish epoch number. --- Because we're using Ledger's 'NewEpochState', the returned epoch number won't be reflecting the current --- epoch number during the transiontion between the epochs. In other cases it will be the true number of the --- current epoch. -getCurrentEpochNo - :: HasCallStack - => MonadAssertion m - => MonadIO m - => MonadTest m - => EpochStateView - -> m EpochNo -getCurrentEpochNo epochStateView = withFrozenCallStack $ do - AnyNewEpochState _ newEpochState _ <- getEpochState epochStateView - pure $ newEpochState ^. L.nesELL - -- | Assert that the value pointed by the @lens@ in the epoch state is the same as the @expected@ value -- or it becomes the same within the @maxWait@ epochs. If the value is not reached within the time frame, -- the test fails.