diff --git a/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md new file mode 100644 index 00000000000..c206686cc2c --- /dev/null +++ b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md @@ -0,0 +1,12 @@ +### Changed + +- The UPLC/PLC/PIR textual parser now rejects unquoted identifiers that + contain a `-` anywhere other than as the terminal numeric unique-suffix + separator (e.g. `pubKeyHash-305478r71`, `foo-bar`, `foo-123-456`) with + a dedicated `InvalidIdentifier` diagnostic that points directly at the + offending name and shows the full bad text. Previously the same inputs + silently mis-parsed — the prefix was taken as a name plus unique-suffix + and the remainder was picked up as an adjacent term — which surfaced as + a confusing "unexpected '(' expecting ')'" message far from the real + site (see #7742). To use such a string as a name verbatim, wrap it in + backticks: `` `pubKeyHash-305478r71` ``. diff --git a/plutus-core/plutus-core/src/PlutusCore/Error.hs b/plutus-core/plutus-core/src/PlutusCore/Error.hs index 6c4692c1599..e9ee8e74e35 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Error.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Error.hs @@ -52,6 +52,12 @@ data ParserError = BuiltinTypeNotAStar !T.Text !SourcePos | UnknownBuiltinFunction !T.Text !SourcePos ![T.Text] | InvalidBuiltinConstant !T.Text !T.Text !SourcePos + | {-| An unquoted identifier that violates the grammar: a '-' appeared + anywhere other than as the separator of a terminal numeric unique-suffix + (e.g. @pubKeyHash-305478r71@, @foo-bar@, @foo-123-456@). The 'Text' + carries the full offending text as it appeared in the source, so the + user sees their own name back in the diagnostic. 
-} + InvalidIdentifier !T.Text !SourcePos deriving stock (Eq, Ord, Generic) deriving anyclass (NFData) @@ -192,6 +198,18 @@ instance Pretty ParserError where <+> squotes (pretty s) <+> "at" <+> pretty loc + pretty (InvalidIdentifier txt loc) = + "Invalid identifier" + <+> squotes (pretty txt) + <+> "at" + <+> pretty loc + <> "." + <> hardline + <> "A '-' inside a name is the numeric unique-suffix separator and must be" + <+> "followed only by digits and a word boundary." + <> hardline + <> "To use this text as a name verbatim, quote it with backticks:" + <+> pretty ("`" <> txt <> "`") instance ShowErrorComponent ParserError where showErrorComponent = show . pretty diff --git a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs index 18d3be7e2bc..eb3154059e7 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs @@ -13,6 +13,7 @@ import Control.Monad.Except import Control.Monad.Reader (ReaderT, ask, local, runReaderT) import Control.Monad.State (StateT, evalStateT) import Data.Map qualified as M +import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as Text import Text.Megaparsec hiding (ParseError, State, parse, some) @@ -217,9 +218,33 @@ name = try $ parseUnquoted <|> parseQuoted where parseUnquoted :: Parser Name parseUnquoted = do + startOffset <- getOffset + startPos <- getSourcePos' _ <- lookAhead (satisfy isIdentifierStartingChar) + inputBefore <- getInput str <- takeWhileP (Just "identifier-unquoted") isIdentifierChar - Name str <$> uniqueSuffix str + u <- uniqueSuffix str + {- The parsed prefix is only a valid identifier if the next character is + a real word-boundary. 
If instead we see more identifier chars or another + '-', the user wrote something like `foo-bar` or `pubKeyHash-305478r71` — + the '-NNN' run we just treated as a unique-suffix was actually part of + their intended name (or they have a stray '-' at all). Fail with a + custom diagnostic that points at the whole offending identifier. -} + mBad <- optional (lookAhead (satisfy isNameExtensionChar)) + case mBad of + Nothing -> pure (Name str u) + Just _ -> do + -- Consume the remainder so the reported text covers the full name. + _ <- takeWhileP Nothing isNameExtensionChar + inputAfter <- getInput + let consumed = Text.length inputBefore - Text.length inputAfter + fullText = Text.take consumed inputBefore + parseError $ + FancyError startOffset $ + Set.singleton (ErrorCustom (InvalidIdentifier fullText startPos)) + + isNameExtensionChar :: Char -> Bool + isNameExtensionChar c = isIdentifierChar c || c == '-' parseQuoted :: Parser Name parseQuoted = do diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden new file mode 100644 index 00000000000..f851fa8de87 --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden @@ -0,0 +1,7 @@ +test:1:21: + | +1 | (program 1.1.0 (lam foo-123-456 foo-123-456)) + | ^ +Invalid identifier 'foo-123-456' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. 
+To use this text as a name verbatim, quote it with backticks: `foo-123-456` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden new file mode 100644 index 00000000000..effdf9e6cb8 --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden @@ -0,0 +1,7 @@ +test:1:21: + | +1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) + | ^ +Invalid identifier 'pubKeyHash-305478r71' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. +To use this text as a name verbatim, quote it with backticks: `pubKeyHash-305478r71` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden new file mode 100644 index 00000000000..0ed707dbbf1 --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden @@ -0,0 +1,7 @@ +test:1:21: + | +1 | (program 1.1.0 (lam foo-bar foo-bar)) + | ^ +Invalid identifier 'foo-bar' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. 
+To use this text as a name verbatim, quote it with backticks: `foo-bar` diff --git a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs index b6036a90b52..c2671111706 100644 --- a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs +++ b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs @@ -60,6 +60,9 @@ test_parsing = , propMissingConOperands , propInvalidKeyword , propBracketMismatch + , propInvalidIdentifierHyphenLetters + , propInvalidIdentifierHyphenWord + , propInvalidIdentifierDoubleUnique ] ] @@ -241,6 +244,49 @@ propBracketMismatch = "bracket-mismatch" "(program 1.1.0 [(var x))" +{- Note [Negative identifier-grammar tests] +The parser's name grammar treats '-NNN' purely as the numeric unique-suffix: +'foo-123' → Name "foo" (Unique 123). A '-' anywhere else in an identifier is +not allowed by the unquoted grammar (see 'isIdentifierChar' in +'PlutusCore.Name.Unique'). Several tools in the wild (e.g. Scalus 0.16.0's +'toUplcOptimized') emit names like 'pubKeyHash-305478r71' that violate this, +and the parser used to mis-parse them in a way that surfaced as a confusing +error hundreds of lines away from the offending name — see issue #7742. + +The goldens below pin the dedicated 'InvalidIdentifier' diagnostic, which +points at the offending name and echoes its full text. If the wording or +the reported position changes intentionally, accept the new goldens with +'scripts/regen-goldens.sh' (or '--accept'). -} + +{-| @pubKeyHash-305478r71@ — the exact shape Scalus 0.16.0 produces, inside a +binder. Previously the parser ate @pubKeyHash-305478@ as name+unique and took +@r71@ as the lam body; now it reports the whole name as invalid.
-} +propInvalidIdentifierHyphenLetters :: TestTree +propInvalidIdentifierHyphenLetters = + testParseErrorGolden + "Invalid identifier: hyphen followed by digits then letters" + "invalid-identifier-hyphen-letters" + "(program 1.1.0 (lam pubKeyHash-305478r71 (lam x x)))" + +{-| @foo-bar@ — hyphen followed by non-digits. The name scan stops at '-' (it +is not in 'isIdentifierChar') with @foo@ taken so far; the parser then sees +the trailing @-bar@ and reports the whole @foo-bar@ as an invalid identifier. -} +propInvalidIdentifierHyphenWord :: TestTree +propInvalidIdentifierHyphenWord = + testParseErrorGolden + "Invalid identifier: hyphen followed by non-digits" + "invalid-identifier-hyphen-word" + "(program 1.1.0 (lam foo-bar foo-bar))" + +{-| @foo-123-456@ — ambiguous double '-NNN' run. The first @-123@ is read as the +unique, the leftover @-456@ fails the word-boundary check, and the name is rejected. -} +propInvalidIdentifierDoubleUnique :: TestTree +propInvalidIdentifierDoubleUnique = + testParseErrorGolden + "Invalid identifier: double unique-suffix" + "invalid-identifier-double-unique" + "(program 1.1.0 (lam foo-123-456 foo-123-456))" + -------------------------------------------------------------------------------- -- Helper Functions ------------------------------------------------------------