From b9be0112c3a4a25af4447e267db0a342c0e5634d Mon Sep 17 00:00:00 2001 From: Yuriy Lazaryev Date: Fri, 24 Apr 2026 18:19:53 +0200 Subject: [PATCH 1/2] Add negative golden tests for invalid-identifier parse errors (#7742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Freeze the current (unhelpful) error output for three forms of invalid UPLC identifier: - `foo-bar` — hyphen followed by non-digits - `foo-123-456` — double `-NNN` suffix - `pubKeyHash-305478r71` — hyphen + digits + more letters (the shape Scalus 0.16.0's `toUplcOptimized` emits, from issue #7742) All three cases produce misleading diagnostics today — notably the Scalus case reports the error 8+ characters past the offending name. Capturing the status quo as goldens so that a follow-up improvement to name-parser diagnostics shows up as an explicit golden-file diff. --- .../invalid-identifier-double-unique.golden | 6 +++ .../invalid-identifier-hyphen-letters.golden | 6 +++ .../invalid-identifier-hyphen-word.golden | 6 +++ .../testlib/Generators/Spec.hs | 46 +++++++++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden create mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden create mode 100644 plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden new file mode 100644 index 00000000000..2c1561edb6b --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden @@ -0,0 +1,6 @@ +test:1:28: + | +1 | (program 1.1.0 (lam foo-123-456 foo-123-456)) + | ^ +unexpected '-' +expecting '`', digit, opening bracket '[', or opening parenthesis '(' 
diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden new file mode 100644 index 00000000000..d0113c25b87 --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden @@ -0,0 +1,6 @@ +test:1:42: + | +1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) + | ^ +unexpected '(' +expecting closing parenthesis ')' diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden new file mode 100644 index 00000000000..f7d50bb245d --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden @@ -0,0 +1,6 @@ +test:1:24: + | +1 | (program 1.1.0 (lam foo-bar foo-bar)) + | ^ +unexpected '-' +expecting '`', identifier-unquoted, opening bracket '[', or opening parenthesis '(' diff --git a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs index b6036a90b52..c2671111706 100644 --- a/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs +++ b/plutus-core/untyped-plutus-core/testlib/Generators/Spec.hs @@ -60,6 +60,9 @@ test_parsing = , propMissingConOperands , propInvalidKeyword , propBracketMismatch + , propInvalidIdentifierHyphenLetters + , propInvalidIdentifierHyphenWord + , propInvalidIdentifierDoubleUnique ] ] @@ -241,6 +244,49 @@ propBracketMismatch = "bracket-mismatch" "(program 1.1.0 [(var x))" +{- Note [Negative identifier-grammar tests] +The parser's name grammar treats '-NNN' purely as the numeric unique-suffix: +'foo-123' → Name "foo" (Unique 123). A '-' anywhere else in an identifier is +not allowed by the unquoted grammar (see 'isIdentifierChar' in +'PlutusCore.Name.Unique'). Several tools in the wild (e.g. 
Scalus 0.16.0's +'toUplcOptimized') emit names like 'pubKeyHash-305478r71' that violate this, +and today the parser mis-parses them in a way that surfaces as a confusing +error reported past the offending name, not on it — see issue #7742. + +The goldens below freeze the *current* (unhelpful) error output so that a +future diagnostic improvement shows up as an explicit golden-file diff. +When the parser is taught to point at the bad name itself, accept the new +goldens with 'scripts/regen-goldens.sh' (or '--accept'). -} + +{-| @pubKeyHash-305478r71@ — the exact shape Scalus 0.16.0 produces, inside a +binder. Current behaviour: the parser eats @pubKeyHash-305478@ as name+unique, +picks up @r71@ as the lam body, then fails on the next opening paren, past the name. -} +propInvalidIdentifierHyphenLetters :: TestTree +propInvalidIdentifierHyphenLetters = + testParseErrorGolden + "Invalid identifier: hyphen followed by digits then letters" + "invalid-identifier-hyphen-letters" + "(program 1.1.0 (lam pubKeyHash-305478r71 (lam x x)))" + +{-| @foo-bar@ — hyphen followed by non-digits. Current behaviour: the parser +stops at '-' (it is not in 'isIdentifierChar'), takes @foo@ as the name, and +then explodes on @-bar@ which is not a valid continuation anywhere. -} +propInvalidIdentifierHyphenWord :: TestTree +propInvalidIdentifierHyphenWord = + testParseErrorGolden + "Invalid identifier: hyphen followed by non-digits" + "invalid-identifier-hyphen-word" + "(program 1.1.0 (lam foo-bar foo-bar))" + +{-| @foo-123-456@ — ambiguous double '-NNN' run. Current behaviour: the first +@-123@ wins as the unique, @-456@ is left over and fails the next check. 
-} +propInvalidIdentifierDoubleUnique :: TestTree +propInvalidIdentifierDoubleUnique = + testParseErrorGolden + "Invalid identifier: double unique-suffix" + "invalid-identifier-double-unique" + "(program 1.1.0 (lam foo-123-456 foo-123-456))" + -------------------------------------------------------------------------------- -- Helper Functions ------------------------------------------------------------ From 39999a8092b45549c96751e21679f760f90722a3 Mon Sep 17 00:00:00 2001 From: Yuriy Lazaryev Date: Fri, 24 Apr 2026 18:28:58 +0200 Subject: [PATCH 2/2] Point parser diagnostics at the offending name (#7742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the unquoted-identifier parser finishes, require that the next char is a real word-boundary (not another identifier char and not another '-'). Otherwise the caller wrote something like `pubKeyHash-305478r71`, `foo-bar` or `foo-123-456`: the '-NNN' we just consumed as the numeric unique-suffix is not actually terminal, and the prefix interpretation would silently mis-parse. Consume the remainder of the extended identifier so the diagnostic can cite the full bad text, then raise a new `InvalidIdentifier` custom parser error with a caret on the start of the identifier and an actionable hint to quote it with backticks. For the original Scalus 0.16.0 HTLC reproducer this changes the error from `htlc.uplc:448:39: unexpected '(' expecting ')'` (on a lambda 8+ chars past the real site) to `htlc.uplc:447:41: Invalid identifier 'pubKeyHash-305478r71'` — on the offending name itself. The three negative goldens added in the previous commit are updated to the new message; all 3886 tests across plutus-core/untyped-plutus-core/ plutus-ir pass unchanged. 
--- ...aryev_issue_7742_uplc_parser_large_case.md | 12 +++++++++ .../plutus-core/src/PlutusCore/Error.hs | 18 +++++++++++++ .../src/PlutusCore/Parser/ParserCommon.hs | 27 ++++++++++++++++++- .../invalid-identifier-double-unique.golden | 9 ++++--- .../invalid-identifier-hyphen-letters.golden | 9 ++++--- .../invalid-identifier-hyphen-word.golden | 9 ++++--- 6 files changed, 71 insertions(+), 13 deletions(-) create mode 100644 plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md diff --git a/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md new file mode 100644 index 00000000000..c206686cc2c --- /dev/null +++ b/plutus-core/changelog.d/20260424_182803_yuriy.lazaryev_issue_7742_uplc_parser_large_case.md @@ -0,0 +1,12 @@ +### Changed + +- The UPLC/PLC/PIR textual parser now rejects unquoted identifiers that + contain a `-` anywhere other than as the terminal numeric unique-suffix + separator (e.g. `pubKeyHash-305478r71`, `foo-bar`, `foo-123-456`) with + a dedicated `InvalidIdentifier` diagnostic that points directly at the + offending name and shows the full bad text. Previously the same inputs + silently mis-parsed — the prefix was taken as a name plus unique-suffix + and the remainder was picked up as an adjacent term — which surfaced as + a confusing "unexpected '(' expecting ')'" message far from the real + site (see #7742). To use such a string as a name verbatim, wrap it in + backticks: `` `pubKeyHash-305478r71` ``. 
diff --git a/plutus-core/plutus-core/src/PlutusCore/Error.hs b/plutus-core/plutus-core/src/PlutusCore/Error.hs index 6c4692c1599..e9ee8e74e35 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Error.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Error.hs @@ -52,6 +52,12 @@ data ParserError = BuiltinTypeNotAStar !T.Text !SourcePos | UnknownBuiltinFunction !T.Text !SourcePos ![T.Text] | InvalidBuiltinConstant !T.Text !T.Text !SourcePos + | {-| An unquoted identifier that violates the grammar: a '-' appeared + anywhere other than as the separator of a terminal numeric unique-suffix + (e.g. @pubKeyHash-305478r71@, @foo-bar@, @foo-123-456@). The 'Text' + carries the full offending text as it appeared in the source, so the + user sees their own name back in the diagnostic. -} + InvalidIdentifier !T.Text !SourcePos deriving stock (Eq, Ord, Generic) deriving anyclass (NFData) @@ -192,6 +198,18 @@ instance Pretty ParserError where <+> squotes (pretty s) <+> "at" <+> pretty loc + pretty (InvalidIdentifier txt loc) = + "Invalid identifier" + <+> squotes (pretty txt) + <+> "at" + <+> pretty loc + <> "." + <> hardline + <> "A '-' inside a name is the numeric unique-suffix separator and must be" + <+> "followed only by digits and a word boundary." + <> hardline + <> "To use this text as a name verbatim, quote it with backticks:" + <+> pretty ("`" <> txt <> "`") instance ShowErrorComponent ParserError where showErrorComponent = show . 
pretty diff --git a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs index 18d3be7e2bc..eb3154059e7 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Parser/ParserCommon.hs @@ -13,6 +13,7 @@ import Control.Monad.Except import Control.Monad.Reader (ReaderT, ask, local, runReaderT) import Control.Monad.State (StateT, evalStateT) import Data.Map qualified as M +import Data.Set qualified as Set import Data.Text (Text) import Data.Text qualified as Text import Text.Megaparsec hiding (ParseError, State, parse, some) @@ -217,9 +218,33 @@ name = try $ parseUnquoted <|> parseQuoted where parseUnquoted :: Parser Name parseUnquoted = do + startOffset <- getOffset + startPos <- getSourcePos' _ <- lookAhead (satisfy isIdentifierStartingChar) + inputBefore <- getInput str <- takeWhileP (Just "identifier-unquoted") isIdentifierChar - Name str <$> uniqueSuffix str + u <- uniqueSuffix str + {- The parsed prefix is only a valid identifier if the next character is + a real word-boundary. If instead we see more identifier chars or another + '-', the user wrote something like `foo-bar` or `pubKeyHash-305478r71` — + the '-NNN' run we just treated as a unique-suffix was actually part of + their intended name (or the name contains a stray '-'). Fail with a + custom diagnostic that points at the whole offending identifier. -} + mBad <- optional (lookAhead (satisfy isNameExtensionChar)) + case mBad of + Nothing -> pure (Name str u) + Just _ -> do + -- Consume the remainder so the reported text covers the full name. 
+ _ <- takeWhileP Nothing isNameExtensionChar + inputAfter <- getInput + let consumed = Text.length inputBefore - Text.length inputAfter + fullText = Text.take consumed inputBefore + parseError $ + FancyError startOffset $ + Set.singleton (ErrorCustom (InvalidIdentifier fullText startPos)) + + isNameExtensionChar :: Char -> Bool + isNameExtensionChar c = isIdentifierChar c || c == '-' parseQuoted :: Parser Name parseQuoted = do diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden index 2c1561edb6b..f851fa8de87 100644 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-double-unique.golden @@ -1,6 +1,7 @@ -test:1:28: +test:1:21: | 1 | (program 1.1.0 (lam foo-123-456 foo-123-456)) - | ^ -unexpected '-' -expecting '`', digit, opening bracket '[', or opening parenthesis '(' + | ^ +Invalid identifier 'foo-123-456' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. +To use this text as a name verbatim, quote it with backticks: `foo-123-456` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden index d0113c25b87..effdf9e6cb8 100644 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-letters.golden @@ -1,6 +1,7 @@ -test:1:42: +test:1:21: | 1 | (program 1.1.0 (lam pubKeyHash-305478r71 (lam x x))) - | ^ -unexpected '(' -expecting closing parenthesis ')' + | ^ +Invalid identifier 'pubKeyHash-305478r71' at test:1:21. 
+A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. +To use this text as a name verbatim, quote it with backticks: `pubKeyHash-305478r71` diff --git a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden index f7d50bb245d..0ed707dbbf1 100644 --- a/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden +++ b/plutus-core/untyped-plutus-core/test/Parser/Golden/invalid-identifier-hyphen-word.golden @@ -1,6 +1,7 @@ -test:1:24: +test:1:21: | 1 | (program 1.1.0 (lam foo-bar foo-bar)) - | ^ -unexpected '-' -expecting '`', identifier-unquoted, opening bracket '[', or opening parenthesis '(' + | ^ +Invalid identifier 'foo-bar' at test:1:21. +A '-' inside a name is the numeric unique-suffix separator and must be followed only by digits and a word boundary. +To use this text as a name verbatim, quote it with backticks: `foo-bar`