From 09fb5e38c2e0bcf10dfe109fe38afd9c05cb820a Mon Sep 17 00:00:00 2001 From: Jonathan Tatum Date: Mon, 29 Jun 2026 18:07:45 -0700 Subject: [PATCH] Draft: improve source mapping in policy compiler. Removes the odd behavior where we interpret source sections directly instead of the YAML-parsed value. PiperOrigin-RevId: 940136257 --- common/source.cc | 1 + policy/BUILD | 7 + policy/cel_policy.cc | 96 ++++- policy/cel_policy.h | 31 +- policy/cel_policy_parse_result.cc | 14 +- policy/compiler.cc | 65 +++- policy/compiler_test.cc | 338 ++++++++++++++++-- policy/internal/BUILD | 34 ++ policy/internal/alignment_table.cc | 51 +++ policy/internal/alignment_table.h | 54 +++ policy/internal/optimizer_expr_factory.cc | 16 +- policy/internal/optimizer_expr_factory.h | 6 + .../internal/optimizer_expr_factory_test.cc | 23 ++ .../internal/yaml_string_element_scanner.cc | 237 ++++++++++++ policy/internal/yaml_string_element_scanner.h | 43 +++ .../yaml_string_element_scanner_test.cc | 141 ++++++++ policy/yaml_policy_parser.cc | 13 + 17 files changed, 1111 insertions(+), 59 deletions(-) create mode 100644 policy/internal/alignment_table.cc create mode 100644 policy/internal/alignment_table.h create mode 100644 policy/internal/yaml_string_element_scanner.cc create mode 100644 policy/internal/yaml_string_element_scanner.h create mode 100644 policy/internal/yaml_string_element_scanner_test.cc diff --git a/common/source.cc b/common/source.cc index f268bc833..81b1ada9a 100644 --- a/common/source.cc +++ b/common/source.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/policy/BUILD b/policy/BUILD index 19195be2b..8ca993a93 100644 --- a/policy/BUILD +++ b/policy/BUILD @@ -27,10 +27,12 @@ cc_library( ], deps = [ "//common:source", + "//policy/internal:alignment_table", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", ], ) @@ -41,6 +43,7 @@ cc_test( ":cel_policy", "//common:source", "//internal:testing", + "//policy/internal:alignment_table", "@com_google_absl//absl/strings", ], ) @@ -84,6 +87,7 @@ cc_library( ":cel_policy_parser", "//common:source", "//internal:status_macros", + "//policy/internal:yaml_string_element_scanner", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -190,6 +194,7 @@ cc_test( ":compiler", ":yaml_policy_parser", "//common:ast", + "//common:ast_proto", "//common:decl", "//common:navigable_ast", "//common:source", @@ -211,6 +216,8 @@ cc_test( "//runtime:runtime_builder", "//runtime:runtime_options", "//runtime:standard_runtime_builder_factory", + "//tools:cel_unparser", + "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/status", "@com_google_absl//absl/status:status_matchers", "@com_google_absl//absl/status:statusor", diff --git a/policy/cel_policy.cc b/policy/cel_policy.cc index c2d97edeb..03d2b4dd9 100644 --- a/policy/cel_policy.cc +++ b/policy/cel_policy.cc @@ -27,6 +27,7 @@ #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "common/source.h" namespace cel { @@ -59,25 +60,100 @@ std::string IndentBlock(absl::string_view text) { void CelPolicySource::NoteSourcePosition(CelPolicyElementId id, SourcePosition position) { - source_positions_[id] = position; + source_info_[id].position = position; +} + +void CelPolicySource::NoteSourceRange( + CelPolicyElementId id, std::optional range, bool quoted, + std::vector alignment_table) { + ElementSourceInfo& info = source_info_[id]; + info.range = range; + info.quoted = quoted; + info.alignment_table = std::move(alignment_table); + if (range.has_value() && info.position == -1) { + info.position = range->begin; + } +} + +void CelPolicySource::NoteAlignmentTable( + CelPolicyElementId id, + std::vector alignment_table) { + source_info_[id].alignment_table = std::move(alignment_table); +} + +absl::Span +CelPolicySource::GetAlignmentTable(CelPolicyElementId id) const { + auto it = source_info_.find(id); + if (it == source_info_.end()) { + return {}; + } + return absl::MakeConstSpan(it->second.alignment_table); +} + +std::optional CelPolicySource::MapPosition( + CelPolicyElementId id, SourcePosition relative_position) const { + auto it = source_info_.find(id); + if (it != source_info_.end() && !it->second.alignment_table.empty()) { + const auto& points = it->second.alignment_table; + auto pt_it = std::upper_bound( + points.begin(), points.end(), relative_position, + [](SourcePosition pos, const policy_internal::AlignmentPoint& pt) { + return pos < pt.value_position; + }); + if (pt_it == points.begin()) { + return points.front().source_position + + (relative_position - points.front().value_position); + } + --pt_it; + return pt_it->source_position + (relative_position - pt_it->value_position); + } + std::optional range = GetSourceRange(id); + std::optional base = GetSourcePosition(id); + if (range.has_value()) { + base = range->begin; + } + if (id == -1 && !base.has_value()) { + base.emplace(0); + } + if (base.has_value()) { + return *base + relative_position; + } + return std::nullopt; } std::optional CelPolicySource::GetSourcePosition( CelPolicyElementId id) const { - auto it = source_positions_.find(id); - if (it == source_positions_.end()) { + auto it = source_info_.find(id); + if (it == source_info_.end() || it->second.position == -1) { + return std::nullopt; + } + return it->second.position; +} + +std::optional CelPolicySource::GetSourceRange( + CelPolicyElementId id) const { + auto it = source_info_.find(id); + if (it == source_info_.end() || !it->second.range.has_value()) { + return std::nullopt; + } + return it->second.range; +} + +std::optional CelPolicySource::IsQuoted(CelPolicyElementId id) const { + auto it = source_info_.find(id); + if (it == source_info_.end() || !it->second.range.has_value()) { return std::nullopt; } - return it->second; + return it->second.quoted; } std::optional CelPolicySource::GetSourceLocation( CelPolicyElementId id) const { - auto it = source_positions_.find(id); - if (it == source_positions_.end()) { + auto it = source_info_.find(id); + if (it == source_info_.end() || it->second.position == -1) { return std::nullopt; } - return policy_source_->GetLocation(it->second); + return policy_source_->GetLocation(it->second.position); } std::string CelPolicySource::DebugString() const { @@ -85,8 +161,10 @@ std::string CelPolicySource::DebugString() const { // Sort the source elements in descending order of position std::vector> sorted_positions; - for (const auto& pair : source_positions_) { - sorted_positions.push_back(pair); + for (const auto& [id, info] : source_info_) { + if (info.position != -1) { + sorted_positions.push_back({id, info.position}); + } } std::sort(sorted_positions.begin(), sorted_positions.end(), [](const auto& a, const auto& b) { diff --git a/policy/cel_policy.h b/policy/cel_policy.h index af8f7c977..8bf33cc81 100644 --- a/policy/cel_policy.h +++ b/policy/cel_policy.h @@ -28,12 +28,21 @@ #include "absl/container/flat_hash_map.h" #include "absl/log/absl_check.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" #include "common/source.h" +#include "policy/internal/alignment_table.h" namespace cel { using CelPolicyElementId = int32_t; +struct ElementSourceInfo { + SourcePosition position = -1; + std::optional range; + bool quoted = false; + std::vector alignment_table; +}; + class CelPolicySource { public: explicit CelPolicySource(cel::SourcePtr policy_source) @@ -41,17 +50,37 @@ class CelPolicySource { const Source* absl_nonnull content() const { return policy_source_.get(); } + // Note: NoteSource* and NoteAlignmentTable methods are intended for internal + // use only and are not supported for clients outside of the policy package. void NoteSourcePosition(CelPolicyElementId id, SourcePosition position); + void NoteSourceRange( + CelPolicyElementId id, std::optional range, bool quoted, + std::vector alignment_table = {}); + + void NoteAlignmentTable( + CelPolicyElementId id, + std::vector alignment_table); + std::optional GetSourcePosition(CelPolicyElementId id) const; + std::optional GetSourceRange(CelPolicyElementId id) const; + + std::optional IsQuoted(CelPolicyElementId id) const; + + absl::Span GetAlignmentTable( + CelPolicyElementId id) const; + + std::optional MapPosition( + CelPolicyElementId id, SourcePosition relative_position) const; + std::optional GetSourceLocation(CelPolicyElementId id) const; std::string DebugString() const; private: cel::SourcePtr policy_source_; - absl::flat_hash_map source_positions_; + absl::flat_hash_map source_info_; }; class ValueString { diff --git a/policy/cel_policy_parse_result.cc b/policy/cel_policy_parse_result.cc index 32d6431bb..b9e0f510e 100644 --- a/policy/cel_policy_parse_result.cc +++ b/policy/cel_policy_parse_result.cc @@ -52,15 +52,11 @@ std::string CelPolicyIssue::ToDisplayString( std::string snippet; if (source != nullptr) { if (relative_position_) { - std::optional base = - source->GetSourcePosition(element_id_); - if (element_id_ == -1) { - base.emplace(0); - } - if (base) { - location = source->content() - ->GetLocation(*base + *relative_position_) - .value_or(SourceLocation{}); + if (std::optional abs_pos = + source->MapPosition(element_id_, *relative_position_); + abs_pos.has_value()) { + location = + source->content()->GetLocation(*abs_pos).value_or(SourceLocation{}); } } else { location = diff --git a/policy/compiler.cc b/policy/compiler.cc index 7a892447c..45a87e564 100644 --- a/policy/compiler.cc +++ b/policy/compiler.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -157,11 +158,19 @@ class IntermediateCompiledPolicy { void set_semantics(RuleSemantics semantics) { semantics_ = semantics; } RuleSemantics semantics() const { return semantics_; } + void set_policy_source(const CelPolicySource* absl_nullable src) { + policy_source_ = src; + } + const CelPolicySource* absl_nullable policy_source() const { + return policy_source_; + } + private: std::string name_; std::string display_name_; std::string description_; RuleSemantics semantics_ = RuleSemantics::kFirstMatch; + const CelPolicySource* absl_nullable policy_source_ = nullptr; CompiledRule root_rule_; }; @@ -315,6 +324,19 @@ class PolicyCompiler { return src_->content()->description(); } + absl::StatusOr CompileExpression(CelPolicyElementId id, + absl::string_view val, + const Compiler* env) { + CEL_ASSIGN_OR_RETURN( + auto source, cel::NewSource(val, std::string(GetSourceDescription()))); + auto result = env->Compile(*source, &arena_); + if (!result.ok()) { + return result; + } + result->SetSource(std::move(source)); + return result; + } + void AdaptTypeCheckIssues(CelPolicyElementId id, const ValidationResult& r) { const Source* source = r.GetSource(); @@ -336,8 +358,8 @@ class PolicyCompiler { const cel::OutputBlock& output_block, const Compiler* env) { CompiledOutputBlock output; CEL_ASSIGN_OR_RETURN(auto output_validation, - env->Compile(output_block.output().value(), - GetSourceDescription(), &arena_)); + CompileExpression(output_block.output().id(), + output_block.output().value(), env)); AdaptTypeCheckIssues(output_block.output().id(), output_validation); cel::Type result_type = DynType(); @@ -352,9 +374,10 @@ class PolicyCompiler { } } if (output_block.explanation().has_value()) { - CEL_ASSIGN_OR_RETURN(auto explanation_validation, - env->Compile(output_block.explanation()->value(), - GetSourceDescription(), &arena_)); + CEL_ASSIGN_OR_RETURN( + auto explanation_validation, + CompileExpression(output_block.explanation()->id(), + output_block.explanation()->value(), env)); AdaptTypeCheckIssues(output_block.explanation()->id(), explanation_validation); if (explanation_validation.IsValid()) { @@ -378,8 +401,8 @@ class PolicyCompiler { c_match.id = match.id(); if (match.condition().has_value()) { CEL_ASSIGN_OR_RETURN(auto validation, - env->Compile(match.condition()->value(), - GetSourceDescription(), &arena_)); + CompileExpression(match.condition()->id(), + match.condition()->value(), env)); AdaptTypeCheckIssues(match.condition()->id(), validation); if (validation.IsValid()) { CEL_ASSIGN_OR_RETURN(auto ast, validation.ReleaseAst()); @@ -422,9 +445,10 @@ class PolicyCompiler { continue; } std::string ident = absl::StrCat("variables.", name); - CEL_ASSIGN_OR_RETURN(auto validation, - env->Compile(variable.expression().value(), - GetSourceDescription(), &arena_)); + CEL_ASSIGN_OR_RETURN( + auto validation, + CompileExpression(variable.expression().id(), + variable.expression().value(), env)); AdaptTypeCheckIssues(variable.expression().id(), validation); if (!validation.IsValid()) { continue; @@ -480,6 +504,7 @@ class PolicyCompiler { absl::Status CompilePolicy(const CelPolicy& policy, IntermediateCompiledPolicy* out) { src_ = policy.source(); + out->set_policy_source(src_); out->set_semantics(RuleSemantics::kFirstMatch); out->set_name(policy.name().value()); out->set_display_name( @@ -513,6 +538,15 @@ class FirstMatchComposer { std::unique_ptr ReleaseAst() { return std::move(ast_); } private: + auto GetPositionMapper(CelPolicyElementId id) const { + return [this, id](SourcePosition pos) -> std::optional { + if (icp_.policy_source() == nullptr) { + return std::nullopt; + } + return icp_.policy_source()->MapPosition(id, pos); + }; + } + using VariableScope = absl::flat_hash_map; std::optional ResolvePolicyVariable(absl::string_view reference); @@ -733,7 +767,8 @@ absl::StatusOr FirstMatchComposer::ComposeRule(const CompiledRule& rule, MapVariables(condition); factory_.StartCopyContext(); auto copy = factory_.Copy(condition.root_expr()); - auto source_info = factory_.RemapSourceInfo(condition.source_info()); + auto source_info = factory_.RemapSourceInfo( + condition.source_info(), GetPositionMapper(match.condition->id)); factory_.MergeSourceInfo(source_info); *insertion_point = factory_.NewCall("_?_:_", std::move(copy)); insertion_point->mutable_call_expr().mutable_args().push_back( @@ -792,7 +827,8 @@ absl::StatusOr FirstMatchComposer::ComposeProduction( MapVariables(ast); factory_.StartCopyContext(); Expr to_insert = factory_.Copy(ast.root_expr()); - auto source_info = factory_.RemapSourceInfo(ast.source_info()); + auto source_info = factory_.RemapSourceInfo(ast.source_info(), + GetPositionMapper(output_ast.id)); factory_.MergeSourceInfo(source_info); insertion_expr = std::move(to_insert); @@ -832,8 +868,9 @@ void FirstMatchComposer::ComposeRuleVariables(const CompiledRule& rule, MapVariables(ast); factory_.StartCopyContext(); auto insertion = factory_.Copy(ast.root_expr()); - // TODO(b/506179116): apply the position offsets here. - auto info = factory_.RemapSourceInfo(ast.source_info()); + auto info = factory_.RemapSourceInfo(ast.source_info(), + GetPositionMapper(variable.ast.id)); + factory_.MergeSourceInfo(info); ABSL_DCHECK(init.has_list_expr()); int index = init.mutable_list_expr().elements().size(); init.mutable_list_expr().mutable_elements().push_back( diff --git a/policy/compiler_test.cc b/policy/compiler_test.cc index 8db494b45..fdd705fbf 100644 --- a/policy/compiler_test.cc +++ b/policy/compiler_test.cc @@ -19,11 +19,13 @@ #include #include +#include "absl/log/absl_check.h" #include "absl/status/status.h" #include "absl/status/status_matchers.h" #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "common/ast.h" +#include "common/ast_proto.h" #include "common/decl.h" #include "common/navigable_ast.h" #include "common/source.h" @@ -50,6 +52,7 @@ #include "runtime/runtime_builder.h" #include "runtime/runtime_options.h" #include "runtime/standard_runtime_builder_factory.h" +#include "tools/cel_unparser.h" #include "google/protobuf/arena.h" #include "google/protobuf/descriptor.h" @@ -112,6 +115,21 @@ absl::StatusOr> ParsePolicyFromYaml( return parse_result.ReleasePolicy(); } +std::unique_ptr BuildTestRuntime() { + cel::RuntimeOptions opts; + opts.enable_qualified_type_identifiers = true; + absl::StatusOr rt_builder_or = + cel::CreateStandardRuntimeBuilder( + cel::internal::GetSharedTestingDescriptorPool(), opts); + ABSL_CHECK_OK(rt_builder_or.status()); + cel::RuntimeBuilder rt_builder = std::move(*rt_builder_or); + ABSL_CHECK_OK(cel::extensions::EnableOptionalTypes(rt_builder)); + absl::StatusOr> rt_or = + std::move(rt_builder).Build(); + ABSL_CHECK_OK(rt_or.status()); + return std::move(*rt_or); +} + TEST(CompilerTest, SmokeTest) { std::string contents; std::string test_file = @@ -189,6 +207,163 @@ name: cel_policy testing::HasSubstr("undeclared reference")); } +TEST(CompilerTest, DisplayErrorFormattingForInvalidExpression) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + match: + - condition: x > 0 + output: undeclared_var +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:5:15: undeclared reference to 'undeclared_var' (in container '') + | output: undeclared_var + | ..............^)err")); +} + +TEST(CompilerTest, DisplayErrorFormattingForMultilineBlockLiteral) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + match: + - condition: x > 0 + output: | + undeclared_var +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:6:9: undeclared reference to 'undeclared_var' (in container '') + | undeclared_var + | ........^)err")); +} + +TEST(CompilerTest, DisplayErrorFormattingForDoubleQuotedExpression) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + match: + - condition: x > 0 + output: "undeclared_var" +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:5:16: undeclared reference to 'undeclared_var' (in container '') + | output: "undeclared_var" + | ...............^)err")); +} + +TEST(CompilerTest, ComposedAstPositionsAreRelativeToMainYaml) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + variables: + - name: v1 + expression: 10 + match: + - condition: variables.v1 > 5 + output: "variables.v1 + 20" +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + ASSERT_TRUE(result.IsValid()); + const cel::Ast* ast = result.GetAst(); + ASSERT_NE(ast, nullptr); + const cel::Source* source = result.GetSource()->content(); + ASSERT_NE(source, nullptr); + + auto nav_ast = cel::NavigableAst::Build(ast->root_expr()); + const cel::NavigableAstNode* var_node = nullptr; + const cel::NavigableAstNode* cond_node = nullptr; + const cel::NavigableAstNode* out_node = nullptr; + + for (const cel::NavigableAstNode& node : + nav_ast.Root().DescendantsPostorder()) { + if (node.expr()->has_const_expr() && + node.expr()->const_expr().has_int_value() && + node.expr()->const_expr().int_value() == 10) { + var_node = &node; + } else if (node.expr()->has_call_expr() && + node.expr()->call_expr().function() == "_>_") { + cond_node = &node; + } else if (node.expr()->has_call_expr() && + node.expr()->call_expr().function() == "_+_") { + out_node = &node; + } + } + + ASSERT_NE(var_node, nullptr); + ASSERT_NE(cond_node, nullptr); + ASSERT_NE(out_node, nullptr); + + auto var_pos = ast->source_info().positions().find(var_node->expr()->id()); + ASSERT_NE(var_pos, ast->source_info().positions().end()); + auto var_loc = source->GetLocation(var_pos->second); + ASSERT_TRUE(var_loc.has_value()); + EXPECT_THAT( + source->DisplayErrorLocation(*var_loc), + testing::HasSubstr(" expression: 10\n | ..................^")); + + auto cond_pos = ast->source_info().positions().find(cond_node->expr()->id()); + ASSERT_NE(cond_pos, ast->source_info().positions().end()); + auto cond_loc = source->GetLocation(cond_pos->second); + ASSERT_TRUE(cond_loc.has_value()); + EXPECT_THAT(source->DisplayErrorLocation(*cond_loc), + testing::HasSubstr(" - condition: variables.v1 > 5\n | " + "..............................^")); + + auto out_pos = ast->source_info().positions().find(out_node->expr()->id()); + ASSERT_NE(out_pos, ast->source_info().positions().end()); + auto out_loc = source->GetLocation(out_pos->second); + ASSERT_TRUE(out_loc.has_value()); + EXPECT_THAT(source->DisplayErrorLocation(*out_loc), + testing::HasSubstr(" output: \"variables.v1 + 20\"\n | " + "............................^")); +} + +TEST(CompilerTest, UnparseComposedAstWithMacros) { + absl::string_view yaml = R"yaml(name: macro_policy +rule: + variables: + - name: var_inner + expression: "[1, 2].all(i, i > 0)" + - name: var_outer + expression: "[3, 4].exists(j, j > 0 && variables.var_inner)" + match: + - condition: "[5, 6].all(k, k > 0)" + output: "[1].map(m, m > 0 && variables.var_outer)" +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + ASSERT_TRUE(result.IsValid()) << result.FormatIssues(); + const cel::Ast* ast = result.GetAst(); + ASSERT_NE(ast, nullptr); + + cel::expr::ParsedExpr parsed_expr; + ASSERT_THAT(cel::AstToParsedExpr(*ast, &parsed_expr), IsOk()); + ASSERT_OK_AND_ASSIGN(std::string unparsed, + google::api::expr::Unparse(parsed_expr)); + + EXPECT_EQ( + unparsed, + "cel.@block([[1, 2].all(i, i > 0), [3, 4].exists(j, j > 0 && @index0)], " + "[5, 6].all(k, k > 0) ? optional.of([1].map(m, m > 0 && @index1)) : " + "optional.none())"); +} + TEST(CompilerTest, UnreachableMatchAfterTriviallyTrueCondition) { absl::string_view yaml = R"yaml( name: cel_policy @@ -321,18 +496,7 @@ TEST_P(PolicyEvaluationTest, Evaluate) { ASSERT_TRUE(validation_result.IsValid()); ASSERT_OK_AND_ASSIGN(auto ast, validation_result.ReleaseAst()); - // Set up runtime - cel::RuntimeOptions opts; - opts.enable_qualified_type_identifiers = true; - ASSERT_OK_AND_ASSIGN( - cel::RuntimeBuilder rt_builder, - cel::CreateStandardRuntimeBuilder( - cel::internal::GetSharedTestingDescriptorPool(), opts)); - ASSERT_THAT(cel::extensions::EnableOptionalTypes(rt_builder), IsOk()); - - ASSERT_OK_AND_ASSIGN(std::unique_ptr runtime, - std::move(rt_builder).Build()); - + std::unique_ptr runtime = BuildTestRuntime(); ASSERT_OK_AND_ASSIGN(auto program, runtime->CreateProgram(std::move(ast))); // Set up activation @@ -877,17 +1041,7 @@ TEST_P(UnnestedDeepPolicyEvaluationTest, Evaluate) { ASSERT_TRUE(result.IsValid()); ASSERT_OK_AND_ASSIGN(auto ast, result.ReleaseAst()); - // Set up runtime - cel::RuntimeOptions opts; - opts.enable_qualified_type_identifiers = true; - ASSERT_OK_AND_ASSIGN( - cel::RuntimeBuilder rt_builder, - cel::CreateStandardRuntimeBuilder( - cel::internal::GetSharedTestingDescriptorPool(), opts)); - ASSERT_THAT(cel::extensions::EnableOptionalTypes(rt_builder), IsOk()); - ASSERT_OK_AND_ASSIGN(std::unique_ptr runtime, - std::move(rt_builder).Build()); - + std::unique_ptr runtime = BuildTestRuntime(); ASSERT_OK_AND_ASSIGN(auto program, runtime->CreateProgram(std::move(ast))); cel::Activation activation; @@ -942,5 +1096,143 @@ name: cel_policy nav_ast.Root().expr()->call_expr().function() == "cel.@block"); EXPECT_TRUE(nav_ast.Root().expr()->has_const_expr()); } + +TEST(CompilerTest, BlockScalarConditionCompilesWithoutIndentationErrors) { + absl::string_view yaml = R"yaml( +name: cel_policy +rule: + id: test_rule + match: + - condition: | + true && + false + output: '"ok"' +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_TRUE(result.IsValid()) << result.FormatIssues(); +} + +TEST(CompilerTest, MultilineStringInBlockScalar) { + absl::string_view yaml = R"yaml( +name: cel_policy +rule: + id: test_rule + match: + - condition: | + '''multiline + string''' == "multiline\nstring" + output: '"matched"' +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + ASSERT_TRUE(result.IsValid()) << result.FormatIssues(); + + ASSERT_OK_AND_ASSIGN(auto ast, result.ReleaseAst()); + std::unique_ptr runtime = BuildTestRuntime(); + ASSERT_OK_AND_ASSIGN(auto program, runtime->CreateProgram(std::move(ast))); + + cel::Activation activation; + google::protobuf::Arena arena; + ASSERT_OK_AND_ASSIGN(cel::Value res, program->Evaluate(&arena, activation)); + EXPECT_THAT(res, OptionalValueIs(StringValueIs("matched"))); +} + +TEST(CompilerTest, MultilineStringInFoldedBlockScalar) { + absl::string_view yaml = R"yaml( +name: cel_policy +rule: + id: test_rule + match: + - condition: > + '''multiline + string''' == "multiline string" + output: '"matched"' +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + ASSERT_TRUE(result.IsValid()) << result.FormatIssues(); + + ASSERT_OK_AND_ASSIGN(auto ast, result.ReleaseAst()); + std::unique_ptr runtime = BuildTestRuntime(); + ASSERT_OK_AND_ASSIGN(auto program, runtime->CreateProgram(std::move(ast))); + + cel::Activation activation; + google::protobuf::Arena arena; + ASSERT_OK_AND_ASSIGN(cel::Value res, program->Evaluate(&arena, activation)); + EXPECT_THAT(res, OptionalValueIs(StringValueIs("matched"))); +} + +TEST(CompilerTest, TypeErrorInMultilineBlockScalarReportsExactYamlLocation) { + absl::string_view yaml = R"yaml( +name: cel_policy +rule: + id: test_rule + match: + - condition: | + true && + 1 + "string_on_line_8" + output: '"ok"' +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:8:9: found no matching overload for '_+_' applied to '(int, string)' + | 1 + "string_on_line_8" + | ........^)err")); +} + +TEST(CompilerTest, TypeErrorAfterEscapeSequenceInQuotedScalar) { + absl::string_view yaml = R"yaml( +name: cel_policy +rule: + id: test_rule + match: + - condition: "true && \u0020 1 + 'string'" + output: '"ok"' +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:6:34: found no matching overload for '_+_' applied to '(int, string)' + | - condition: "true && \u0020 1 + 'string'" + | .................................^)err")); +} + +TEST(CompilerTest, TypeErrorInFoldedBlockScalarReportsExactYamlLocation) { + absl::string_view yaml = R"yaml( +name: cel_policy +rule: + id: test_rule + match: + - condition: > + true && + false && + 1 + "string_on_line_9" + output: '"ok"' +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:9:9: found no matching overload for '_+_' applied to '(int, string)' + | 1 + "string_on_line_9" + | ........^)err")); +} + } // namespace } // namespace cel diff --git a/policy/internal/BUILD b/policy/internal/BUILD index 30f43d431..b7420105d 100644 --- a/policy/internal/BUILD +++ b/policy/internal/BUILD @@ -3,6 +3,15 @@ load("@rules_cc//cc:cc_test.bzl", "cc_test") package(default_visibility = ["//visibility:public"]) +cc_library( + name = "alignment_table", + srcs = ["alignment_table.cc"], + hdrs = ["alignment_table.h"], + deps = [ + "//common:source", + ], +) + cc_library( name = "issue_reporter", srcs = ["issue_reporter.cc"], @@ -33,6 +42,7 @@ cc_library( "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/functional:function_ref", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:optional", ], @@ -66,3 +76,27 @@ cc_test( "@com_google_absl//absl/types:span", ], ) + +cc_library( + name = "yaml_string_element_scanner", + srcs = ["yaml_string_element_scanner.cc"], + hdrs = ["yaml_string_element_scanner.h"], + deps = [ + ":alignment_table", + "//common:source", + "//internal:utf8", + "@com_google_absl//absl/strings:string_view", + ], +) + +cc_test( + name = "yaml_string_element_scanner_test", + srcs = ["yaml_string_element_scanner_test.cc"], + deps = [ + ":alignment_table", + ":yaml_string_element_scanner", + "//common:source", + "//internal:testing", + "@com_google_absl//absl/strings:string_view", + ], +) diff --git a/policy/internal/alignment_table.cc b/policy/internal/alignment_table.cc new file mode 100644 index 000000000..d76055320 --- /dev/null +++ b/policy/internal/alignment_table.cc @@ -0,0 +1,51 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "policy/internal/alignment_table.h" + +#include +#include +#include + +#include "common/source.h" + +namespace cel::policy_internal { + +AlignmentTable::AlignmentTable(std::vector points) + : points_(std::move(points)) {} + +SourcePosition AlignmentTable::MapPosition(SourcePosition val_position) const { + if (points_.empty()) { + return val_position; + } + auto it = std::upper_bound(points_.begin(), points_.end(), val_position, + [](SourcePosition pos, const AlignmentPoint& pt) { + return pos < pt.value_position; + }); + if (it == points_.begin()) { + return points_.front().source_position + + (val_position - points_.front().value_position); + } + --it; + return it->source_position + (val_position - it->value_position); +} + +AlignmentTable AlignmentTable::CreateOffset(SourcePosition offset) { + if (offset == 0) { + return AlignmentTable(); + } + return AlignmentTable({AlignmentPoint{0, offset}}); +} + +} // namespace cel::policy_internal diff --git a/policy/internal/alignment_table.h b/policy/internal/alignment_table.h new file mode 100644 index 000000000..52fb43e82 --- /dev/null +++ b/policy/internal/alignment_table.h @@ -0,0 +1,54 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_ALIGNMENT_TABLE_H_ +#define THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_ALIGNMENT_TABLE_H_ + +#include +#include + +#include "common/source.h" + +namespace cel::policy_internal { + +struct AlignmentPoint { + SourcePosition value_position = -1; // Interpreted code point offset in val + SourcePosition source_position = -1; // Code point offset in raw YAML doc +}; + +class AlignmentTable { + public: + AlignmentTable() = default; + explicit AlignmentTable(std::vector points); + + // Maps a SourcePosition in the interpreted CEL expression (val) to a + // SourcePosition in the underlying YAML source document. + SourcePosition MapPosition(SourcePosition val_position) const; + + bool empty() const { return points_.empty(); } + const std::vector& points() const { return points_; } + std::vector release() { return std::move(points_); } + + static AlignmentTable CreateOffset(SourcePosition offset); + + private: + // A sparse table mapping code point offsets in the expression to offsets + // in the YAML document. Only records entries where the relative delta + // (source_position - value_position) shifts. + std::vector points_; +}; + +} // namespace cel::policy_internal + +#endif // THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_ALIGNMENT_TABLE_H_ diff --git a/policy/internal/optimizer_expr_factory.cc b/policy/internal/optimizer_expr_factory.cc index 6c89ae958..56d8bf196 100644 --- a/policy/internal/optimizer_expr_factory.cc +++ b/policy/internal/optimizer_expr_factory.cc @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -219,8 +220,8 @@ ExprId OptimizerExprFactory::CopyId(ExprId id) { return new_id; } -SourceInfo OptimizerExprFactory::RemapSourceInfo(const SourceInfo& info, - SourcePosition offset) { +SourceInfo OptimizerExprFactory::RemapSourceInfo( + const SourceInfo& info, PositionMapper position_mapper) { SourceInfo out; for (const auto& [old_id, macro_expr] : info.macro_calls()) { @@ -232,13 +233,22 @@ SourceInfo OptimizerExprFactory::RemapSourceInfo(const SourceInfo& info, for (const auto& [old_id, new_id] : renumbers_) { if (auto it = info.positions().find(old_id); it != info.positions().end()) { - out.mutable_positions()[new_id] = it->second + offset; + if (auto mapped = position_mapper(it->second); mapped.has_value()) { + out.mutable_positions()[new_id] = *mapped; + } } } return out; } +SourceInfo OptimizerExprFactory::RemapSourceInfo(const SourceInfo& info, + SourcePosition offset) { + return RemapSourceInfo(info, [offset](SourcePosition pos) { + return std::make_optional(pos + offset); + }); +} + void OptimizerExprFactory::MergeSourceInfo(const SourceInfo& info) { auto& target_info = ast_.mutable_source_info(); diff --git a/policy/internal/optimizer_expr_factory.h b/policy/internal/optimizer_expr_factory.h index 6f63f1485..f9eb32328 100644 --- a/policy/internal/optimizer_expr_factory.h +++ b/policy/internal/optimizer_expr_factory.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/container/flat_hash_map.h" +#include "absl/functional/function_ref.h" #include "absl/strings/string_view.h" #include "common/ast.h" #include "common/expr.h" @@ -73,9 +75,13 @@ class OptimizerExprFactory : protected ExprFactory { void RecordReplacement(ExprId id, const Expr& replacement, bool keep_metadata = false); + using PositionMapper = + absl::FunctionRef(SourcePosition)>; // Makes a copy of source metadata that is remapped to new expr Ids using // current renumberings. This is suitable for merging into the main source // info. + SourceInfo RemapSourceInfo(const SourceInfo& info, + PositionMapper position_mapper); SourceInfo RemapSourceInfo(const SourceInfo& info, SourcePosition offset = 0); // Merge a remapped SourceInfo into the current one. diff --git a/policy/internal/optimizer_expr_factory_test.cc b/policy/internal/optimizer_expr_factory_test.cc index 1b14b5628..9dcc37ecf 100644 --- a/policy/internal/optimizer_expr_factory_test.cc +++ b/policy/internal/optimizer_expr_factory_test.cc @@ -566,5 +566,28 @@ TEST(OptimizerExprFactory, MergeSourceInfoMacroConflict) { EXPECT_EQ(factory.issues()[0].message, "conflicting ID in macro calls merge"); } +TEST(OptimizerExprFactory, RemapSourceInfoFunctionalMapper) { + SourceInfo info; + info.mutable_positions()[1] = 5; + info.mutable_positions()[2] = 15; + + TestOptimizerExprFactory factory{Ast()}; + factory.StartCopyContext(); + // Ensure IDs 1 and 2 are copied and renumbered + (void)factory.Copy(factory.NewIdent(1, "foo")); + (void)factory.Copy(factory.NewIdent(2, "bar")); + + auto remapped = factory.RemapSourceInfo( + info, [](SourcePosition pos) -> std::optional { + if (pos == 5) return 100; + if (pos == 15) return 200; + return std::nullopt; + }); + + ASSERT_THAT(remapped.positions(), ::testing::UnorderedElementsAre( + ::testing::Pair(::testing::_, 100), + ::testing::Pair(::testing::_, 200))); +} + } // namespace } // namespace cel diff --git a/policy/internal/yaml_string_element_scanner.cc b/policy/internal/yaml_string_element_scanner.cc new file mode 100644 index 000000000..f90ab98f4 --- /dev/null +++ b/policy/internal/yaml_string_element_scanner.cc @@ -0,0 +1,237 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "policy/internal/yaml_string_element_scanner.h" + +#include +#include +#include + +#include "absl/strings/string_view.h" +#include "common/source.h" +#include "internal/utf8.h" +#include "policy/internal/alignment_table.h" + +namespace cel::policy_internal { +namespace { + +AlignmentTable ScanAlignmentTable(const SourceContentView& view, + SourcePosition start_pos, + SourcePosition end_pos, absl::string_view val, + bool double_quoted, bool single_quoted) { + std::vector points; + if (val.empty()) { + return AlignmentTable(); + } + + SourcePosition view_pos = start_pos; + SourcePosition val_pos = 0; + SourcePosition current_offset = -123456789; // sentinel + + absl::string_view remaining = val; + while (!remaining.empty()) { + auto [cp, code_units] = cel::internal::Utf8Decode(remaining); + remaining.remove_prefix(code_units); + + // If cp is whitespace, skip matching it directly; the next non-whitespace + // token will anchor the alignment. + if (cp == ' ' || cp == '\t' || cp == '\n' || cp == '\r') { + val_pos++; + continue; + } + + // Search forward in view for cp + while (view_pos < end_pos) { + char32_t ch = view.at(view_pos); + + // Handle YAML escape sequences in double-quoted strings + if (double_quoted && ch == '\\' && view_pos + 1 < end_pos) { + char32_t next_ch = view.at(view_pos + 1); + if (next_ch == 'x' || next_ch == 'u' || next_ch == 'U') { + // This hex/unicode escape decoded to cp in val. + break; + } + if ((next_ch == '"' && cp == '"') || (next_ch == '\\' && cp == '\\') || + (next_ch == '/' && cp == '/')) { + break; + } + // For 2-char escape sequences like \n, \t, etc., since cp is + // non-whitespace, \n or \t cannot match cp. We skip the escape + // sequence. + view_pos += 2; + continue; + } + + // Handle YAML single-quoted escaped quote '' + if (single_quoted && ch == '\'' && view_pos + 1 < end_pos && + view.at(view_pos + 1) == '\'') { + if (cp == '\'') { + break; + } + view_pos += 2; + continue; + } + + if (ch == cp) { + break; + } + view_pos++; + } + + if (view_pos == end_pos) { + break; + } + + SourcePosition offset = view_pos - val_pos; + if (points.empty() || offset != current_offset) { + points.push_back(AlignmentPoint{val_pos, view_pos}); + current_offset = offset; + } + + // Advance view_pos past this matched character or escape sequence + if (double_quoted && view.at(view_pos) == '\\' && view_pos + 1 < end_pos) { + char32_t next_ch = view.at(view_pos + 1); + if (next_ch == 'x') { + view_pos += 4; + } else if (next_ch == 'u') { + view_pos += 6; + } else if (next_ch == 'U') { + view_pos += 10; + } else { + view_pos += 2; + } + } else if (single_quoted && view.at(view_pos) == '\'' && + view_pos + 1 < end_pos && view.at(view_pos + 1) == '\'') { + view_pos += 2; + } else { + view_pos++; + } + val_pos++; + } + + if (points.empty()) { + points.push_back(AlignmentPoint{0, start_pos}); + } + return AlignmentTable(std::move(points)); +} + +std::pair ScanDoubleQuotedExpression( + const SourceContentView& view, SourcePosition pos, absl::string_view val) { + SourcePosition start = pos + 1; + SourcePosition cur = start; + while (cur < view.size()) { + char32_t ch = view.at(cur); + if (ch == '\\') { + cur += 2; + continue; + } + if (ch == '"') { + break; + } + cur++; + } + AlignmentTable table = + ScanAlignmentTable(view, start, cur, val, /*double_quoted=*/true, + /*single_quoted=*/false); + return {SourceRange{start, cur}, std::move(table)}; +} + +std::pair ScanSingleQuotedExpression( + const SourceContentView& view, SourcePosition pos, absl::string_view val) { + SourcePosition start = pos + 1; + SourcePosition cur = start; + while (cur < view.size()) { + char32_t ch = view.at(cur); + if (ch == '\'') { + if (cur + 1 < view.size() && view.at(cur + 1) == '\'') { + cur += 2; + continue; + } + break; + } + cur++; + } + AlignmentTable table = + ScanAlignmentTable(view, start, cur, val, /*double_quoted=*/false, + /*single_quoted=*/true); + return {SourceRange{start, cur}, std::move(table)}; +} + +std::pair ScanPlainOrBlockExpression( + const SourceContentView& view, SourcePosition pos, absl::string_view val) { + if (val.empty()) { + return {SourceRange{pos, pos}, AlignmentTable()}; + } + + char32_t first_char = view.at(pos); + SourcePosition start = pos; + if (first_char == '|' || first_char == '>') { + // Skip block header line + while (start < view.size() && view.at(start) != '\n') { + start++; + } + if (start < view.size() && view.at(start) == '\n') { + start++; + } + while (start < view.size() && + (view.at(start) == ' ' || view.at(start) == '\t')) { + start++; + } + } + + SourcePosition cur = start; + absl::string_view remaining = val; + while (!remaining.empty()) { + auto [code_point, code_units] = cel::internal::Utf8Decode(remaining); + remaining.remove_prefix(code_units); + if (code_point == ' ' || code_point == '\t' || code_point == '\n') continue; + while (cur < view.size() && view.at(cur) != code_point) { + cur++; + } + if (cur < view.size()) cur++; + } + AlignmentTable table = + ScanAlignmentTable(view, start, view.size(), val, /*double_quoted=*/false, + /*single_quoted=*/false); + return {SourceRange{start, cur}, std::move(table)}; +} + +} // namespace + +// Scans the YAML string element starting at `pos` in `view` with the parsed +// string value `val`. +// +// Returns a `YamlStringElement` reporting the starting position, optional +// source range, whether it was quoted, and the alignment table. +YamlStringElement ScanYamlStringElement(const SourceContentView& view, + SourcePosition pos, + absl::string_view val) { + if (pos < 0 || pos >= view.size()) { + return YamlStringElement{pos, std::nullopt, false, {}}; + } + + char32_t first_char = view.at(pos); + if (first_char == '"') { + auto [range, table] = ScanDoubleQuotedExpression(view, pos, val); + return YamlStringElement{pos, range, true, table.release()}; + } + if (first_char == '\'') { + auto [range, table] = ScanSingleQuotedExpression(view, pos, val); + return YamlStringElement{pos, range, true, table.release()}; + } + auto [range, table] = ScanPlainOrBlockExpression(view, pos, val); + return YamlStringElement{pos, range, false, table.release()}; +} + +} // namespace cel::policy_internal diff --git a/policy/internal/yaml_string_element_scanner.h b/policy/internal/yaml_string_element_scanner.h new file mode 100644 index 000000000..2aac0bf1e --- /dev/null +++ b/policy/internal/yaml_string_element_scanner.h @@ -0,0 +1,43 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_YAML_STRING_ELEMENT_SCANNER_H_ +#define THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_YAML_STRING_ELEMENT_SCANNER_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "common/source.h" +#include "policy/internal/alignment_table.h" + +namespace cel::policy_internal { + +struct YamlStringElement { + SourcePosition starting_position = -1; + std::optional source_range; + bool quoted = false; + std::vector alignment_table; +}; + +// Scans a YAML scalar string element directly from the SourceContentView +// (behaving as an array of char32_t unicode codepoints) starting at `pos`, +// matching against the decoded value `val`. +YamlStringElement ScanYamlStringElement(const SourceContentView& view, + SourcePosition pos, + absl::string_view val); + +} // namespace cel::policy_internal + +#endif // THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_YAML_STRING_ELEMENT_SCANNER_H_ diff --git a/policy/internal/yaml_string_element_scanner_test.cc b/policy/internal/yaml_string_element_scanner_test.cc new file mode 100644 index 000000000..80c28e717 --- /dev/null +++ b/policy/internal/yaml_string_element_scanner_test.cc @@ -0,0 +1,141 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "policy/internal/yaml_string_element_scanner.h" + +#include "absl/strings/string_view.h" +#include "common/source.h" +#include "internal/testing.h" +#include "policy/internal/alignment_table.h" + +namespace cel::policy_internal { +namespace { + +using ::testing::Eq; + +TEST(YamlStringElementScannerTest, QuotedScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: \"a + b\"")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a + b"); + EXPECT_THAT(element.starting_position, Eq(12)); + EXPECT_THAT(element.quoted, Eq(true)); + ASSERT_THAT(element.source_range.has_value(), Eq(true)); + EXPECT_THAT(element.source_range->begin, Eq(13)); + EXPECT_THAT(element.source_range->end, Eq(18)); + + ASSERT_OK_AND_ASSIGN(auto source2, NewSource("expression: 'a + b'")); + YamlStringElement element2 = + ScanYamlStringElement(source2->content(), 12, "a + b"); + EXPECT_THAT(element2.starting_position, Eq(12)); + EXPECT_THAT(element2.quoted, Eq(true)); + ASSERT_THAT(element2.source_range.has_value(), Eq(true)); + EXPECT_THAT(element2.source_range->begin, Eq(13)); + EXPECT_THAT(element2.source_range->end, Eq(18)); + + ASSERT_OK_AND_ASSIGN(auto source3, NewSource("expression: \"a + \\n b\"")); + YamlStringElement element3 = + ScanYamlStringElement(source3->content(), 12, "a + \n b"); + EXPECT_THAT(element3.starting_position, Eq(12)); + EXPECT_THAT(element3.quoted, Eq(true)); + ASSERT_THAT(element3.source_range.has_value(), Eq(true)); + EXPECT_THAT(element3.source_range->begin, Eq(13)); + EXPECT_THAT(element3.source_range->end, Eq(21)); +} + +TEST(YamlStringElementScannerTest, PlainScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: a + b")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a + b"); + EXPECT_THAT(element.starting_position, Eq(12)); + EXPECT_THAT(element.quoted, Eq(false)); + ASSERT_THAT(element.source_range.has_value(), Eq(true)); + EXPECT_THAT(element.source_range->begin, Eq(12)); + EXPECT_THAT(element.source_range->end, Eq(17)); + + ASSERT_OK_AND_ASSIGN(auto source2, NewSource("expression: a +\n b")); + YamlStringElement element2 = + ScanYamlStringElement(source2->content(), 12, "a + b"); + EXPECT_THAT(element2.starting_position, Eq(12)); + EXPECT_THAT(element2.quoted, Eq(false)); + ASSERT_THAT(element2.source_range.has_value(), Eq(true)); + EXPECT_THAT(element2.source_range->begin, Eq(12)); + EXPECT_THAT(element2.source_range->end, Eq(19)); +} + +TEST(YamlStringElementScannerTest, BlockScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: |\n a + b\n")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a + b\n"); + EXPECT_THAT(element.starting_position, Eq(12)); + EXPECT_THAT(element.quoted, Eq(false)); + ASSERT_THAT(element.source_range.has_value(), Eq(true)); + EXPECT_THAT(element.source_range->begin, Eq(16)); + EXPECT_THAT(element.source_range->end, Eq(21)); + + ASSERT_OK_AND_ASSIGN(auto source2, NewSource("expression: >2-\n a + b\n")); + YamlStringElement element2 = + ScanYamlStringElement(source2->content(), 12, "a + b"); + EXPECT_THAT(element2.starting_position, Eq(12)); + EXPECT_THAT(element2.quoted, Eq(false)); + ASSERT_THAT(element2.source_range.has_value(), Eq(true)); + EXPECT_THAT(element2.source_range->begin, Eq(20)); + EXPECT_THAT(element2.source_range->end, Eq(25)); +} + +TEST(YamlStringElementScannerTest, InvalidPosition) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: a + b")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 100, "a + b"); + EXPECT_THAT(element.starting_position, Eq(100)); + EXPECT_THAT(element.quoted, Eq(false)); + EXPECT_THAT(element.source_range.has_value(), Eq(false)); +} + +TEST(YamlStringElementScannerTest, EscapeSequencesInQuotedScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: \"a\\nb\"")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a\nb"); + EXPECT_THAT(element.quoted, Eq(true)); + EXPECT_FALSE(element.alignment_table.empty()); + AlignmentTable table1(element.alignment_table); + // In "a\nb", 'a' is at YAML pos 13. '\n' is at YAML pos 14. + // 'b' is after '\n' (2 chars in YAML), at YAML pos 16. In val, 'b' is at 2. + EXPECT_THAT(table1.MapPosition(0), Eq(13)); + EXPECT_THAT(table1.MapPosition(1), Eq(14)); + EXPECT_THAT(table1.MapPosition(2), Eq(16)); + + ASSERT_OK_AND_ASSIGN(auto source2, NewSource("expression: 'a''b'")); + YamlStringElement element2 = + ScanYamlStringElement(source2->content(), 12, "a'b"); + EXPECT_THAT(element2.quoted, Eq(true)); + EXPECT_FALSE(element2.alignment_table.empty()); + AlignmentTable table2(element2.alignment_table); + EXPECT_THAT(table2.MapPosition(0), Eq(13)); + EXPECT_THAT(table2.MapPosition(1), Eq(14)); + EXPECT_THAT(table2.MapPosition(2), Eq(16)); +} + +TEST(YamlStringElementScannerTest, LiteralNewlinesInQuotedScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: \"a +\n b\"")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a + b"); + EXPECT_THAT(element.quoted, Eq(true)); + EXPECT_FALSE(element.alignment_table.empty()); + AlignmentTable table(element.alignment_table); + EXPECT_THAT(table.MapPosition(0), Eq(13)); + EXPECT_THAT(table.MapPosition(4), Eq(19)); +} + +} // namespace +} // namespace cel::policy_internal diff --git a/policy/yaml_policy_parser.cc b/policy/yaml_policy_parser.cc index c838cff33..4a29194f4 100644 --- a/policy/yaml_policy_parser.cc +++ b/policy/yaml_policy_parser.cc @@ -29,6 +29,7 @@ #include "policy/cel_policy_parse_context.h" #include "policy/cel_policy_parse_result.h" #include "policy/cel_policy_parser.h" +#include "policy/internal/yaml_string_element_scanner.h" #include "yaml-cpp/exceptions.h" #include "yaml-cpp/node/node.h" #include "yaml-cpp/node/parse.h" @@ -60,6 +61,18 @@ std::optional YamlPolicyParser::GetValueString( return std::nullopt; } + if (!node.Mark().is_null() && ctx.policy_source().content() != nullptr) { + policy_internal::YamlStringElement element = + policy_internal::ScanYamlStringElement( + ctx.policy_source().content()->content(), node.Mark().pos, + node.as()); + + ctx.policy_source().NoteSourcePosition(id, element.starting_position); + ctx.policy_source().NoteSourceRange(id, element.source_range, + element.quoted, + std::move(element.alignment_table)); + } + try { return ValueString(id, node.as()); } catch (YAML::Exception& e) {