diff --git a/common/source.cc b/common/source.cc index f268bc833..81b1ada9a 100644 --- a/common/source.cc +++ b/common/source.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/policy/BUILD b/policy/BUILD index 19195be2b..cace76f5b 100644 --- a/policy/BUILD +++ b/policy/BUILD @@ -84,6 +84,7 @@ cc_library( ":cel_policy_parser", "//common:source", "//internal:status_macros", + "//policy/internal:yaml_string_element_scanner", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", @@ -190,6 +191,7 @@ cc_test( ":compiler", ":yaml_policy_parser", "//common:ast", + "//common:ast_proto", "//common:decl", "//common:navigable_ast", "//common:source", @@ -211,6 +213,7 @@ cc_test( "//runtime:runtime_builder", "//runtime:runtime_options", "//runtime:standard_runtime_builder_factory", + "//tools:cel_unparser", "@com_google_absl//absl/status", "@com_google_absl//absl/status:status_matchers", "@com_google_absl//absl/status:statusor", diff --git a/policy/cel_policy.cc b/policy/cel_policy.cc index c2d97edeb..0f5edcc19 100644 --- a/policy/cel_policy.cc +++ b/policy/cel_policy.cc @@ -59,25 +59,53 @@ std::string IndentBlock(absl::string_view text) { void CelPolicySource::NoteSourcePosition(CelPolicyElementId id, SourcePosition position) { - source_positions_[id] = position; + source_info_[id].position = position; +} + +void CelPolicySource::NoteSourceRange(CelPolicyElementId id, + std::optional range, + bool quoted) { + ElementSourceInfo& info = source_info_[id]; + info.range = range; + info.quoted = quoted; + if (range.has_value() && info.position == -1) { + info.position = range->begin; + } } std::optional CelPolicySource::GetSourcePosition( CelPolicyElementId id) const { - auto it = source_positions_.find(id); - if (it == source_positions_.end()) { + auto it = source_info_.find(id); + if (it == source_info_.end() || it->second.position == -1) { return std::nullopt; } - return it->second; + return it->second.position; +} + +std::optional CelPolicySource::GetSourceRange( + CelPolicyElementId id) const { + auto it = source_info_.find(id); + if (it == source_info_.end() || !it->second.range.has_value()) { + return std::nullopt; + } + return it->second.range; +} + +std::optional CelPolicySource::IsQuoted(CelPolicyElementId id) const { + auto it = source_info_.find(id); + if (it == source_info_.end() || !it->second.range.has_value()) { + return std::nullopt; + } + return it->second.quoted; } std::optional CelPolicySource::GetSourceLocation( CelPolicyElementId id) const { - auto it = source_positions_.find(id); - if (it == source_positions_.end()) { + auto it = source_info_.find(id); + if (it == source_info_.end() || it->second.position == -1) { return std::nullopt; } - return policy_source_->GetLocation(it->second); + return policy_source_->GetLocation(it->second.position); } std::string CelPolicySource::DebugString() const { @@ -85,8 +113,10 @@ std::string CelPolicySource::DebugString() const { // Sort the source elements in descending order of position std::vector> sorted_positions; - for (const auto& pair : source_positions_) { - sorted_positions.push_back(pair); + for (const auto& [id, info] : source_info_) { + if (info.position != -1) { + sorted_positions.push_back({id, info.position}); + } } std::sort(sorted_positions.begin(), sorted_positions.end(), [](const auto& a, const auto& b) { diff --git a/policy/cel_policy.h b/policy/cel_policy.h index af8f7c977..2e8ab1414 100644 --- a/policy/cel_policy.h +++ b/policy/cel_policy.h @@ -34,6 +34,12 @@ namespace cel { using CelPolicyElementId = int32_t; +struct ElementSourceInfo { + SourcePosition position = -1; + std::optional range; + bool quoted = false; +}; + class CelPolicySource { public: explicit CelPolicySource(cel::SourcePtr policy_source) @@ -43,15 +49,22 @@ class CelPolicySource { void NoteSourcePosition(CelPolicyElementId id, SourcePosition position); + void NoteSourceRange(CelPolicyElementId id, std::optional range, + bool quoted); + std::optional GetSourcePosition(CelPolicyElementId id) const; + std::optional GetSourceRange(CelPolicyElementId id) const; + + std::optional IsQuoted(CelPolicyElementId id) const; + std::optional GetSourceLocation(CelPolicyElementId id) const; std::string DebugString() const; private: cel::SourcePtr policy_source_; - absl::flat_hash_map source_positions_; + absl::flat_hash_map source_info_; }; class ValueString { diff --git a/policy/cel_policy_parse_result.cc b/policy/cel_policy_parse_result.cc index 32d6431bb..d8c9bb2b0 100644 --- a/policy/cel_policy_parse_result.cc +++ b/policy/cel_policy_parse_result.cc @@ -52,8 +52,12 @@ std::string CelPolicyIssue::ToDisplayString( std::string snippet; if (source != nullptr) { if (relative_position_) { + std::optional range = source->GetSourceRange(element_id_); std::optional base = source->GetSourcePosition(element_id_); + if (range.has_value()) { + base = range->begin; + } if (element_id_ == -1) { base.emplace(0); } diff --git a/policy/compiler.cc b/policy/compiler.cc index 7a892447c..98103dbe1 100644 --- a/policy/compiler.cc +++ b/policy/compiler.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -157,11 +158,19 @@ class IntermediateCompiledPolicy { void set_semantics(RuleSemantics semantics) { semantics_ = semantics; } RuleSemantics semantics() const { return semantics_; } + void set_policy_source(const CelPolicySource* absl_nullable src) { + policy_source_ = src; + } + const CelPolicySource* absl_nullable policy_source() const { + return policy_source_; + } + private: std::string name_; std::string display_name_; std::string description_; RuleSemantics semantics_ = RuleSemantics::kFirstMatch; + const CelPolicySource* absl_nullable policy_source_ = nullptr; CompiledRule root_rule_; }; @@ -315,6 +324,33 @@ class PolicyCompiler { return src_->content()->description(); } + absl::StatusOr CompileExpression(CelPolicyElementId id, + absl::string_view val, + const Compiler* env) { + std::unique_ptr source; + if (src_ != nullptr && src_->content() != nullptr) { + std::optional range; + range = src_->GetSourceRange(id); + bool use_subrange = !(src_->IsQuoted(id).value_or(true)); + if (range.has_value() && use_subrange) { + source = std::make_unique(*src_->content(), *range); + } + } + + if (source == nullptr) { + // For quoted strings, the source should be generated from the interpreted + // YAML value. + CEL_ASSIGN_OR_RETURN( + source, cel::NewSource(val, std::string(GetSourceDescription()))); + } + auto result = env->Compile(*source, &arena_); + if (!result.ok()) { + return result; + } + result->SetSource(std::move(source)); + return result; + } + void AdaptTypeCheckIssues(CelPolicyElementId id, const ValidationResult& r) { const Source* source = r.GetSource(); @@ -336,8 +372,8 @@ class PolicyCompiler { const cel::OutputBlock& output_block, const Compiler* env) { CompiledOutputBlock output; CEL_ASSIGN_OR_RETURN(auto output_validation, - env->Compile(output_block.output().value(), - GetSourceDescription(), &arena_)); + CompileExpression(output_block.output().id(), + output_block.output().value(), env)); AdaptTypeCheckIssues(output_block.output().id(), output_validation); cel::Type result_type = DynType(); @@ -352,9 +388,10 @@ class PolicyCompiler { } } if (output_block.explanation().has_value()) { - CEL_ASSIGN_OR_RETURN(auto explanation_validation, - env->Compile(output_block.explanation()->value(), - GetSourceDescription(), &arena_)); + CEL_ASSIGN_OR_RETURN( + auto explanation_validation, + CompileExpression(output_block.explanation()->id(), + output_block.explanation()->value(), env)); AdaptTypeCheckIssues(output_block.explanation()->id(), explanation_validation); if (explanation_validation.IsValid()) { @@ -378,8 +415,8 @@ class PolicyCompiler { c_match.id = match.id(); if (match.condition().has_value()) { CEL_ASSIGN_OR_RETURN(auto validation, - env->Compile(match.condition()->value(), - GetSourceDescription(), &arena_)); + CompileExpression(match.condition()->id(), + match.condition()->value(), env)); AdaptTypeCheckIssues(match.condition()->id(), validation); if (validation.IsValid()) { CEL_ASSIGN_OR_RETURN(auto ast, validation.ReleaseAst()); @@ -422,9 +459,10 @@ class PolicyCompiler { continue; } std::string ident = absl::StrCat("variables.", name); - CEL_ASSIGN_OR_RETURN(auto validation, - env->Compile(variable.expression().value(), - GetSourceDescription(), &arena_)); + CEL_ASSIGN_OR_RETURN( + auto validation, + CompileExpression(variable.expression().id(), + variable.expression().value(), env)); AdaptTypeCheckIssues(variable.expression().id(), validation); if (!validation.IsValid()) { continue; @@ -480,6 +518,7 @@ class PolicyCompiler { absl::Status CompilePolicy(const CelPolicy& policy, IntermediateCompiledPolicy* out) { src_ = policy.source(); + out->set_policy_source(src_); out->set_semantics(RuleSemantics::kFirstMatch); out->set_name(policy.name().value()); out->set_display_name( @@ -513,6 +552,21 @@ class FirstMatchComposer { std::unique_ptr ReleaseAst() { return std::move(ast_); } private: + SourcePosition GetAstOffset(CelPolicyElementId id) const { + if (icp_.policy_source() == nullptr) { + return 0; + } + if (auto range = icp_.policy_source()->GetSourceRange(id); + range.has_value()) { + return range->begin; + } + if (auto pos = icp_.policy_source()->GetSourcePosition(id); + pos.has_value()) { + return *pos; + } + return 0; + } + using VariableScope = absl::flat_hash_map; std::optional ResolvePolicyVariable(absl::string_view reference); @@ -733,7 +787,8 @@ absl::StatusOr FirstMatchComposer::ComposeRule(const CompiledRule& rule, MapVariables(condition); factory_.StartCopyContext(); auto copy = factory_.Copy(condition.root_expr()); - auto source_info = factory_.RemapSourceInfo(condition.source_info()); + auto source_info = factory_.RemapSourceInfo( + condition.source_info(), GetAstOffset(match.condition->id)); factory_.MergeSourceInfo(source_info); *insertion_point = factory_.NewCall("_?_:_", std::move(copy)); insertion_point->mutable_call_expr().mutable_args().push_back( @@ -792,7 +847,8 @@ absl::StatusOr FirstMatchComposer::ComposeProduction( MapVariables(ast); factory_.StartCopyContext(); Expr to_insert = factory_.Copy(ast.root_expr()); - auto source_info = factory_.RemapSourceInfo(ast.source_info()); + auto source_info = + factory_.RemapSourceInfo(ast.source_info(), GetAstOffset(output_ast.id)); factory_.MergeSourceInfo(source_info); insertion_expr = std::move(to_insert); @@ -832,8 +888,9 @@ void FirstMatchComposer::ComposeRuleVariables(const CompiledRule& rule, MapVariables(ast); factory_.StartCopyContext(); auto insertion = factory_.Copy(ast.root_expr()); - // TODO(b/506179116): apply the position offsets here. - auto info = factory_.RemapSourceInfo(ast.source_info()); + auto info = factory_.RemapSourceInfo(ast.source_info(), + GetAstOffset(variable.ast.id)); + factory_.MergeSourceInfo(info); ABSL_DCHECK(init.has_list_expr()); int index = init.mutable_list_expr().elements().size(); init.mutable_list_expr().mutable_elements().push_back( diff --git a/policy/compiler_test.cc b/policy/compiler_test.cc index 8db494b45..6702b5002 100644 --- a/policy/compiler_test.cc +++ b/policy/compiler_test.cc @@ -24,6 +24,7 @@ #include "absl/status/statusor.h" #include "absl/strings/string_view.h" #include "common/ast.h" +#include "common/ast_proto.h" #include "common/decl.h" #include "common/navigable_ast.h" #include "common/source.h" @@ -50,6 +51,7 @@ #include "runtime/runtime_builder.h" #include "runtime/runtime_options.h" #include "runtime/standard_runtime_builder_factory.h" +#include "tools/cel_unparser.h" #include "google/protobuf/arena.h" #include "google/protobuf/descriptor.h" @@ -189,6 +191,163 @@ name: cel_policy testing::HasSubstr("undeclared reference")); } +TEST(CompilerTest, DisplayErrorFormattingForInvalidExpression) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + match: + - condition: x > 0 + output: undeclared_var +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:5:15: undeclared reference to 'undeclared_var' (in container '') + | output: undeclared_var + | ..............^)err")); +} + +TEST(CompilerTest, DisplayErrorFormattingForMultilineBlockLiteral) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + match: + - condition: x > 0 + output: | + undeclared_var +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:6:9: undeclared reference to 'undeclared_var' (in container '') + | undeclared_var + | ........^)err")); +} + +TEST(CompilerTest, DisplayErrorFormattingForDoubleQuotedExpression) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + match: + - condition: x > 0 + output: "undeclared_var" +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + EXPECT_FALSE(result.IsValid()); + EXPECT_THAT( + result.FormatIssues(), + testing::HasSubstr( + R"err(ERROR: test.yaml:5:16: undeclared reference to 'undeclared_var' (in container '') + | output: "undeclared_var" + | ...............^)err")); +} + +TEST(CompilerTest, ComposedAstPositionsAreRelativeToMainYaml) { + absl::string_view yaml = R"yaml(name: cel_policy +rule: + variables: + - name: v1 + expression: 10 + match: + - condition: variables.v1 > 5 + output: "variables.v1 + 20" +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + ASSERT_TRUE(result.IsValid()); + const cel::Ast* ast = result.GetAst(); + ASSERT_NE(ast, nullptr); + const cel::Source* source = result.GetSource()->content(); + ASSERT_NE(source, nullptr); + + auto nav_ast = cel::NavigableAst::Build(ast->root_expr()); + const cel::NavigableAstNode* var_node = nullptr; + const cel::NavigableAstNode* cond_node = nullptr; + const cel::NavigableAstNode* out_node = nullptr; + + for (const cel::NavigableAstNode& node : + nav_ast.Root().DescendantsPostorder()) { + if (node.expr()->has_const_expr() && + node.expr()->const_expr().has_int_value() && + node.expr()->const_expr().int_value() == 10) { + var_node = &node; + } else if (node.expr()->has_call_expr() && + node.expr()->call_expr().function() == "_>_") { + cond_node = &node; + } else if (node.expr()->has_call_expr() && + node.expr()->call_expr().function() == "_+_") { + out_node = &node; + } + } + + ASSERT_NE(var_node, nullptr); + ASSERT_NE(cond_node, nullptr); + ASSERT_NE(out_node, nullptr); + + auto var_pos = ast->source_info().positions().find(var_node->expr()->id()); + ASSERT_NE(var_pos, ast->source_info().positions().end()); + auto var_loc = source->GetLocation(var_pos->second); + ASSERT_TRUE(var_loc.has_value()); + EXPECT_THAT( + source->DisplayErrorLocation(*var_loc), + testing::HasSubstr(" expression: 10\n | ..................^")); + + auto cond_pos = ast->source_info().positions().find(cond_node->expr()->id()); + ASSERT_NE(cond_pos, ast->source_info().positions().end()); + auto cond_loc = source->GetLocation(cond_pos->second); + ASSERT_TRUE(cond_loc.has_value()); + EXPECT_THAT(source->DisplayErrorLocation(*cond_loc), + testing::HasSubstr(" - condition: variables.v1 > 5\n | " + "..............................^")); + + auto out_pos = ast->source_info().positions().find(out_node->expr()->id()); + ASSERT_NE(out_pos, ast->source_info().positions().end()); + auto out_loc = source->GetLocation(out_pos->second); + ASSERT_TRUE(out_loc.has_value()); + EXPECT_THAT(source->DisplayErrorLocation(*out_loc), + testing::HasSubstr(" output: \"variables.v1 + 20\"\n | " + "............................^")); +} + +TEST(CompilerTest, UnparseComposedAstWithMacros) { + absl::string_view yaml = R"yaml(name: macro_policy +rule: + variables: + - name: var_inner + expression: "[1, 2].all(i, i > 0)" + - name: var_outer + expression: "[3, 4].exists(j, j > 0 && variables.var_inner)" + match: + - condition: "[5, 6].all(k, k > 0)" + output: "[1].map(m, m > 0 && variables.var_outer)" +)yaml"; + ASSERT_OK_AND_ASSIGN(auto policy, ParsePolicyFromYaml(yaml)); + ASSERT_OK_AND_ASSIGN(auto compiler, BuildTestCompiler()); + ASSERT_OK_AND_ASSIGN(auto result, CompilePolicy(*compiler, *policy)); + ASSERT_TRUE(result.IsValid()) << result.FormatIssues(); + const cel::Ast* ast = result.GetAst(); + ASSERT_NE(ast, nullptr); + + cel::expr::ParsedExpr parsed_expr; + ASSERT_THAT(cel::AstToParsedExpr(*ast, &parsed_expr), IsOk()); + ASSERT_OK_AND_ASSIGN(std::string unparsed, + google::api::expr::Unparse(parsed_expr)); + + EXPECT_EQ( + unparsed, + "cel.@block([[1, 2].all(i, i > 0), [3, 4].exists(j, j > 0 && @index0)], " + "[5, 6].all(k, k > 0) ? optional.of([1].map(m, m > 0 && @index1)) : " + "optional.none())"); +} + TEST(CompilerTest, UnreachableMatchAfterTriviallyTrueCondition) { absl::string_view yaml = R"yaml( name: cel_policy diff --git a/policy/internal/BUILD b/policy/internal/BUILD index 30f43d431..98aeaeebb 100644 --- a/policy/internal/BUILD +++ b/policy/internal/BUILD @@ -66,3 +66,25 @@ cc_test( "@com_google_absl//absl/types:span", ], ) + +cc_library( + name = "yaml_string_element_scanner", + srcs = ["yaml_string_element_scanner.cc"], + hdrs = ["yaml_string_element_scanner.h"], + deps = [ + "//common:source", + "//internal:utf8", + "@com_google_absl//absl/strings:string_view", + ], +) + +cc_test( + name = "yaml_string_element_scanner_test", + srcs = ["yaml_string_element_scanner_test.cc"], + deps = [ + ":yaml_string_element_scanner", + "//common:source", + "//internal:testing", + "@com_google_absl//absl/strings:string_view", + ], +) diff --git a/policy/internal/yaml_string_element_scanner.cc b/policy/internal/yaml_string_element_scanner.cc new file mode 100644 index 000000000..06bd8e899 --- /dev/null +++ b/policy/internal/yaml_string_element_scanner.cc @@ -0,0 +1,127 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "policy/internal/yaml_string_element_scanner.h" + +#include + +#include "absl/strings/string_view.h" +#include "common/source.h" +#include "internal/utf8.h" + +namespace cel::policy_internal { +namespace { + +SourceRange ScanDoubleQuotedExpression(const SourceContentView& view, + SourcePosition pos) { + SourcePosition start = pos + 1; + SourcePosition cur = start; + while (cur < view.size()) { + char32_t ch = view.at(cur); + if (ch == '\\') { + cur += 2; + continue; + } + if (ch == '"') { + break; + } + cur++; + } + return SourceRange{start, cur}; +} + +SourceRange ScanSingleQuotedExpression(const SourceContentView& view, + SourcePosition pos) { + SourcePosition start = pos + 1; + SourcePosition cur = start; + while (cur < view.size()) { + char32_t ch = view.at(cur); + if (ch == '\'') { + if (cur + 1 < view.size() && view.at(cur + 1) == '\'') { + cur += 2; + continue; + } + break; + } + cur++; + } + return SourceRange{start, cur}; +} + +SourceRange ScanPlainOrBlockExpression(const SourceContentView& view, + SourcePosition pos, + absl::string_view val) { + if (val.empty()) { + return SourceRange{pos, pos}; + } + + char32_t first_char = view.at(pos); + SourcePosition start = pos; + if (first_char == '|' || first_char == '>') { + // Skip block header line + while (start < view.size() && view.at(start) != '\n') { + start++; + } + if (start < view.size() && view.at(start) == '\n') { + start++; + } + while (start < view.size() && + (view.at(start) == ' ' || view.at(start) == '\t')) { + start++; + } + } + + SourcePosition cur = start; + absl::string_view remaining = val; + while (!remaining.empty()) { + auto [code_point, code_units] = cel::internal::Utf8Decode(remaining); + remaining.remove_prefix(code_units); + if (code_point == ' ' || code_point == '\t' || code_point == '\n') continue; + while (cur < view.size() && view.at(cur) != code_point) { + cur++; + } + if (cur < view.size()) cur++; + } + return SourceRange{start, cur}; +} + +} // namespace + +// Scans the YAML string element starting at `pos` in `view` with the parsed +// string value `val`. +// +// Returns a `YamlStringElement` reporting the starting position, optional +// source range, and whether it was quoted. +// +// TODO(b/525495513): Implement support for correctly aligning the source YAML +// to the decoded string value (the component CEL expression). +YamlStringElement ScanYamlStringElement(const SourceContentView& view, + SourcePosition pos, + absl::string_view val) { + if (pos < 0 || pos >= view.size()) { + return YamlStringElement{pos, std::nullopt, false}; + } + + char32_t first_char = view.at(pos); + if (first_char == '"') { + return YamlStringElement{pos, ScanDoubleQuotedExpression(view, pos), true}; + } + if (first_char == '\'') { + return YamlStringElement{pos, ScanSingleQuotedExpression(view, pos), true}; + } + return YamlStringElement{pos, ScanPlainOrBlockExpression(view, pos, val), + false}; +} + +} // namespace cel::policy_internal diff --git a/policy/internal/yaml_string_element_scanner.h b/policy/internal/yaml_string_element_scanner.h new file mode 100644 index 000000000..f4f7536b9 --- /dev/null +++ b/policy/internal/yaml_string_element_scanner.h @@ -0,0 +1,40 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_YAML_STRING_ELEMENT_SCANNER_H_ +#define THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_YAML_STRING_ELEMENT_SCANNER_H_ + +#include + +#include "absl/strings/string_view.h" +#include "common/source.h" + +namespace cel::policy_internal { + +struct YamlStringElement { + SourcePosition starting_position = -1; + std::optional source_range; + bool quoted = false; +}; + +// Scans a YAML scalar string element directly from the SourceContentView +// (behaving as an array of char32_t unicode codepoints) starting at `pos`, +// matching against the decoded value `val`. +YamlStringElement ScanYamlStringElement(const SourceContentView& view, + SourcePosition pos, + absl::string_view val); + +} // namespace cel::policy_internal + +#endif // THIRD_PARTY_CEL_CPP_POLICY_INTERNAL_YAML_STRING_ELEMENT_SCANNER_H_ diff --git a/policy/internal/yaml_string_element_scanner_test.cc b/policy/internal/yaml_string_element_scanner_test.cc new file mode 100644 index 000000000..94cab4ba8 --- /dev/null +++ b/policy/internal/yaml_string_element_scanner_test.cc @@ -0,0 +1,105 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "policy/internal/yaml_string_element_scanner.h" + +#include "absl/strings/string_view.h" +#include "common/source.h" +#include "internal/testing.h" + +namespace cel::policy_internal { +namespace { + +using ::testing::Eq; + +TEST(YamlStringElementScannerTest, QuotedScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: \"a + b\"")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a + b"); + EXPECT_THAT(element.starting_position, Eq(12)); + EXPECT_THAT(element.quoted, Eq(true)); + ASSERT_THAT(element.source_range.has_value(), Eq(true)); + EXPECT_THAT(element.source_range->begin, Eq(13)); + EXPECT_THAT(element.source_range->end, Eq(18)); + + ASSERT_OK_AND_ASSIGN(auto source2, NewSource("expression: 'a + b'")); + YamlStringElement element2 = + ScanYamlStringElement(source2->content(), 12, "a + b"); + EXPECT_THAT(element2.starting_position, Eq(12)); + EXPECT_THAT(element2.quoted, Eq(true)); + ASSERT_THAT(element2.source_range.has_value(), Eq(true)); + EXPECT_THAT(element2.source_range->begin, Eq(13)); + EXPECT_THAT(element2.source_range->end, Eq(18)); + + ASSERT_OK_AND_ASSIGN(auto source3, NewSource("expression: \"a + \\n b\"")); + YamlStringElement element3 = + ScanYamlStringElement(source3->content(), 12, "a + \n b"); + EXPECT_THAT(element3.starting_position, Eq(12)); + EXPECT_THAT(element3.quoted, Eq(true)); + ASSERT_THAT(element3.source_range.has_value(), Eq(true)); + EXPECT_THAT(element3.source_range->begin, Eq(13)); + EXPECT_THAT(element3.source_range->end, Eq(21)); +} + +TEST(YamlStringElementScannerTest, PlainScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: a + b")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a + b"); + EXPECT_THAT(element.starting_position, Eq(12)); + EXPECT_THAT(element.quoted, Eq(false)); + ASSERT_THAT(element.source_range.has_value(), Eq(true)); + EXPECT_THAT(element.source_range->begin, Eq(12)); + EXPECT_THAT(element.source_range->end, Eq(17)); + + ASSERT_OK_AND_ASSIGN(auto source2, NewSource("expression: a +\n b")); + YamlStringElement element2 = + ScanYamlStringElement(source2->content(), 12, "a + b"); + EXPECT_THAT(element2.starting_position, Eq(12)); + EXPECT_THAT(element2.quoted, Eq(false)); + ASSERT_THAT(element2.source_range.has_value(), Eq(true)); + EXPECT_THAT(element2.source_range->begin, Eq(12)); + EXPECT_THAT(element2.source_range->end, Eq(19)); +} + +TEST(YamlStringElementScannerTest, BlockScalars) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: |\n a + b\n")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 12, "a + b\n"); + EXPECT_THAT(element.starting_position, Eq(12)); + EXPECT_THAT(element.quoted, Eq(false)); + ASSERT_THAT(element.source_range.has_value(), Eq(true)); + EXPECT_THAT(element.source_range->begin, Eq(16)); + EXPECT_THAT(element.source_range->end, Eq(21)); + + ASSERT_OK_AND_ASSIGN(auto source2, NewSource("expression: >2-\n a + b\n")); + YamlStringElement element2 = + ScanYamlStringElement(source2->content(), 12, "a + b"); + EXPECT_THAT(element2.starting_position, Eq(12)); + EXPECT_THAT(element2.quoted, Eq(false)); + ASSERT_THAT(element2.source_range.has_value(), Eq(true)); + EXPECT_THAT(element2.source_range->begin, Eq(20)); + EXPECT_THAT(element2.source_range->end, Eq(25)); +} + +TEST(YamlStringElementScannerTest, InvalidPosition) { + ASSERT_OK_AND_ASSIGN(auto source, NewSource("expression: a + b")); + YamlStringElement element = + ScanYamlStringElement(source->content(), 100, "a + b"); + EXPECT_THAT(element.starting_position, Eq(100)); + EXPECT_THAT(element.quoted, Eq(false)); + EXPECT_THAT(element.source_range.has_value(), Eq(false)); +} + +} // namespace +} // namespace cel::policy_internal diff --git a/policy/yaml_policy_parser.cc b/policy/yaml_policy_parser.cc index c838cff33..4b4a37cb9 100644 --- a/policy/yaml_policy_parser.cc +++ b/policy/yaml_policy_parser.cc @@ -29,6 +29,7 @@ #include "policy/cel_policy_parse_context.h" #include "policy/cel_policy_parse_result.h" #include "policy/cel_policy_parser.h" +#include "policy/internal/yaml_string_element_scanner.h" #include "yaml-cpp/exceptions.h" #include "yaml-cpp/node/node.h" #include "yaml-cpp/node/parse.h" @@ -60,6 +61,17 @@ std::optional YamlPolicyParser::GetValueString( return std::nullopt; } + if (!node.Mark().is_null() && ctx.policy_source().content() != nullptr) { + policy_internal::YamlStringElement element = + policy_internal::ScanYamlStringElement( + ctx.policy_source().content()->content(), node.Mark().pos, + node.as()); + + ctx.policy_source().NoteSourcePosition(id, element.starting_position); + ctx.policy_source().NoteSourceRange(id, element.source_range, + element.quoted); + } + try { return ValueString(id, node.as()); } catch (YAML::Exception& e) {