From 061061e391a6b49a8e025efc5b1c8603619151bf Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 8 Jan 2026 15:54:02 +0800 Subject: [PATCH 01/23] succeed to graph lookup single index Signed-off-by: Lantao Jin --- .../org/opensearch/sql/analysis/Analyzer.java | 6 + .../sql/ast/AbstractNodeVisitor.java | 5 + .../opensearch/sql/ast/tree/GraphLookup.java | 55 +++++++ .../sql/calcite/CalciteRelNodeVisitor.java | 80 ++++++++++ .../sql/calcite/utils/GraphLookupUtils.java | 66 +++++++++ .../calcite/utils/OpenSearchTypeFactory.java | 2 +- .../executor/OpenSearchExecutionEngine.java | 4 + ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 5 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 16 ++ .../opensearch/sql/ppl/parser/AstBuilder.java | 28 ++++ .../calcite/CalcitePPLGraphLookupTest.java | 140 ++++++++++++++++++ .../sql/ppl/parser/AstBuilderTest.java | 37 +++++ 12 files changed, 443 insertions(+), 1 deletion(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java create mode 100644 core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 24cef144c97..957bfe91fc1 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -73,6 +73,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; @@ -533,6 +534,11 @@ public LogicalPlan visitAddColTotals(AddColTotals node, AnalysisContext context) throw getOnlyForCalciteException("addcoltotals"); } + @Override + public 
LogicalPlan visitGraphLookup(GraphLookup node, AnalysisContext context) { + throw getOnlyForCalciteException("graphlookup"); + } + /** Build {@link ParseExpression} to context and skip to child nodes. */ @Override public LogicalPlan visitParse(Parse node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index a6ef5e7547a..62ce9875daf 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -61,6 +61,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; @@ -461,4 +462,8 @@ public T visitAddTotals(AddTotals node, C context) { public T visitAddColTotals(AddColTotals node, C context) { return visitChildren(node, context); } + + public T visitGraphLookup(GraphLookup node, C context) { + return visitChildren(node, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java new file mode 100644 index 00000000000..53e1a6d2654 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import javax.annotation.Nullable; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.Setter; +import lombok.ToString; +import 
org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.Literal; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@Getter +@Setter +@ToString +@EqualsAndHashCode(callSuper = false) +@RequiredArgsConstructor +@AllArgsConstructor +@Builder(toBuilder = true) +public class GraphLookup extends UnresolvedPlan { + private final Field from; + private final Field to; + private final Field as; + // zero means no limit + private final Literal maxDepth; + private @Nullable final UnresolvedExpression startWith; + + private UnresolvedPlan child; + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return child == null ? ImmutableList.of() : ImmutableList.of(child); + } + + @Override + public T accept(AbstractNodeVisitor visitor, C context) { + return visitor.visitGraphLookup(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 19dce3e3609..764a104fc5d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -14,6 +14,14 @@ import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC; +import static org.opensearch.sql.calcite.utils.GraphLookupUtils.ANCHOR_FROM_ALIAS; +import static org.opensearch.sql.calcite.utils.GraphLookupUtils.ANCHOR_TO_ALIAS; +import static org.opensearch.sql.calcite.utils.GraphLookupUtils.DEPTH_FIELD; +import static org.opensearch.sql.calcite.utils.GraphLookupUtils.HIER_FIELD_SUFFIX; +import static org.opensearch.sql.calcite.utils.GraphLookupUtils.RECURSIVE_FROM_ALIAS; +import static 
org.opensearch.sql.calcite.utils.GraphLookupUtils.RECURSIVE_TABLE_NAME; +import static org.opensearch.sql.calcite.utils.GraphLookupUtils.RECURSIVE_TO_ALIAS; +import static org.opensearch.sql.calcite.utils.GraphLookupUtils.SRC_FIELD_SUFFIX; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_JOIN_MAX_DEDUP; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_MAIN; @@ -39,6 +47,7 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -115,6 +124,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; @@ -150,6 +160,7 @@ import org.opensearch.sql.calcite.plan.LogicalSystemLimit.SystemLimitType; import org.opensearch.sql.calcite.plan.OpenSearchConstants; import org.opensearch.sql.calcite.utils.BinUtils; +import org.opensearch.sql.calcite.utils.GraphLookupUtils; import org.opensearch.sql.calcite.utils.JoinAndLookupUtils; import org.opensearch.sql.calcite.utils.PPLHintUtils; import org.opensearch.sql.calcite.utils.PlanUtils; @@ -2518,6 +2529,75 @@ public RelNode visitAddColTotals(AddColTotals node, CalcitePlanContext context) context, fieldsToAggregate, false, true, null, labelField, label); } + @Override + public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { + visitChildren(node, context); + RelBuilder builder = context.relBuilder; + + List allFields = + builder.peek().getRowType().getFieldNames().stream() + .filter(Predicate.not(OpenSearchConstants.METADATAFIELD_TYPE_MAP::containsKey)) + 
.toList(); + List aliases = GraphLookupUtils.createAliases(allFields); + Literal maxDepth = node.getMaxDepth(); + String connectFromFieldName = node.getFrom().getField().toString(); + String connectToFieldName = node.getTo().getField().toString(); + RexLiteral maxDepthNode = (RexLiteral) rexVisitor.analyze(maxDepth, context); + int maxDepthValue = maxDepthNode.getValueAs(Integer.class); + maxDepthValue = maxDepthValue <= 0 ? -1 : maxDepthValue; + UnresolvedExpression startWith = node.getStartWith(); + if (startWith != null) { + RexNode startWithNode = rexVisitor.analyze(startWith, context); + } + String outputFiledName = node.getAs().getField().toString(); + + // 1. build anchor query + RelNode self = builder.peek(); + builder.as(ANCHOR_FROM_ALIAS); + builder.push(self).as(ANCHOR_TO_ALIAS); + builder + .join( + JoinRelType.INNER, + builder.equals( + builder.field(2, ANCHOR_FROM_ALIAS, connectFromFieldName), + builder.field(2, ANCHOR_TO_ALIAS, connectToFieldName))) + .project(GraphLookupUtils.createAnchorProjections(builder, allFields), aliases) + .as("anchor"); + + // 2. recursive query + builder.transientScan(RECURSIVE_TABLE_NAME).as(RECURSIVE_FROM_ALIAS); + builder.push(self).as(RECURSIVE_TO_ALIAS); + String hierConnectFromField = HIER_FIELD_SUFFIX + connectFromFieldName; + builder + .join( + JoinRelType.INNER, + builder.equals( + builder.field(2, RECURSIVE_FROM_ALIAS, hierConnectFromField), + builder.field(2, RECURSIVE_TO_ALIAS, connectToFieldName))) + .project(GraphLookupUtils.createRecursiveProjections(builder, allFields), aliases); + + // 3. combine RepeatUnion + builder.repeatUnion(RECURSIVE_TABLE_NAME, true, maxDepthValue); + + // 4. 
collect aggregation + List groupByFields = new ArrayList<>(); + for (String field : allFields) { + groupByFields.add(builder.field(SRC_FIELD_SUFFIX + field)); + } + List collectFields = new ArrayList<>(); + for (String field : allFields) { + collectFields.add(builder.field(HIER_FIELD_SUFFIX + field)); + } + collectFields.add(builder.field(DEPTH_FIELD)); + + RexNode rowExpr = builder.call(SqlStdOperatorTable.ROW, collectFields); + builder.aggregate( + builder.groupKey(groupByFields), + builder.aggregateCall(SqlStdOperatorTable.COLLECT, rowExpr).as(outputFiledName)); + + return builder.peek(); + } + /** * Cast integer sum to long, real/float to double to avoid ClassCastException * diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java new file mode 100644 index 00000000000..e71d721811a --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java @@ -0,0 +1,66 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import java.util.ArrayList; +import java.util.List; +import lombok.experimental.UtilityClass; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.tools.RelBuilder; + +@UtilityClass +public class GraphLookupUtils { + public static final String GRAPH_LOOKUP_PREFIX = "gl_"; + public static final String SRC_FIELD_SUFFIX = GRAPH_LOOKUP_PREFIX + "src_"; + public static final String HIER_FIELD_SUFFIX = GRAPH_LOOKUP_PREFIX + "hier_"; + public static final String ANCHOR_FROM_ALIAS = GRAPH_LOOKUP_PREFIX + "anchor_from"; + public static final String ANCHOR_TO_ALIAS = GRAPH_LOOKUP_PREFIX + "anchor_to"; + public static final String DEPTH_FIELD = GRAPH_LOOKUP_PREFIX + "depth"; + public static final String RECURSIVE_FROM_ALIAS = GRAPH_LOOKUP_PREFIX + "rec_from"; + public static final 
String RECURSIVE_TO_ALIAS = GRAPH_LOOKUP_PREFIX + "rec_to"; + public static final String RECURSIVE_TABLE_NAME = GRAPH_LOOKUP_PREFIX + "recursive"; + + public List createAliases(List fields) { + List aliases = new ArrayList<>(); + for (String field : fields) { + aliases.add(SRC_FIELD_SUFFIX + field); + } + for (String field : fields) { + aliases.add(HIER_FIELD_SUFFIX + field); + } + aliases.add(DEPTH_FIELD); + return aliases; + } + + public List createAnchorProjections(RelBuilder builder, List fields) { + List projections = new ArrayList<>(); + for (String field : fields) { + projections.add(builder.field(ANCHOR_FROM_ALIAS, field)); + } + for (String field : fields) { + projections.add(builder.field(ANCHOR_TO_ALIAS, field)); + } + projections.add(builder.literal(1)); + return projections; + } + + public List createRecursiveProjections(RelBuilder builder, List fields) { + List projections = new ArrayList<>(); + for (String field : fields) { + projections.add(builder.field(RECURSIVE_FROM_ALIAS, SRC_FIELD_SUFFIX + field)); + } + for (String field : fields) { + projections.add(builder.field(RECURSIVE_TO_ALIAS, field)); + } + projections.add( + builder.call( + SqlStdOperatorTable.PLUS, + builder.field(RECURSIVE_FROM_ALIAS, DEPTH_FIELD), + builder.literal(1))); + return projections; + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 17d99fb4fbb..8dfe963081c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -223,7 +223,7 @@ public static ExprType convertSqlTypeNameToExprType(SqlTypeName sqlTypeName) { case BIGINT -> LONG; case FLOAT, REAL -> FLOAT; case DOUBLE, DECIMAL -> DOUBLE; // TODO the decimal is only used for literal - case CHAR, VARCHAR -> STRING; + case CHAR, VARCHAR, MULTISET -> STRING; // call 
toString() for MULTISET case BOOLEAN -> BOOLEAN; case DATE -> DATE; case TIME, TIME_TZ, TIME_WITH_LOCAL_TIME_ZONE -> TIME; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 6fd3927fbfb..db19af75294 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -19,6 +19,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; +import org.apache.calcite.avatica.util.StructImpl; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; @@ -235,6 +236,9 @@ private static Object processValue(Object value) { } return convertedMap; } + if (value instanceof StructImpl) { + return ((StructImpl) value).toString(); + } if (value instanceof List) { List list = (List) value; List convertedList = new ArrayList<>(); diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 71162e81bd8..a27b2cb0c85 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -51,6 +51,11 @@ TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; ADDTOTALS: 'ADDTOTALS'; ADDCOLTOTALS: 'ADDCOLTOTALS'; +GRAPHLOOKUP: 'GRAPHLOOKUP'; +START_WITH: 'STARTWITH'; +CONNECT_FROM: 'CONNECTFROM'; +CONNECT_TO: 'CONNECTTO'; +MAX_DEPTH: 'MAXDEPTH'; ROW: 'ROW'; COL: 'COL'; EXPAND: 'EXPAND'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 7045796a03c..0eef361375d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -88,6 +88,7 @@ commands | rexCommand | appendPipeCommand | 
replaceCommand + | graphLookupCommand ; commandName @@ -131,6 +132,7 @@ commandName | REX | APPENDPIPE | REPLACE + | GRAPHLOOKUP ; searchCommand @@ -607,6 +609,17 @@ addcoltotalsOption | (LABELFIELD EQUAL stringLiteral) ; +graphLookupCommand + : GRAPHLOOKUP graphLookupOption* AS outputField = fieldExpression + ; + +graphLookupOption + : (CONNECT_FROM EQUAL fieldExpression) + | (CONNECT_TO EQUAL fieldExpression) + | (MAX_DEPTH EQUAL integerLiteral) + | (START_WITH EQUAL valueExpression) + ; + // clauses fromClause : SOURCE EQUAL tableOrSubqueryClause @@ -1657,5 +1670,8 @@ searchableKeyWord | FIELDNAME | ROW | COL + | CONNECT_FROM + | CONNECT_TO + | MAX_DEPTH ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 3f4f3049365..343089d4999 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -84,6 +84,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; @@ -1451,4 +1452,31 @@ public UnresolvedPlan visitAddcoltotalsCommand( java.util.Map options = cmdOptionsBuilder.build(); return new AddColTotals(fieldList, options); } + + @Override + public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCommandContext ctx) { + Field from = null; + Field to = null; + Literal maxDepth = Literal.ZERO; + UnresolvedExpression startWith = null; + for (OpenSearchPPLParser.GraphLookupOptionContext option : ctx.graphLookupOption()) { + if (option.CONNECT_FROM() != null) { + from = (Field) internalVisitExpression(option.fieldExpression()); + } + if (option.CONNECT_TO() != null) { + to = (Field) 
internalVisitExpression(option.fieldExpression()); + } + if (option.MAX_DEPTH() != null) { + maxDepth = (Literal) internalVisitExpression(option.integerLiteral()); + } + if (option.START_WITH() != null) { + startWith = internalVisitExpression(option.valueExpression()); + } + } + Field as = (Field) internalVisitExpression(ctx.outputField); + if (from == null || to == null) { + throw new SemanticCheckException("connectFrom and connectTo must be specified"); + } + return new GraphLookup(from, to, as, maxDepth, startWith); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java new file mode 100644 index 00000000000..548d7ab05e2 --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -0,0 +1,140 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import 
org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; + +public class CalcitePPLGraphLookupTest extends CalcitePPLAbstractTest { + + public CalcitePPLGraphLookupTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Test + public void testGraphLookup() { + String ppl = + "source=employee | graphLookup connectFrom=reportsTo connectTo=name as reportingHierarchy"; + + String expectedLogical = + "LogicalAggregate(group=[{0, 1, 2}], reportingHierarchy=[COLLECT($3)])\n" + + " LogicalProject(gl_src_id=[$0], gl_src_name=[$1], gl_src_reportsTo=[$2]," + + " $f7=[ROW($3, $4, $5, $6)])\n" + + " LogicalRepeatUnion(all=[true])\n" + + " LogicalTableSpool(readType=[LAZY], writeType=[LAZY], table=[[gl_recursive]])\n" + + " LogicalProject(gl_src_id=[$0], gl_src_name=[$1], gl_src_reportsTo=[$2]," + + " gl_hier_id=[$3], gl_hier_name=[$4], gl_hier_reportsTo=[$5], gl_depth=[1])\n" + + " LogicalJoin(condition=[=($2, $4)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalTableSpool(readType=[LAZY], writeType=[LAZY], table=[[gl_recursive]])\n" + + " LogicalProject(gl_src_id=[$0], gl_src_name=[$1], gl_src_reportsTo=[$2]," + + " gl_hier_id=[$7], gl_hier_name=[$8], gl_hier_reportsTo=[$9], gl_depth=[+($6, 1)])\n" + + " LogicalJoin(condition=[=($5, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[gl_recursive]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + RelNode root = getRelNode(ppl); + verifyLogical(root, expectedLogical); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... 
schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Add employee table for graphLookup tests + ImmutableList rows = + ImmutableList.of( + new Object[] {1, "Dev", null}, + new Object[] {2, "Eliot", "Dev"}, + new Object[] {3, "Ron", "Eliot"}, + new Object[] {4, "Andrew", "Eliot"}, + new Object[] {5, "Asya", "Ron"}, + new Object[] {6, "Dan", "Andrew"}); + schema.add("employee", new EmployeeTable(rows)); + + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @RequiredArgsConstructor + public static class EmployeeTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("id", SqlTypeName.INTEGER) + .nullable(false) + .add("name", SqlTypeName.VARCHAR) + .nullable(false) + .add("reportsTo", SqlTypeName.VARCHAR) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java
b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 9e1cfe05a4b..dbf33d38e86 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -77,6 +77,7 @@ import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.Chart; +import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -1643,4 +1644,40 @@ public void testMvmapWithNonFieldFirstArgThrowsException() { () -> plan("source=t | eval result = mvmap(123, 123 * 10)")) .getMessage()); } + + @Test + public void testGraphLookupCommand() { + assertEqual( + "source=t | graphLookup connectFrom=name connectTo=reportTo maxDepth=3 as" + + " reportingHierarchy", + GraphLookup.builder() + .child(relation("t")) + .from(field("name")) + .to(field("reportTo")) + .as(field("reportingHierarchy")) + .maxDepth(intLiteral(3)) + .startWith(null) + .build()); + assertEqual( + "source=t | graphLookup connectFrom=name connectTo=reportTo startWith='hello' as" + + " reportingHierarchy", + GraphLookup.builder() + .child(relation("t")) + .from(field("name")) + .to(field("reportTo")) + .as(field("reportingHierarchy")) + .maxDepth(intLiteral(0)) + .startWith(stringLiteral("hello")) + .build()); + + assertThrows( + SyntaxCheckException.class, + () -> plan("| graphLookup connectTo=reportTo startWith='hello' as reportingHierarchy")); + assertThrows( + SyntaxCheckException.class, + () -> plan("| graphLookup connectFrom=name connectTo=reportTo startWith='hello'")); + assertThrows( + SyntaxCheckException.class, + () -> plan("| graphLookup connectFrom=name as reportingHierarchy")); + } } From 8d591e1a9ea8565742c83150b5a1196317baee17 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 30 Jan 2026 12:16:48 +0800 Subject: [PATCH 02/23] 
Implement graph lookup RelNode Signed-off-by: Heng Qian --- .../ast/analysis/FieldResolutionVisitor.java | 7 + .../opensearch/sql/ast/tree/GraphLookup.java | 40 +- .../sql/calcite/CalciteRelNodeVisitor.java | 101 ++--- .../sql/calcite/plan/rel/GraphLookup.java | 139 +++++++ .../calcite/plan/rel/LogicalGraphLookup.java | 114 ++++++ .../sql/calcite/utils/GraphLookupUtils.java | 66 ---- .../function/BuiltinFunctionName.java | 5 +- .../executor/OpenSearchExecutionEngine.java | 3 + .../functions/GraphLookupBfsFunction.java | 80 ++++ .../functions/GraphLookupFunction.java | 240 ++++++++++++ .../rules/EnumerableGraphLookupRule.java | 103 +++++ .../planner/rules/OpenSearchIndexRules.java | 8 +- .../opensearch/request/PredicateAnalyzer.java | 8 +- .../opensearch/storage/OpenSearchIndex.java | 2 +- .../scan/CalciteEnumerableGraphLookup.java | 365 ++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 8 +- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 18 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 49 ++- .../calcite/CalcitePPLGraphLookupTest.java | 67 ++-- .../sql/ppl/parser/AstBuilderTest.java | 64 ++- 20 files changed, 1287 insertions(+), 200 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java create mode 100644 core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java delete mode 100644 core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java diff --git 
a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java index a6f6671084a..a44f0bca41c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java @@ -40,6 +40,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Lookup; @@ -529,6 +530,12 @@ public Node visitLookup(Lookup node, FieldResolutionContext context) { throw new IllegalArgumentException("Lookup command cannot be used together with spath command"); } + @Override + public Node visitGraphLookup(GraphLookup node, FieldResolutionContext context) { + throw new IllegalArgumentException( + "GraphLookup command cannot be used together with spath command"); + } + @Override public Node visitValues(Values node, FieldResolutionContext context) { // do nothing diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java index 53e1a6d2654..d39d5a665ae 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -18,8 +18,13 @@ import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.expression.UnresolvedExpression; +/** + * AST node for graphLookup command. Performs BFS graph traversal on a lookup table. + * + *

Example: source=employees | graphLookup employees connectFromField=manager connectToField=name + * maxDepth=3 depthField=level direction=uni as hierarchy + */ +@Getter +@Setter +@ToString @@ -28,12 +33,37 @@ @AllArgsConstructor @Builder(toBuilder = true) public class GraphLookup extends UnresolvedPlan { - private final Field from; - private final Field to; + /** Direction mode for graph traversal. */ + public enum Direction { + /** Unidirectional - traverse edges in one direction only. */ + UNI, + /** Bidirectional - traverse edges in both directions. */ + BI + } + + /** Target table for graph traversal lookup. */ + private final UnresolvedPlan fromTable; + + /** Field in sourceTable to start with. */ + private final Field startWith; + + /** Field in fromTable that represents the outgoing edge. */ + private final Field connectFromField; + + /** Field in input/fromTable to match against for traversal. */ + private final Field connectToField; + + /** Output field name for collected traversal results. */ private final Field as; - // zero means no limit + + /** Maximum traversal depth. Zero means no limit. */ private final Literal maxDepth; - private @Nullable final UnresolvedExpression startWith; + + /** Optional field name to include recursion depth in output. */ + private @Nullable final Field depthField; + + /** Direction mode: UNI (default) or BI for bidirectional. 
*/ + private final Direction direction; private UnresolvedPlan child; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index bc8bbd4eed3..5305602b79c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -14,14 +14,6 @@ import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.ASC; import static org.opensearch.sql.ast.tree.Sort.SortOrder.DESC; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.ANCHOR_FROM_ALIAS; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.ANCHOR_TO_ALIAS; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.DEPTH_FIELD; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.HIER_FIELD_SUFFIX; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.RECURSIVE_FROM_ALIAS; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.RECURSIVE_TABLE_NAME; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.RECURSIVE_TO_ALIAS; -import static org.opensearch.sql.calcite.utils.GraphLookupUtils.SRC_FIELD_SUFFIX; import static org.opensearch.sql.calcite.plan.DynamicFieldsConstants.DYNAMIC_FIELDS_MAP; import static org.opensearch.sql.calcite.plan.rule.PPLDedupConvertRule.buildDedupNotNull; import static org.opensearch.sql.calcite.plan.rule.PPLDedupConvertRule.buildDedupOrNull; @@ -48,7 +40,6 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; -import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -128,6 +119,7 @@ import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; +import 
org.opensearch.sql.ast.tree.GraphLookup.Direction; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; @@ -161,10 +153,10 @@ import org.opensearch.sql.ast.tree.Window; import org.opensearch.sql.calcite.plan.AliasFieldsWrappable; import org.opensearch.sql.calcite.plan.OpenSearchConstants; +import org.opensearch.sql.calcite.plan.rel.LogicalGraphLookup; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit; import org.opensearch.sql.calcite.plan.rel.LogicalSystemLimit.SystemLimitType; import org.opensearch.sql.calcite.utils.BinUtils; -import org.opensearch.sql.calcite.utils.GraphLookupUtils; import org.opensearch.sql.calcite.utils.JoinAndLookupUtils; import org.opensearch.sql.calcite.utils.JoinAndLookupUtils.OverwriteMode; import org.opensearch.sql.calcite.utils.PPLHintUtils; @@ -2586,70 +2578,43 @@ public RelNode visitAddColTotals(AddColTotals node, CalcitePlanContext context) @Override public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { + // 1. Visit source (child) table visitChildren(node, context); RelBuilder builder = context.relBuilder; + RelNode sourceTable = builder.build(); - List allFields = - builder.peek().getRowType().getFieldNames().stream() - .filter(Predicate.not(OpenSearchConstants.METADATAFIELD_TYPE_MAP::containsKey)) - .toList(); - List aliases = GraphLookupUtils.createAliases(allFields); - Literal maxDepth = node.getMaxDepth(); - String connectFromFieldName = node.getFrom().getField().toString(); - String connectToFieldName = node.getTo().getField().toString(); - RexLiteral maxDepthNode = (RexLiteral) rexVisitor.analyze(maxDepth, context); + // 2. 
Extract parameters + String startWith = node.getStartWith().getField().toString(); + String connectFromFieldName = node.getConnectFromField().getField().toString(); + String connectToFieldName = node.getConnectToField().getField().toString(); + String outputFieldName = node.getAs().getField().toString(); + String depthFieldName = node.getDepthField().toString(); + boolean bidirectional = node.getDirection() == Direction.BI; + + RexLiteral maxDepthNode = (RexLiteral) rexVisitor.analyze(node.getMaxDepth(), context); int maxDepthValue = maxDepthNode.getValueAs(Integer.class); maxDepthValue = maxDepthValue <= 0 ? -1 : maxDepthValue; - UnresolvedExpression startWith = node.getStartWith(); - if (startWith != null) { - RexNode startWithNode = rexVisitor.analyze(startWith, context); - } - String outputFiledName = node.getAs().getField().toString(); - - // 1. build anchor query - RelNode self = builder.peek(); - builder.as(ANCHOR_FROM_ALIAS); - builder.push(self).as(ANCHOR_TO_ALIAS); - builder - .join( - JoinRelType.INNER, - builder.equals( - builder.field(2, ANCHOR_FROM_ALIAS, connectFromFieldName), - builder.field(2, ANCHOR_TO_ALIAS, connectToFieldName))) - .project(GraphLookupUtils.createAnchorProjections(builder, allFields), aliases) - .as("anchor"); - - // 2. recursive query - builder.transientScan(RECURSIVE_TABLE_NAME).as(RECURSIVE_FROM_ALIAS); - builder.push(self).as(RECURSIVE_TO_ALIAS); - String hierConnectFromField = HIER_FIELD_SUFFIX + connectFromFieldName; - builder - .join( - JoinRelType.INNER, - builder.equals( - builder.field(2, RECURSIVE_FROM_ALIAS, hierConnectFromField), - builder.field(2, RECURSIVE_TO_ALIAS, connectToFieldName))) - .project(GraphLookupUtils.createRecursiveProjections(builder, allFields), aliases); - - // 3. combine RepeatUnion - builder.repeatUnion(RECURSIVE_TABLE_NAME, true, maxDepthValue); - - // 4. 
collect aggregation - List groupByFields = new ArrayList<>(); - for (String field : allFields) { - groupByFields.add(builder.field(SRC_FIELD_SUFFIX + field)); - } - List collectFields = new ArrayList<>(); - for (String field : allFields) { - collectFields.add(builder.field(HIER_FIELD_SUFFIX + field)); - } - collectFields.add(builder.field(DEPTH_FIELD)); - - RexNode rowExpr = builder.call(SqlStdOperatorTable.ROW, collectFields); - builder.aggregate( - builder.groupKey(groupByFields), - builder.aggregateCall(SqlStdOperatorTable.COLLECT, rowExpr).as(outputFiledName)); + // 3. Visit and materialize lookup table + analyze(node.getFromTable(), context); + tryToRemoveMetaFields(context, true); + RelNode lookupTable = builder.build(); + + // 4. Create LogicalGraphLookup RelNode + // The conversion rule will extract the OpenSearchIndex from the lookup table + RelNode graphLookup = + LogicalGraphLookup.create( + sourceTable, + lookupTable, + startWith, + connectFromFieldName, + connectToFieldName, + outputFieldName, + depthFieldName, + maxDepthValue, + bidirectional); + + builder.push(graphLookup); return builder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java new file mode 100644 index 00000000000..a99907eb80f --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -0,0 +1,139 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.plan.rel; + +import java.util.List; +import lombok.Getter; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.BiRel; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelWriter; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import 
org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.type.SqlTypeName; + +/** + * Abstract RelNode for graphLookup command. + * + *

Has two inputs: + * + *

    + *
  • source: source table (rows to start BFS from) + *
  • lookup: lookup table (graph edges to traverse) + *
+ * + *

At execution time, performs BFS by dynamically querying OpenSearch with filter pushdown + * instead of loading all lookup data into memory. + * + *

This is a storage-agnostic logical node. Storage-specific implementations (e.g., for + * OpenSearch) should extract the necessary index information from the lookup RelNode during + * conversion to the physical plan. + */ +@Getter +public abstract class GraphLookup extends BiRel { + + // TODO: use RexInputRef instead of String for there fields + protected final String startWith; // Field in source table (start entities) + protected final String connectFromField; // Field in lookup table (edge source) + protected final String connectToField; // Field in lookup table (edge target) + protected final String outputField; // Name of output array field + protected final String depthField; // Name of output array field + protected final int maxDepth; // -1 = unlimited + protected final boolean bidirectional; + + /** + * Creates a LogicalGraphLookup. + * + * @param cluster Cluster + * @param traitSet Trait set + * @param source Source table RelNode + * @param lookup Lookup table RelNode + * @param startWith Field name for start entities + * @param connectFromField Field name for outgoing edges + * @param connectToField Field name for incoming edges + * @param outputField Name of the output array field + * @param depthField Name of the depth field + * @param maxDepth Maximum traversal depth (-1 for unlimited) + * @param bidirectional Whether to traverse edges in both directions + */ + protected GraphLookup( + RelOptCluster cluster, + RelTraitSet traitSet, + RelNode source, + RelNode lookup, + String startWith, + String connectFromField, + String connectToField, + String outputField, + String depthField, + int maxDepth, + boolean bidirectional) { + super(cluster, traitSet, source, lookup); + this.startWith = startWith; + this.connectFromField = connectFromField; + this.connectToField = connectToField; + this.outputField = outputField; + this.depthField = depthField; + this.maxDepth = maxDepth; + this.bidirectional = bidirectional; + } + + /** Returns the source table RelNode. 
*/ + public RelNode getSource() { + return left; + } + + /** Returns the lookup table RelNode. */ + public RelNode getLookup() { + return right; + } + + @Override + public abstract RelNode copy(RelTraitSet traitSet, List inputs); + + @Override + protected RelDataType deriveRowType() { + // Output = source fields + output array field + RelDataTypeFactory.Builder builder = getCluster().getTypeFactory().builder(); + + // Add all source fields + for (var field : getSource().getRowType().getFieldList()) { + builder.add(field); + } + + // Add output field (ARRAY type containing lookup row struct) + RelDataType lookupRowType = getLookup().getRowType(); + if (this.depthField != null) { + final RelDataTypeFactory.Builder lookupBuilder = getCluster().getTypeFactory().builder(); + lookupBuilder.addAll(lookupRowType.getFieldList()); + RelDataType depthType = getCluster().getTypeFactory().createSqlType(SqlTypeName.INTEGER); + lookupBuilder.add(this.depthField, depthType); + lookupRowType = lookupBuilder.build(); + } + RelDataType arrayType = getCluster().getTypeFactory().createArrayType(lookupRowType, -1); + builder.add(outputField, arrayType); + + return builder.build(); + } + + @Override + public double estimateRowCount(RelMetadataQuery mq) { + return getSource().estimateRowCount(mq); + } + + @Override + public RelWriter explainTerms(RelWriter pw) { + return super.explainTerms(pw) + .item("connectFromField", connectFromField) + .item("connectToField", connectToField) + .item("outputField", outputField) + .item("depthField", depthField) + .item("maxDepth", maxDepth) + .item("bidirectional", bidirectional); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java new file mode 100644 index 00000000000..9ca386b618f --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java @@ -0,0 +1,114 @@ +/* + * Copyright OpenSearch 
Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.plan.rel; + +import java.util.List; +import lombok.Getter; +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; + +/** Logical RelNode for graphLookup command. */ +@Getter +public class LogicalGraphLookup extends GraphLookup { + + /** + * Creates a LogicalGraphLookup. + * + * @param cluster Cluster + * @param traitSet Trait set + * @param source Source table RelNode + * @param lookup Lookup table RelNode + * @param connectFromField Field name for outgoing edges + * @param connectToField Field name for incoming edges + * @param outputField Name of the output array field + * @param depthField Name of the depth field + * @param maxDepth Maximum traversal depth (-1 for unlimited) + * @param bidirectional Whether to traverse edges in both directions + */ + protected LogicalGraphLookup( + RelOptCluster cluster, + RelTraitSet traitSet, + RelNode source, + RelNode lookup, + String startWith, + String connectFromField, + String connectToField, + String outputField, + String depthField, + int maxDepth, + boolean bidirectional) { + super( + cluster, + traitSet, + source, + lookup, + startWith, + connectFromField, + connectToField, + outputField, + depthField, + maxDepth, + bidirectional); + } + + /** + * Creates a LogicalGraphLookup with Convention.NONE. 
+ * + * @param source Source table RelNode + * @param lookup Lookup table RelNode + * @param startWith Field name for start with entities + * @param connectFromField Field name for outgoing edges + * @param connectToField Field name for incoming edges + * @param outputField Name of the output array field + * @param maxDepth Maximum traversal depth (-1 for unlimited) + * @param bidirectional Whether to traverse edges in both directions + * @param depthField Named of the output depth field + * @return A new LogicalGraphLookup instance + */ + public static LogicalGraphLookup create( + RelNode source, + RelNode lookup, + String startWith, + String connectFromField, + String connectToField, + String outputField, + String depthField, + int maxDepth, + boolean bidirectional) { + RelOptCluster cluster = source.getCluster(); + RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); + return new LogicalGraphLookup( + cluster, + traitSet, + source, + lookup, + startWith, + connectFromField, + connectToField, + outputField, + depthField, + maxDepth, + bidirectional); + } + + @Override + public RelNode copy(RelTraitSet traitSet, List inputs) { + return new LogicalGraphLookup( + getCluster(), + traitSet, + inputs.get(0), + inputs.get(1), + startWith, + connectFromField, + connectToField, + outputField, + depthField, + maxDepth, + bidirectional); + } +} diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java deleted file mode 100644 index e71d721811a..00000000000 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/GraphLookupUtils.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.calcite.utils; - -import java.util.ArrayList; -import java.util.List; -import lombok.experimental.UtilityClass; -import org.apache.calcite.rex.RexNode; -import 
org.apache.calcite.sql.fun.SqlStdOperatorTable; -import org.apache.calcite.tools.RelBuilder; - -@UtilityClass -public class GraphLookupUtils { - public static final String GRAPH_LOOKUP_PREFIX = "gl_"; - public static final String SRC_FIELD_SUFFIX = GRAPH_LOOKUP_PREFIX + "src_"; - public static final String HIER_FIELD_SUFFIX = GRAPH_LOOKUP_PREFIX + "hier_"; - public static final String ANCHOR_FROM_ALIAS = GRAPH_LOOKUP_PREFIX + "anchor_from"; - public static final String ANCHOR_TO_ALIAS = GRAPH_LOOKUP_PREFIX + "anchor_to"; - public static final String DEPTH_FIELD = GRAPH_LOOKUP_PREFIX + "depth"; - public static final String RECURSIVE_FROM_ALIAS = GRAPH_LOOKUP_PREFIX + "rec_from"; - public static final String RECURSIVE_TO_ALIAS = GRAPH_LOOKUP_PREFIX + "rec_to"; - public static final String RECURSIVE_TABLE_NAME = GRAPH_LOOKUP_PREFIX + "recursive"; - - public List createAliases(List fields) { - List aliases = new ArrayList<>(); - for (String field : fields) { - aliases.add(SRC_FIELD_SUFFIX + field); - } - for (String field : fields) { - aliases.add(HIER_FIELD_SUFFIX + field); - } - aliases.add(DEPTH_FIELD); - return aliases; - } - - public List createAnchorProjections(RelBuilder builder, List fields) { - List projections = new ArrayList<>(); - for (String field : fields) { - projections.add(builder.field(ANCHOR_FROM_ALIAS, field)); - } - for (String field : fields) { - projections.add(builder.field(ANCHOR_TO_ALIAS, field)); - } - projections.add(builder.literal(1)); - return projections; - } - - public List createRecursiveProjections(RelBuilder builder, List fields) { - List projections = new ArrayList<>(); - for (String field : fields) { - projections.add(builder.field(RECURSIVE_FROM_ALIAS, SRC_FIELD_SUFFIX + field)); - } - for (String field : fields) { - projections.add(builder.field(RECURSIVE_TO_ALIAS, field)); - } - projections.add( - builder.call( - SqlStdOperatorTable.PLUS, - builder.field(RECURSIVE_FROM_ALIAS, DEPTH_FIELD), - builder.literal(1))); - return 
projections; - } -} diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 37052ec858c..fa34041f703 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -351,7 +351,10 @@ public enum BuiltinFunctionName { INTERNAL_PARSE(FunctionName.of("parse"), true), INTERNAL_REGEXP_REPLACE_PG_4(FunctionName.of("regexp_replace_pg_4"), true), INTERNAL_REGEXP_REPLACE_5(FunctionName.of("regexp_replace_5"), true), - INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true); + INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true), + + /** Graph functions */ + GRAPH_LOOKUP(FunctionName.of("graph_lookup"), true); private final FunctionName name; private boolean isInternal; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 47d10735ae3..58d797f4bf9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -334,6 +334,9 @@ private void registerOpenSearchFunctions() { BuiltinFunctionName.DISTINCT_COUNT_APPROX, approxDistinctCountFunction); OperatorTable.addOperator( BuiltinFunctionName.DISTINCT_COUNT_APPROX.name(), approxDistinctCountFunction); + + // Note: GraphLookup is now implemented as a custom RelNode (LogicalGraphLookup) + // instead of a UDF, so no registration is needed here. 
} /** diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java new file mode 100644 index 00000000000..1a3b1fa59cc --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java @@ -0,0 +1,80 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.functions; + +import java.util.List; +import org.apache.calcite.adapter.enumerable.NotNullImplementor; +import org.apache.calcite.adapter.enumerable.NullPolicy; +import org.apache.calcite.adapter.enumerable.RexToLixTranslator; +import org.apache.calcite.linq4j.tree.Expression; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.linq4j.tree.Types; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexSubQuery; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.sql.expression.function.ImplementorUDF; +import org.opensearch.sql.expression.function.UDFOperandMetadata; + +/** + * UDF wrapper for graph traversal BFS function. + * + *

Parameters: + * + *

    + *
  • startValue: Object - starting value for BFS + *
  • lookupData: List - collected rows from lookup table + *
  • connectFromIdx: int - index of connectFrom field + *
  • connectToIdx: int - index of connectTo field + *
  • maxDepth: int - max traversal depth (-1 = unlimited) + *
  • bidirectional: boolean - traverse both directions + *
  • includeDepth: boolean - include depth in output + *
+ * + *

Returns: List - array of [row_fields..., depth?] + */ +public class GraphLookupBfsFunction extends ImplementorUDF { + + public GraphLookupBfsFunction() { + super(new GraphLookupBfsImplementor(), NullPolicy.ANY); + } + + @Override + public SqlReturnTypeInference getReturnTypeInference() { + // Return ARRAY - actual struct type depends on lookup table schema + return opBinding -> { + var typeFactory = opBinding.getTypeFactory(); + var anyType = typeFactory.createSqlType(SqlTypeName.ANY); + return typeFactory.createArrayType(anyType, -1); + }; + } + + @Override + public UDFOperandMetadata getOperandMetadata() { + return null; // Accept any operand types + } + + private static class GraphLookupBfsImplementor implements NotNullImplementor { + @Override + public Expression implement( + RexToLixTranslator translator, RexCall call, List translatedOperands) { + // Args: startValue, lookupData, connectFromIdx, connectToIdx, maxDepth, bidirectional, + // includeDepth + return Expressions.call( + Types.lookupMethod( + GraphLookupFunction.class, + "executeWithDynamicLookup", + Object.class, // startValue + RexSubQuery.class, // lookupData + int.class, // connectFromIdx + int.class, // connectToIdx + int.class, // maxDepth + boolean.class, // bidirectional + boolean.class), // includeDepth + translatedOperands); + } + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java new file mode 100644 index 00000000000..a38f9c1d739 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java @@ -0,0 +1,240 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.functions; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import 
java.util.Map; +import java.util.Objects; +import java.util.Queue; +import java.util.Set; +import org.apache.calcite.rex.RexSubQuery; + +/** + * BFS-based graph traversal function for the graphLookup command. + * + *

This function performs breadth-first search traversal on a graph represented by rows in a + * lookup table. It follows edges from starting nodes and collects all reachable nodes up to a + * specified depth. + * + *

The algorithm is inspired by MongoDB's $graphLookup operator. + */ +public class GraphLookupFunction { + + /** Internal class to track nodes during BFS with their depth. */ + public record NodeWithDepth(Object value, int depth) { + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NodeWithDepth that = (NodeWithDepth) o; + return Objects.equals(value, that.value); + } + + @Override + public int hashCode() { + return Objects.hash(value); + } + } + + /** Result of a single node traversal, including the row data and traversal depth. */ + public record TraversalResult(Object[] row, int depth) {} + + /** + * Execute BFS graph traversal starting from a given value. + * + * @param startValue The starting value to begin traversal from + * @param lookupTableRows All rows from the lookup table + * @param connectFromFieldIndex Index of the field that represents outgoing edges (the field + * values we traverse FROM) + * @param connectToFieldIndex Index of the field that represents the target to match against (the + * field values we traverse TO) + * @param maxDepth Maximum traversal depth (-1 or 0 for unlimited) + * @param bidirectional If true, traverse edges in both directions + * @return List of traversal results containing row data and depth + */ + public static List execute( + Object startValue, + List lookupTableRows, + int connectFromFieldIndex, + int connectToFieldIndex, + int maxDepth, + boolean bidirectional) { + + if (startValue == null || lookupTableRows == null || lookupTableRows.isEmpty()) { + return List.of(); + } + + // Build adjacency index: connectToField value -> list of rows with matching connectFromField + // This creates edges: when we're at a node with connectFromField=X, we can traverse to nodes + // where connectToField=X + Map> forwardAdjacency = new HashMap<>(); + + // For bidirectional: also index reverse edges + // connectFromField value -> list of rows with 
matching connectToField + Map> reverseAdjacency = bidirectional ? new HashMap<>() : null; + + for (Object[] row : lookupTableRows) { + Object connectFromValue = row[connectFromFieldIndex]; + Object connectToValue = row[connectToFieldIndex]; + + // Forward edge: from connectFromValue, we can reach this row + if (connectFromValue != null) { + forwardAdjacency.computeIfAbsent(connectFromValue, k -> new ArrayList<>()).add(row); + } + + // Reverse edge (for bidirectional): from connectToValue, we can reach this row + if (bidirectional && connectToValue != null) { + reverseAdjacency.computeIfAbsent(connectToValue, k -> new ArrayList<>()).add(row); + } + } + + // BFS traversal + List results = new ArrayList<>(); + Set visited = new HashSet<>(); + Queue queue = new ArrayDeque<>(); + + // Start BFS from the starting value + queue.offer(new NodeWithDepth(startValue, 0)); + visited.add(startValue); + + while (!queue.isEmpty()) { + NodeWithDepth current = queue.poll(); + int currentDepth = current.depth(); + + // Check depth limit + if (maxDepth > 0 && currentDepth >= maxDepth) { + continue; + } + + // Get adjacent nodes via forward edges + List forwardNeighbors = forwardAdjacency.get(current.value()); + if (forwardNeighbors != null) { + for (Object[] neighborRow : forwardNeighbors) { + Object neighborKey = neighborRow[connectToFieldIndex]; + if (!visited.contains(neighborKey)) { + visited.add(neighborKey); + results.add(new TraversalResult(neighborRow, currentDepth + 1)); + queue.offer(new NodeWithDepth(neighborKey, currentDepth + 1)); + } + } + } + + // For bidirectional: also traverse reverse edges + if (bidirectional && reverseAdjacency != null) { + List reverseNeighbors = reverseAdjacency.get(current.value()); + if (reverseNeighbors != null) { + for (Object[] neighborRow : reverseNeighbors) { + Object neighborKey = neighborRow[connectFromFieldIndex]; + if (!visited.contains(neighborKey)) { + visited.add(neighborKey); + results.add(new TraversalResult(neighborRow, 
currentDepth + 1)); + queue.offer(new NodeWithDepth(neighborKey, currentDepth + 1)); + } + } + } + } + } + + return results; + } + + /** + * Convenience method to get the starting value from an input row. + * + * @param inputRow The input row + * @param connectToFieldIndex Index of the field in input that contains the starting value + * @return The starting value for traversal + */ + public static Object getStartValue(Object[] inputRow, int connectToFieldIndex) { + if (inputRow == null || connectToFieldIndex < 0 || connectToFieldIndex >= inputRow.length) { + return null; + } + return inputRow[connectToFieldIndex]; + } + + /** + * Convert traversal results to an array format suitable for aggregation. + * + * @param results List of traversal results + * @param includeDepth Whether to include depth information in the output + * @return Array of row arrays (with optional depth appended) + */ + public static Object[] toResultArray(List results, boolean includeDepth) { + if (results == null || results.isEmpty()) { + return new Object[0]; + } + + Object[] resultArray = new Object[results.size()]; + for (int i = 0; i < results.size(); i++) { + TraversalResult result = results.get(i); + if (includeDepth) { + // Append depth to the row + Object[] rowWithDepth = new Object[result.row().length + 1]; + System.arraycopy(result.row(), 0, rowWithDepth, 0, result.row().length); + rowWithDepth[result.row().length] = result.depth(); + resultArray[i] = rowWithDepth; + } else { + resultArray[i] = result.row(); + } + } + return resultArray; + } + + /** + * Entry point for UDF invocation. Converts List to Object[] and returns results. 
+ * + * @param startValue Starting value for BFS traversal + * @param lookupTable Collected rows from lookup table + * @param connectFromIdx Index of connectFrom field in lookup rows + * @param connectToIdx Index of connectTo field in lookup rows + * @param maxDepth Maximum traversal depth (-1 = unlimited) + * @param bidirectional Whether to traverse edges in both directions + * @param includeDepth Whether to include depth in output rows + * @return List of result rows as Object arrays + */ + public static List executeWithDynamicLookup( + Object startValue, + RexSubQuery lookupTable, + int connectFromIdx, + int connectToIdx, + int maxDepth, + boolean bidirectional, + boolean includeDepth) { + + if (lookupTable == null) { + return List.of(); + } + + // Convert List to List + List rows = new ArrayList<>(); + for (Object item : List.of()) { + if (item instanceof Object[] arr) { + rows.add(arr); + } + } + + List results = + execute(startValue, rows, connectFromIdx, connectToIdx, maxDepth, bidirectional); + + // Convert to output format + List output = new ArrayList<>(); + for (TraversalResult result : results) { + if (includeDepth) { + Object[] rowWithDepth = new Object[result.row().length + 1]; + System.arraycopy(result.row(), 0, rowWithDepth, 0, result.row().length); + rowWithDepth[result.row().length] = result.depth(); + output.add(rowWithDepth); + } else { + output.add(result.row()); + } + } + return output; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java new file mode 100644 index 00000000000..558286d1af3 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java @@ -0,0 +1,103 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.planner.rules; + +import 
org.apache.calcite.adapter.enumerable.EnumerableConvention; +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterRule; +import org.opensearch.sql.calcite.plan.rel.LogicalGraphLookup; +import org.opensearch.sql.opensearch.storage.OpenSearchIndex; +import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan; +import org.opensearch.sql.opensearch.storage.scan.CalciteEnumerableGraphLookup; + +/** Rule to convert a {@link LogicalGraphLookup} to a {@link CalciteEnumerableGraphLookup}. */ +public class EnumerableGraphLookupRule extends ConverterRule { + + /** Default configuration. */ + public static final Config DEFAULT_CONFIG = + Config.INSTANCE + .as(Config.class) + .withConversion( + LogicalGraphLookup.class, + Convention.NONE, + EnumerableConvention.INSTANCE, + "EnumerableGraphLookupRule") + .withRuleFactory(EnumerableGraphLookupRule::new); + + /** Creates an EnumerableGraphLookupRule. */ + protected EnumerableGraphLookupRule(Config config) { + super(config); + } + + @Override + public boolean matches(RelOptRuleCall call) { + LogicalGraphLookup graphLookup = call.rel(0); + // Only match if we can extract the OpenSearchIndex from the lookup table + return extractOpenSearchIndex(graphLookup.getLookup()) != null; + } + + /** + * Recursively extracts OpenSearchIndex from a RelNode by traversing down to find the index scan. 
+ * + * @param node The RelNode to extract from + * @return The OpenSearchIndex, or null if not found + */ + private static OpenSearchIndex extractOpenSearchIndex(RelNode node) { + if (node instanceof AbstractCalciteIndexScan scan) { + return scan.getOsIndex(); + } + if (node instanceof RelSubset subset) { + return extractOpenSearchIndex(subset.getOriginal()); + } + // Recursively check inputs + for (RelNode input : node.getInputs()) { + OpenSearchIndex index = extractOpenSearchIndex(input); + if (index != null) { + return index; + } + } + return null; + } + + @Override + public RelNode convert(RelNode rel) { + final LogicalGraphLookup graphLookup = (LogicalGraphLookup) rel; + + // Extract the OpenSearchIndex from the lookup table + OpenSearchIndex lookupIndex = extractOpenSearchIndex(graphLookup.getLookup()); + if (lookupIndex == null) { + throw new IllegalStateException("Cannot extract OpenSearchIndex from lookup table"); + } + + // Convert inputs to enumerable convention + RelTraitSet traitSet = graphLookup.getTraitSet().replace(EnumerableConvention.INSTANCE); + + RelNode convertedSource = + convert( + graphLookup.getSource(), + graphLookup.getSource().getTraitSet().replace(EnumerableConvention.INSTANCE)); + RelNode convertedLookup = + convert( + graphLookup.getLookup(), + graphLookup.getLookup().getTraitSet().replace(EnumerableConvention.INSTANCE)); + return new CalciteEnumerableGraphLookup( + graphLookup.getCluster(), + traitSet, + convertedSource, + convertedLookup, + graphLookup.getStartWith(), + graphLookup.getConnectFromField(), + graphLookup.getConnectToField(), + graphLookup.getOutputField(), + graphLookup.getDepthField(), + graphLookup.getMaxDepth(), + graphLookup.isBidirectional()); + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index db65bb51a80..0068f445ce7 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -16,6 +16,8 @@ public class OpenSearchIndexRules { EnumerableSystemIndexScanRule.DEFAULT_CONFIG.toRule(); private static final RelOptRule NESTED_AGGREGATE_RULE = EnumerableNestedAggregateRule.DEFAULT_CONFIG.toRule(); + private static final RelOptRule GRAPH_LOOKUP_RULE = + EnumerableGraphLookupRule.DEFAULT_CONFIG.toRule(); // Rule that always pushes down relevance functions regardless of pushdown settings private static final RelevanceFunctionPushdownRule RELEVANCE_FUNCTION_RULE = RelevanceFunctionPushdownRule.Config.DEFAULT.toRule(); @@ -23,7 +25,11 @@ public class OpenSearchIndexRules { /** The rules will apply whatever the pushdown setting is. */ public static final List OPEN_SEARCH_NON_PUSHDOWN_RULES = ImmutableList.of( - INDEX_SCAN_RULE, SYSTEM_INDEX_SCAN_RULE, NESTED_AGGREGATE_RULE, RELEVANCE_FUNCTION_RULE); + INDEX_SCAN_RULE, + SYSTEM_INDEX_SCAN_RULE, + NESTED_AGGREGATE_RULE, + GRAPH_LOOKUP_RULE, + RELEVANCE_FUNCTION_RULE); private static final ProjectIndexScanRule PROJECT_INDEX_SCAN = ProjectIndexScanRule.Config.DEFAULT.toRule(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 355262b2d6a..50f782cd154 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -1057,7 +1057,7 @@ QueryExpression isTrue() { throw new PredicateAnalyzerException("isTrue cannot be applied to " + this.getClass()); } - QueryExpression in(LiteralExpression literal) { + public QueryExpression in(LiteralExpression literal) { throw new PredicateAnalyzerException("in cannot be applied to " + this.getClass()); } @@ 
-1065,7 +1065,7 @@ QueryExpression notIn(LiteralExpression literal) { throw new PredicateAnalyzerException("notIn cannot be applied to " + this.getClass()); } - static QueryExpression create(TerminalExpression expression) { + public static QueryExpression create(TerminalExpression expression) { if (expression instanceof CastExpression) { expression = CastExpression.unpack(expression); } @@ -1673,11 +1673,11 @@ public String getReferenceForTermQuery() { } /** Literal like {@code 'foo' or 42 or true} etc. */ - static final class LiteralExpression implements TerminalExpression { + public static final class LiteralExpression implements TerminalExpression { final RexLiteral literal; - LiteralExpression(RexLiteral literal) { + public LiteralExpression(RexLiteral literal) { this.literal = literal; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java index d7539312cd1..3350c00fb0c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/OpenSearchIndex.java @@ -80,7 +80,7 @@ public class OpenSearchIndex extends AbstractOpenSearchTable { @Getter private final Settings settings; /** {@link OpenSearchRequest.IndexName}. */ - private final OpenSearchRequest.IndexName indexName; + @Getter private final OpenSearchRequest.IndexName indexName; /** The cached mapping of field and type in index. 
*/ private Map cachedFieldOpenSearchTypes = null; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java new file mode 100644 index 00000000000..4ee4b78a8ca --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -0,0 +1,365 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.scan; + +import static org.opensearch.index.query.QueryBuilders.termsQuery; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Queue; +import java.util.Set; +import lombok.Getter; +import org.apache.calcite.adapter.enumerable.EnumerableRel; +import org.apache.calcite.adapter.enumerable.EnumerableRelImplementor; +import org.apache.calcite.adapter.enumerable.PhysType; +import org.apache.calcite.adapter.enumerable.PhysTypeImpl; +import org.apache.calcite.linq4j.AbstractEnumerable; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Enumerator; +import org.apache.calcite.linq4j.tree.Blocks; +import org.apache.calcite.linq4j.tree.Expressions; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptCost; +import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.sql.calcite.plan.Scannable; +import org.opensearch.sql.calcite.plan.rel.GraphLookup; +import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; +import 
org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; +import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; +import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; + +/** + * Enumerable implementation for graphLookup command. + * + *

Performs BFS graph traversal by dynamically querying OpenSearch with filter pushdown instead + * of loading all lookup data into memory. For each source row, it executes BFS queries to find all + * connected nodes in the graph. + */ +@Getter +public class CalciteEnumerableGraphLookup extends GraphLookup implements EnumerableRel, Scannable { + + /** + * Creates a CalciteEnumerableGraphLookup. + * + * @param cluster Cluster + * @param traitSet Trait set (must include EnumerableConvention) + * @param source Source table RelNode + * @param lookup Lookup table RelNode // * @param lookupIndex OpenSearchIndex for the lookup table + * (extracted from lookup RelNode) + * @param connectFromField Field name for outgoing edges + * @param connectToField Field name for incoming edges + * @param outputField Name of the output array field + * @param depthField Name of the depth field + * @param maxDepth Maximum traversal depth (-1 for unlimited) + * @param bidirectional Whether to traverse edges in both directions + */ + public CalciteEnumerableGraphLookup( + RelOptCluster cluster, + RelTraitSet traitSet, + RelNode source, + RelNode lookup, + String startWith, + String connectFromField, + String connectToField, + String outputField, + String depthField, + int maxDepth, + boolean bidirectional) { + super( + cluster, + traitSet, + source, + lookup, + startWith, + connectFromField, + connectToField, + outputField, + depthField, + maxDepth, + bidirectional); + } + + @Override + public RelNode copy(RelTraitSet traitSet, List inputs) { + return new CalciteEnumerableGraphLookup( + getCluster(), + traitSet, + inputs.get(0), + inputs.get(1), + startWith, + connectFromField, + connectToField, + outputField, + depthField, + maxDepth, + bidirectional); + } + + @Override + public @Nullable RelOptCost computeSelfCost(RelOptPlanner planner, RelMetadataQuery mq) { + // TODO: make it more accurate + return super.computeSelfCost(planner, mq); + } + + @Override + public Result 
implement(EnumerableRelImplementor implementor, Prefer pref) { + PhysType physType = + PhysTypeImpl.of( + implementor.getTypeFactory(), + OpenSearchRelOptUtil.replaceDot(getCluster().getTypeFactory(), getRowType()), + pref.preferArray()); + + var scanOperator = implementor.stash(this, CalciteEnumerableGraphLookup.class); + return implementor.result(physType, Blocks.toBlock(Expressions.call(scanOperator, "scan"))); + } + + @Override + public Enumerable<@Nullable Object> scan() { + return new GraphLookupEnumerable(this); + } + + /** Enumerable implementation that performs BFS traversal for each source row. */ + private static class GraphLookupEnumerable extends AbstractEnumerable<@Nullable Object> { + + private final CalciteEnumerableGraphLookup graphLookup; + + GraphLookupEnumerable(CalciteEnumerableGraphLookup graphLookup) { + this.graphLookup = graphLookup; + } + + @Override + public Enumerator<@Nullable Object> enumerator() { + return new GraphLookupEnumerator(graphLookup); + } + } + + /** Enumerator that performs BFS for each source row. 
*/ + private static class GraphLookupEnumerator implements Enumerator<@Nullable Object> { + + private final CalciteEnumerableGraphLookup graphLookup; + private final CalciteEnumerableIndexScan lookupScan; + private final Enumerator<@Nullable Object> sourceEnumerator; + private final List lookupFields; + private final int startWithIndex; + private final int connectFromIdx; + private final int connectToIdx; + + private Object[] current = null; + + @SuppressWarnings("unchecked") + GraphLookupEnumerator(CalciteEnumerableGraphLookup graphLookup) { + this.graphLookup = graphLookup; + this.lookupScan = (CalciteEnumerableIndexScan) graphLookup.getLookup(); + + // Get the source enumerator + if (graphLookup.getSource() instanceof Scannable scannable) { + Enumerable sourceEnum = scannable.scan(); + this.sourceEnumerator = (Enumerator<@Nullable Object>) sourceEnum.enumerator(); + } else { + throw new IllegalStateException( + "Source must be Scannable, got: " + graphLookup.getSource().getClass()); + } + + List sourceFields = graphLookup.getSource().getRowType().getFieldNames(); + this.lookupFields = graphLookup.getLookup().getRowType().getFieldNames(); + this.startWithIndex = sourceFields.indexOf(graphLookup.getStartWith()); + this.connectFromIdx = lookupFields.indexOf(graphLookup.connectFromField); + this.connectToIdx = lookupFields.indexOf(graphLookup.connectToField); + } + + @Override + public Object current() { + // source fields + output array + return current; + } + + // TODO: currently we perform BFS for each single row. + // We can improve this by performing BFS for batch of rows. 
+ @Override + public boolean moveNext() { + if (!sourceEnumerator.moveNext()) { + return false; + } + + // Get current source row + Object sourceRow = sourceEnumerator.current(); + Object[] sourceValues; + + if (sourceRow instanceof Object[] arr) { + sourceValues = arr; + } else { + // Single column case + sourceValues = new Object[] {sourceRow}; + } + + // Get the start value for BFS + Object startValue = + (startWithIndex >= 0 && startWithIndex < sourceValues.length) + ? sourceValues[startWithIndex] + : null; + + // Perform BFS traversal + List bfsResults = performBfs(startValue); + + // Build output row: source fields + array of BFS results + current = new Object[sourceValues.length + 1]; + System.arraycopy(sourceValues, 0, current, 0, sourceValues.length); + current[sourceValues.length] = bfsResults; + + return true; + } + + /** + * Performs BFS traversal starting from the given value by dynamically querying OpenSearch. + * + * @param startValue The starting value for BFS + * @return List of rows found during traversal + */ + private List performBfs(Object startValue) { + if (startValue == null) { + return List.of(); + } + + List results = new ArrayList<>(); + Set visited = new HashSet<>(); + Queue queue = new ArrayDeque<>(); + + // Initialize BFS with start value + queue.offer(new BfsNode(startValue, -1)); + visited.add(startValue); + + while (!queue.isEmpty()) { + // Collect all values at current level for batch query + List currentLevelValues = new ArrayList<>(); + List currentLevelDepths = new ArrayList<>(); + + while (!queue.isEmpty()) { + BfsNode node = queue.poll(); + + // Check depth limit before processing + if (graphLookup.maxDepth > 0 && node.depth >= graphLookup.maxDepth) { + continue; + } + + currentLevelValues.add(node.value); + currentLevelDepths.add(node.depth); + } + + if (currentLevelValues.isEmpty()) { + break; + } + + // Query OpenSearch for all current level values + // Forward direction: connectFromField = currentLevelValues + List 
forwardResults = queryLookupTable(currentLevelValues); + + for (Object row : forwardResults) { + Object[] rowArray = (Object[]) (row); + Object nextValue = rowArray[connectFromIdx]; + if (!visited.contains(nextValue)) { + int depth = + currentLevelDepths.get(0) + 1; // Simplified; in production track per-value depth + + if (graphLookup.depthField != null) { + Object[] rowWithDepth = new Object[rowArray.length + 1]; + System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); + rowWithDepth[rowArray.length] = depth; + results.add(rowWithDepth); + } else { + results.add(rowArray); + } + + if (nextValue != null) { + visited.add(nextValue); + queue.offer(new BfsNode(nextValue, depth)); + } + } + } + + // Bidirectional: also query reverse direction + if (graphLookup.bidirectional) { + List reverseResults = queryLookupTable(currentLevelValues); + + for (Object row : reverseResults) { + Object[] rowArray = (Object[]) (row); + Object nextValue = rowArray[connectFromIdx]; + if (!visited.contains(nextValue)) { + visited.add(nextValue); + int depth = currentLevelDepths.get(0) + 1; + + if (graphLookup.depthField != null) { + Object[] rowWithDepth = new Object[rowArray.length + 1]; + System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); + rowWithDepth[rowArray.length] = depth; + results.add(rowWithDepth); + } else { + results.add(rowArray); + } + + if (nextValue != null) { + visited.add(nextValue); + queue.offer(new BfsNode(nextValue, depth)); + } + } + } + } + } + + return results; + } + + /** + * Queries the lookup table with a terms filter. 
+ * + * @param values Values to match + * @return List of matching rows + */ + private List queryLookupTable(List values) { + if (values.isEmpty()) { + return List.of(); + } + + var fieldExpression = + new NamedFieldExpression( + connectToIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); + QueryBuilder query = termsQuery(fieldExpression.getReferenceForTermQuery(), values); + CalciteEnumerableIndexScan newScan = (CalciteEnumerableIndexScan) this.lookupScan.copy(); + newScan.pushDownContext.add( + PushDownType.FILTER, + null, + (OSRequestBuilderAction) + requestBuilder -> requestBuilder.pushDownFilterForCalcite(query)); + Iterator<@Nullable Object> res = newScan.scan().iterator(); + List results = new ArrayList<>(); + while (res.hasNext()) { + results.add(res.next()); + } + return results; + } + + @Override + public void reset() { + sourceEnumerator.reset(); + current = null; + } + + @Override + public void close() { + sourceEnumerator.close(); + } + } + + /** Simple record to track BFS nodes with their depth. 
*/ + private record BfsNode(Object value, int depth) {} +} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 8710baec039..5408d6fb21f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -54,9 +54,13 @@ ADDTOTALS: 'ADDTOTALS'; ADDCOLTOTALS: 'ADDCOLTOTALS'; GRAPHLOOKUP: 'GRAPHLOOKUP'; START_WITH: 'STARTWITH'; -CONNECT_FROM: 'CONNECTFROM'; -CONNECT_TO: 'CONNECTTO'; +CONNECT_FROM_FIELD: 'CONNECTFROMFIELD'; +CONNECT_TO_FIELD: 'CONNECTTOFIELD'; MAX_DEPTH: 'MAXDEPTH'; +DEPTH_FIELD: 'DEPTHFIELD'; +DIRECTION: 'DIRECTION'; +UNI: 'UNI'; +BI: 'BI'; ROW: 'ROW'; COL: 'COL'; EXPAND: 'EXPAND'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 1a0a278f3d0..3654043811c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -628,14 +628,16 @@ addcoltotalsOption ; graphLookupCommand - : GRAPHLOOKUP graphLookupOption* AS outputField = fieldExpression + : GRAPHLOOKUP lookupTable = tableSourceClause graphLookupOption* AS outputField = fieldExpression ; graphLookupOption - : (CONNECT_FROM EQUAL fieldExpression) - | (CONNECT_TO EQUAL fieldExpression) + : (START_WITH EQUAL fieldExpression) + | (CONNECT_FROM_FIELD EQUAL fieldExpression) + | (CONNECT_TO_FIELD EQUAL fieldExpression) | (MAX_DEPTH EQUAL integerLiteral) - | (START_WITH EQUAL valueExpression) + | (DEPTH_FIELD EQUAL fieldExpression) + | (DIRECTION EQUAL (UNI | BI)) ; // clauses @@ -1689,7 +1691,11 @@ searchableKeyWord | ROW | COL | COLUMN_NAME - | CONNECT_FROM - | CONNECT_TO + | CONNECT_FROM_FIELD + | CONNECT_TO_FIELD | MAX_DEPTH + | DEPTH_FIELD + | DIRECTION + | UNI + | BIO ; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 05a1a1b5360..c9ace02e79b 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java 
+++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -85,6 +85,7 @@ import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; import org.opensearch.sql.ast.tree.GraphLookup; +import org.opensearch.sql.ast.tree.GraphLookup.Direction; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; @@ -1485,28 +1486,54 @@ public UnresolvedPlan visitAddcoltotalsCommand( @Override public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCommandContext ctx) { - Field from = null; - Field to = null; + // Parse lookup table + UnresolvedPlan fromTable = visitTableSourceClause(ctx.lookupTable); + + // Parse options with defaults + Field connectFromField = null; + Field connectToField = null; Literal maxDepth = Literal.ZERO; - UnresolvedExpression startWith = null; + Field startWith = null; + Field depthField = null; + Direction direction = Direction.UNI; + for (OpenSearchPPLParser.GraphLookupOptionContext option : ctx.graphLookupOption()) { - if (option.CONNECT_FROM() != null) { - from = (Field) internalVisitExpression(option.fieldExpression()); + if (option.CONNECT_FROM_FIELD() != null) { + connectFromField = (Field) internalVisitExpression(option.fieldExpression()); } - if (option.CONNECT_TO() != null) { - to = (Field) internalVisitExpression(option.fieldExpression()); + if (option.CONNECT_TO_FIELD() != null) { + connectToField = (Field) internalVisitExpression(option.fieldExpression()); } if (option.MAX_DEPTH() != null) { maxDepth = (Literal) internalVisitExpression(option.integerLiteral()); } if (option.START_WITH() != null) { - startWith = internalVisitExpression(option.valueExpression()); + startWith = (Field) internalVisitExpression(option.fieldExpression()); + } + if (option.DEPTH_FIELD() != null) { + depthField = (Field) internalVisitExpression(option.fieldExpression()); + } + if (option.DIRECTION() != null) { + direction = option.BI() != 
null ? Direction.BI : Direction.UNI; } } + Field as = (Field) internalVisitExpression(ctx.outputField); - if (from == null || to == null) { - throw new SemanticCheckException("connectFrom and connectTo must be specified"); + + if (connectFromField == null || connectToField == null) { + throw new SemanticCheckException( + "connectFromField and connectToField must be specified for graphLookup"); } - return new GraphLookup(from, to, as, maxDepth, startWith); + + return GraphLookup.builder() + .fromTable(fromTable) + .connectFromField(connectFromField) + .connectToField(connectToField) + .as(as) + .maxDepth(maxDepth) + .startWith(startWith) + .depthField(depthField) + .direction(direction) + .build(); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java index 548d7ab05e2..1af85e7a3c6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -40,36 +40,59 @@ public CalcitePPLGraphLookupTest() { } @Test - public void testGraphLookup() { + public void testGraphLookupBasic() { + // Test basic graphLookup with same source and lookup table String ppl = - "source=employee | graphLookup connectFrom=reportsTo connectTo=name as reportingHierarchy"; - - String expectedLogical = - "LogicalAggregate(group=[{0, 1, 2}], reportingHierarchy=[COLLECT($3)])\n" - + " LogicalProject(gl_src_id=[$0], gl_src_name=[$1], gl_src_reportsTo=[$2]," - + " $f7=[ROW($3, $4, $5, $6)])\n" - + " LogicalRepeatUnion(all=[true])\n" - + " LogicalTableSpool(readType=[LAZY], writeType=[LAZY], table=[[gl_recursive]])\n" - + " LogicalProject(gl_src_id=[$0], gl_src_name=[$1], gl_src_reportsTo=[$2]," - + " gl_hier_id=[$3], gl_hier_name=[$4], gl_hier_reportsTo=[$5], gl_depth=[1])\n" - + " LogicalJoin(condition=[=($2, $4)], joinType=[inner])\n" - + " 
LogicalTableScan(table=[[scott, employee]])\n" - + " LogicalTableScan(table=[[scott, employee]])\n" - + " LogicalTableSpool(readType=[LAZY], writeType=[LAZY], table=[[gl_recursive]])\n" - + " LogicalProject(gl_src_id=[$0], gl_src_name=[$1], gl_src_reportsTo=[$2]," - + " gl_hier_id=[$7], gl_hier_name=[$8], gl_hier_reportsTo=[$9], gl_depth=[+($6, 1)])\n" - + " LogicalJoin(condition=[=($5, $8)], joinType=[inner])\n" - + " LogicalTableScan(table=[[gl_recursive]])\n" - + " LogicalTableScan(table=[[scott, employee]])\n"; + "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" + + " as reportingHierarchy"; + + RelNode root = getRelNode(ppl); + // Verify it produces a valid logical plan (actual plan structure depends on implementation) + org.junit.Assert.assertNotNull(root); + System.out.println("Logical plan:\n" + root.explain()); + } + + @Test + public void testGraphLookupWithDepthField() { + // Test graphLookup with depthField parameter + String ppl = + "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" + + " depthField=level as reportingHierarchy"; + + RelNode root = getRelNode(ppl); + org.junit.Assert.assertNotNull(root); + System.out.println("Logical plan with depthField:\n" + root.explain()); + } + + @Test + public void testGraphLookupWithMaxDepth() { + // Test graphLookup with maxDepth parameter + String ppl = + "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" + + " maxDepth=3 as reportingHierarchy"; + + RelNode root = getRelNode(ppl); + org.junit.Assert.assertNotNull(root); + System.out.println("Logical plan with maxDepth:\n" + root.explain()); + } + + @Test + public void testGraphLookupBidirectional() { + // Test graphLookup with bidirectional traversal + String ppl = + "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" + + " direction=bio as reportingHierarchy"; + RelNode root = getRelNode(ppl); - verifyLogical(root, 
expectedLogical); + org.junit.Assert.assertNotNull(root); + System.out.println("Logical plan bidirectional:\n" + root.explain()); } @Override protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { final SchemaPlus rootSchema = Frameworks.createRootSchema(true); final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); - // Add events table for graphLookup tests + // Add employee table for graphLookup tests ImmutableList rows = ImmutableList.of( new Object[] {1, "Dev", null}, diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index dbf33d38e86..4146ad1d5b2 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1647,37 +1647,75 @@ public void testMvmapWithNonFieldFirstArgThrowsException() { @Test public void testGraphLookupCommand() { + // Basic graphLookup with required parameters assertEqual( - "source=t | graphLookup connectFrom=name connectTo=reportTo maxDepth=3 as" - + " reportingHierarchy", + "source=t | graphLookup employees connectFromField=manager connectToField=name maxDepth=3" + + " as reportingHierarchy", GraphLookup.builder() .child(relation("t")) - .from(field("name")) - .to(field("reportTo")) + .fromTable(relation("employees")) + .connectFromField(field("manager")) + .connectToField(field("name")) .as(field("reportingHierarchy")) .maxDepth(intLiteral(3)) .startWith(null) + .depthField(null) + .direction(GraphLookup.Direction.UNI) .build()); + + // graphLookup with startWith filter assertEqual( - "source=t | graphLookup connectFrom=name connectTo=reportTo startWith='hello' as" - + " reportingHierarchy", + "source=t | graphLookup employees connectFromField=manager connectToField=name" + + " startWith='hello' as reportingHierarchy", GraphLookup.builder() .child(relation("t")) - .from(field("name")) - 
.to(field("reportTo")) + .fromTable(relation("employees")) + .connectFromField(field("manager")) + .connectToField(field("name")) .as(field("reportingHierarchy")) .maxDepth(intLiteral(0)) .startWith(stringLiteral("hello")) + .depthField(null) + .direction(GraphLookup.Direction.UNI) .build()); + // graphLookup with depthField and bidirectional + assertEqual( + "source=t | graphLookup employees connectFromField=manager connectToField=name" + + " depthField=level direction=bio as reportingHierarchy", + GraphLookup.builder() + .child(relation("t")) + .fromTable(relation("employees")) + .connectFromField(field("manager")) + .connectToField(field("name")) + .as(field("reportingHierarchy")) + .maxDepth(intLiteral(0)) + .startWith(null) + .depthField(field("level")) + .direction(GraphLookup.Direction.BIO) + .build()); + + // Error: missing connectFromField - SemanticCheckException thrown by AstBuilder assertThrows( - SyntaxCheckException.class, - () -> plan("| graphLookup connectTo=reportTo startWith='hello' as reportingHierarchy")); + SemanticCheckException.class, + () -> + plan( + "source=t | graphLookup employees connectToField=name startWith='hello' as" + + " reportingHierarchy")); + + // Error: missing lookup table - SyntaxCheckException from grammar assertThrows( SyntaxCheckException.class, - () -> plan("| graphLookup connectFrom=name connectTo=reportTo startWith='hello'")); + () -> + plan( + "source=t | graphLookup connectFromField=manager connectToField=name as" + + " reportingHierarchy")); + + // Error: missing connectToField - SemanticCheckException thrown by AstBuilder assertThrows( - SyntaxCheckException.class, - () -> plan("| graphLookup connectFrom=name as reportingHierarchy")); + SemanticCheckException.class, + () -> + plan( + "source=t | graphLookup employees connectFromField=manager as reportingHierarchy")); } } From 0f7c0ef77ab41af6b60dff60b5700c4d35ff5604 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 3 Feb 2026 11:18:01 +0800 Subject: [PATCH 03/23] 
Refine - remove depth from BFS node Signed-off-by: Heng Qian --- .../sql/calcite/plan/rel/GraphLookup.java | 46 +++++++++++-------- .../calcite/plan/rel/LogicalGraphLookup.java | 5 +- .../scan/CalciteEnumerableGraphLookup.java | 35 +++++--------- 3 files changed, 40 insertions(+), 46 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java index a99907eb80f..0ef6d67c8d3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.plan.rel; import java.util.List; +import javax.annotation.Nullable; import lombok.Getter; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelTraitSet; @@ -42,10 +43,12 @@ public abstract class GraphLookup extends BiRel { protected final String connectFromField; // Field in lookup table (edge source) protected final String connectToField; // Field in lookup table (edge target) protected final String outputField; // Name of output array field - protected final String depthField; // Name of output array field + @Nullable protected final String depthField; // Name of output array field protected final int maxDepth; // -1 = unlimited protected final boolean bidirectional; + private RelDataType outputRowType; + /** * Creates a LogicalGraphLookup. 
* @@ -70,7 +73,7 @@ protected GraphLookup( String connectFromField, String connectToField, String outputField, - String depthField, + @Nullable String depthField, int maxDepth, boolean bidirectional) { super(cluster, traitSet, source, lookup); @@ -98,27 +101,30 @@ public RelNode getLookup() { @Override protected RelDataType deriveRowType() { - // Output = source fields + output array field - RelDataTypeFactory.Builder builder = getCluster().getTypeFactory().builder(); + if (outputRowType == null) { + // Output = source fields + output array field + RelDataTypeFactory.Builder builder = getCluster().getTypeFactory().builder(); - // Add all source fields - for (var field : getSource().getRowType().getFieldList()) { - builder.add(field); - } + // Add all source fields + for (var field : getSource().getRowType().getFieldList()) { + builder.add(field); + } - // Add output field (ARRAY type containing lookup row struct) - RelDataType lookupRowType = getLookup().getRowType(); - if (this.depthField != null) { - final RelDataTypeFactory.Builder lookupBuilder = getCluster().getTypeFactory().builder(); - lookupBuilder.addAll(lookupRowType.getFieldList()); - RelDataType depthType = getCluster().getTypeFactory().createSqlType(SqlTypeName.INTEGER); - lookupBuilder.add(this.depthField, depthType); - lookupRowType = lookupBuilder.build(); - } - RelDataType arrayType = getCluster().getTypeFactory().createArrayType(lookupRowType, -1); - builder.add(outputField, arrayType); + // Add output field (ARRAY type containing lookup row struct) + RelDataType lookupRowType = getLookup().getRowType(); + if (this.depthField != null) { + final RelDataTypeFactory.Builder lookupBuilder = getCluster().getTypeFactory().builder(); + lookupBuilder.addAll(lookupRowType.getFieldList()); + RelDataType depthType = getCluster().getTypeFactory().createSqlType(SqlTypeName.INTEGER); + lookupBuilder.add(this.depthField, depthType); + lookupRowType = lookupBuilder.build(); + } + RelDataType arrayType = 
getCluster().getTypeFactory().createArrayType(lookupRowType, -1); + builder.add(outputField, arrayType); - return builder.build(); + outputRowType = builder.build(); + } + return outputRowType; } @Override diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java index 9ca386b618f..9e29485831e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java @@ -6,6 +6,7 @@ package org.opensearch.sql.calcite.plan.rel; import java.util.List; +import javax.annotation.Nullable; import lombok.Getter; import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.RelOptCluster; @@ -39,7 +40,7 @@ protected LogicalGraphLookup( String connectFromField, String connectToField, String outputField, - String depthField, + @Nullable String depthField, int maxDepth, boolean bidirectional) { super( @@ -77,7 +78,7 @@ public static LogicalGraphLookup create( String connectFromField, String connectToField, String outputField, - String depthField, + @Nullable String depthField, int maxDepth, boolean bidirectional) { RelOptCluster cluster = source.getCluster(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 4ee4b78a8ca..1f79079ae8e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -233,27 +233,20 @@ private List performBfs(Object startValue) { List results = new ArrayList<>(); Set visited = new HashSet<>(); - Queue queue = new ArrayDeque<>(); + Queue queue = new ArrayDeque<>(); // Initialize BFS with start 
value - queue.offer(new BfsNode(startValue, -1)); + queue.offer(startValue); visited.add(startValue); + int currentLevelDepth = 0; while (!queue.isEmpty()) { // Collect all values at current level for batch query List currentLevelValues = new ArrayList<>(); - List currentLevelDepths = new ArrayList<>(); while (!queue.isEmpty()) { - BfsNode node = queue.poll(); - - // Check depth limit before processing - if (graphLookup.maxDepth > 0 && node.depth >= graphLookup.maxDepth) { - continue; - } - - currentLevelValues.add(node.value); - currentLevelDepths.add(node.depth); + Object value = queue.poll(); + currentLevelValues.add(value); } if (currentLevelValues.isEmpty()) { @@ -268,13 +261,10 @@ private List performBfs(Object startValue) { Object[] rowArray = (Object[]) (row); Object nextValue = rowArray[connectFromIdx]; if (!visited.contains(nextValue)) { - int depth = - currentLevelDepths.get(0) + 1; // Simplified; in production track per-value depth - if (graphLookup.depthField != null) { Object[] rowWithDepth = new Object[rowArray.length + 1]; System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); - rowWithDepth[rowArray.length] = depth; + rowWithDepth[rowArray.length] = currentLevelDepth; results.add(rowWithDepth); } else { results.add(rowArray); @@ -282,7 +272,7 @@ private List performBfs(Object startValue) { if (nextValue != null) { visited.add(nextValue); - queue.offer(new BfsNode(nextValue, depth)); + queue.offer(nextValue); } } } @@ -296,12 +286,10 @@ private List performBfs(Object startValue) { Object nextValue = rowArray[connectFromIdx]; if (!visited.contains(nextValue)) { visited.add(nextValue); - int depth = currentLevelDepths.get(0) + 1; - if (graphLookup.depthField != null) { Object[] rowWithDepth = new Object[rowArray.length + 1]; System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); - rowWithDepth[rowArray.length] = depth; + rowWithDepth[rowArray.length] = currentLevelDepth; results.add(rowWithDepth); } else { results.add(rowArray); @@ 
-309,11 +297,13 @@ private List performBfs(Object startValue) { if (nextValue != null) { visited.add(nextValue); - queue.offer(new BfsNode(nextValue, depth)); + queue.offer(nextValue); } } } } + + if (++currentLevelDepth > graphLookup.maxDepth) break; } return results; @@ -359,7 +349,4 @@ public void close() { sourceEnumerator.close(); } } - - /** Simple record to track BFS nodes with their depth. */ - private record BfsNode(Object value, int depth) {} } From bca0ac148205aa2e36d8ca3283f612166531b472 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 3 Feb 2026 15:43:55 +0800 Subject: [PATCH 04/23] Support bidirectional mode Signed-off-by: Heng Qian --- .../calcite/plan/rel/LogicalGraphLookup.java | 5 +- .../scan/CalciteEnumerableGraphLookup.java | 47 +++++++------------ 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java index 9e29485831e..d25973d4b9f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java @@ -13,7 +13,10 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; -/** Logical RelNode for graphLookup command. */ +/** + * Logical RelNode for graphLookup command. 
TODO: need to support trim fields and several transpose + * rules for this new added RelNode + */ @Getter public class LogicalGraphLookup extends GraphLookup { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 1f79079ae8e..999b99191c7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -32,6 +32,7 @@ import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.checkerframework.checker.nullness.qual.Nullable; import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; import org.opensearch.sql.calcite.plan.Scannable; import org.opensearch.sql.calcite.plan.rel.GraphLookup; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; @@ -112,6 +113,7 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { return super.computeSelfCost(planner, mq); } + // TODO: support non-scannable inputs @Override public Result implement(EnumerableRelImplementor implementor, Prefer pref) { PhysType physType = @@ -260,6 +262,9 @@ private List performBfs(Object startValue) { for (Object row : forwardResults) { Object[] rowArray = (Object[]) (row); Object nextValue = rowArray[connectFromIdx]; + if (graphLookup.bidirectional && visited.contains(nextValue)) { + nextValue = rowArray[connectToIdx]; + } if (!visited.contains(nextValue)) { if (graphLookup.depthField != null) { Object[] rowWithDepth = new Object[rowArray.length + 1]; @@ -277,32 +282,6 @@ private List performBfs(Object startValue) { } } - // Bidirectional: also query reverse direction - if (graphLookup.bidirectional) { - List reverseResults = queryLookupTable(currentLevelValues); - - for (Object row : 
reverseResults) { - Object[] rowArray = (Object[]) (row); - Object nextValue = rowArray[connectFromIdx]; - if (!visited.contains(nextValue)) { - visited.add(nextValue); - if (graphLookup.depthField != null) { - Object[] rowWithDepth = new Object[rowArray.length + 1]; - System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); - rowWithDepth[rowArray.length] = currentLevelDepth; - results.add(rowWithDepth); - } else { - results.add(rowArray); - } - - if (nextValue != null) { - visited.add(nextValue); - queue.offer(nextValue); - } - } - } - } - if (++currentLevelDepth > graphLookup.maxDepth) break; } @@ -320,16 +299,26 @@ private List queryLookupTable(List values) { return List.of(); } - var fieldExpression = + NamedFieldExpression toFieldExpression = new NamedFieldExpression( connectToIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); - QueryBuilder query = termsQuery(fieldExpression.getReferenceForTermQuery(), values); + QueryBuilder query = termsQuery(toFieldExpression.getReferenceForTermQuery(), values); + if (graphLookup.bidirectional) { + NamedFieldExpression fromFieldExpression = + new NamedFieldExpression( + connectFromIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); + query = + QueryBuilders.boolQuery() + .should(query) + .should(termsQuery(fromFieldExpression.getReferenceForTermQuery(), values)); + } CalciteEnumerableIndexScan newScan = (CalciteEnumerableIndexScan) this.lookupScan.copy(); + QueryBuilder finalQuery = query; newScan.pushDownContext.add( PushDownType.FILTER, null, (OSRequestBuilderAction) - requestBuilder -> requestBuilder.pushDownFilterForCalcite(query)); + requestBuilder -> requestBuilder.pushDownFilterForCalcite(finalQuery)); Iterator<@Nullable Object> res = newScan.scan().iterator(); List results = new ArrayList<>(); while (res.hasNext()) { From 9761001b8eb7467b01586f2d1bd8b885627907b1 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 3 Feb 2026 16:06:18 +0800 Subject: [PATCH 05/23] Support anonymize graph 
lookup Signed-off-by: Heng Qian --- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 22 ++++++++++ .../calcite/CalcitePPLGraphLookupTest.java | 2 +- .../sql/ppl/parser/AstBuilderTest.java | 10 ++--- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 40 +++++++++++++++++++ 4 files changed, 68 insertions(+), 6 deletions(-) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index 4376b5659d4..e599924d153 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -77,6 +77,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Flatten; +import org.opensearch.sql.ast.tree.GraphLookup; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Lookup; @@ -224,6 +225,27 @@ public String visitLookup(Lookup node, String context) { "%s | lookup %s %s%s%s", child, MASK_TABLE, mappingFields, strategy, outputFields); } + @Override + public String visitGraphLookup(GraphLookup node, String context) { + String child = node.getChild().get(0).accept(this, context); + StringBuilder command = new StringBuilder(); + command.append(child).append(" | graphlookup ").append(MASK_TABLE); + if (node.getStartWith() != null) { + command.append(" startwith=").append(MASK_COLUMN); + } + command.append(" connectFromField=").append(MASK_COLUMN); + command.append(" connectToField=").append(MASK_COLUMN); + if (node.getMaxDepth() != null && !Integer.valueOf(0).equals(node.getMaxDepth().getValue())) { + command.append(" maxDepth=").append(MASK_LITERAL); + } + if (node.getDepthField() != null) { + command.append(" depthField=").append(MASK_COLUMN); + } + command.append(" direction=").append(node.getDirection().name().toLowerCase()); + command.append(" as 
").append(MASK_COLUMN); + return command.toString(); + } + private String formatFieldAlias(java.util.Map fieldMap) { return fieldMap.entrySet().stream() .map( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java index 1af85e7a3c6..0081e63c62b 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -81,7 +81,7 @@ public void testGraphLookupBidirectional() { // Test graphLookup with bidirectional traversal String ppl = "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" - + " direction=bio as reportingHierarchy"; + + " direction=bi as reportingHierarchy"; RelNode root = getRelNode(ppl); org.junit.Assert.assertNotNull(root); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 4146ad1d5b2..6764420d133 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1666,7 +1666,7 @@ public void testGraphLookupCommand() { // graphLookup with startWith filter assertEqual( "source=t | graphLookup employees connectFromField=manager connectToField=name" - + " startWith='hello' as reportingHierarchy", + + " startWith=id as reportingHierarchy", GraphLookup.builder() .child(relation("t")) .fromTable(relation("employees")) @@ -1674,7 +1674,7 @@ public void testGraphLookupCommand() { .connectToField(field("name")) .as(field("reportingHierarchy")) .maxDepth(intLiteral(0)) - .startWith(stringLiteral("hello")) + .startWith(field("id")) .depthField(null) .direction(GraphLookup.Direction.UNI) .build()); @@ -1682,7 +1682,7 @@ public void testGraphLookupCommand() { // graphLookup with depthField and 
bidirectional assertEqual( "source=t | graphLookup employees connectFromField=manager connectToField=name" - + " depthField=level direction=bio as reportingHierarchy", + + " depthField=level direction=bi as reportingHierarchy", GraphLookup.builder() .child(relation("t")) .fromTable(relation("employees")) @@ -1692,7 +1692,7 @@ public void testGraphLookupCommand() { .maxDepth(intLiteral(0)) .startWith(null) .depthField(field("level")) - .direction(GraphLookup.Direction.BIO) + .direction(GraphLookup.Direction.BI) .build()); // Error: missing connectFromField - SemanticCheckException thrown by AstBuilder @@ -1700,7 +1700,7 @@ public void testGraphLookupCommand() { SemanticCheckException.class, () -> plan( - "source=t | graphLookup employees connectToField=name startWith='hello' as" + "source=t | graphLookup employees connectToField=name startWith=id as" + " reportingHierarchy")); // Error: missing lookup table - SyntaxCheckException from grammar diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 1e200eb092b..8a8e50081aa 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -643,6 +643,46 @@ public void testLookup() { + " COUNTRY2")); } + @Test + public void testGraphLookup() { + // Basic graphLookup with required parameters + assertEquals( + "source=table | graphlookup table connectFromField=identifier connectToField=identifier" + + " direction=uni as identifier", + anonymize( + "source=t | graphLookup employees connectFromField=manager connectToField=name" + + " as reportingHierarchy")); + // graphLookup with maxDepth + assertEquals( + "source=table | graphlookup table connectFromField=identifier connectToField=identifier" + + " maxDepth=*** direction=uni as identifier", + anonymize( + "source=t | graphLookup employees 
connectFromField=manager connectToField=name" + + " maxDepth=3 as reportingHierarchy")); + // graphLookup with depthField + assertEquals( + "source=table | graphlookup table connectFromField=identifier connectToField=identifier" + + " depthField=identifier direction=uni as identifier", + anonymize( + "source=t | graphLookup employees connectFromField=manager connectToField=name" + + " depthField=level as reportingHierarchy")); + // graphLookup with bidirectional mode + assertEquals( + "source=table | graphlookup table connectFromField=identifier connectToField=identifier" + + " direction=bi as identifier", + anonymize( + "source=t | graphLookup employees connectFromField=manager connectToField=name" + + " direction=bi as reportingHierarchy")); + // graphLookup with all optional parameters + assertEquals( + "source=table | graphlookup table startwith=identifier connectFromField=identifier" + + " connectToField=identifier maxDepth=*** depthField=identifier direction=bi" + + " as identifier", + anonymize( + "source=t | graphLookup employees connectFromField=manager connectToField=name" + + " startWith=id maxDepth=5 depthField=level direction=bi as reportingHierarchy")); + } + @Test public void testInSubquery() { assertEquals( From 5c936e9b85f4c721597e0e9646058792df97c5ed Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 3 Feb 2026 16:24:22 +0800 Subject: [PATCH 06/23] Fix UT Signed-off-by: Heng Qian --- .../opensearch/sql/ast/tree/GraphLookup.java | 4 ++ .../sql/calcite/CalciteRelNodeVisitor.java | 2 +- .../sql/calcite/plan/rel/GraphLookup.java | 2 + .../scan/CalciteEnumerableGraphLookup.java | 1 + .../calcite/CalcitePPLGraphLookupTest.java | 53 +++++++++++++------ 5 files changed, 44 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java index d39d5a665ae..debe2d48bfd 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java +++ 
b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -67,6 +67,10 @@ public enum Direction { private UnresolvedPlan child; + public String getDepthFieldName() { + return depthField == null ? null : depthField.toString(); + } + @Override public UnresolvedPlan attach(UnresolvedPlan child) { this.child = child; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 5305602b79c..c0424c4627e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2588,7 +2588,7 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { String connectFromFieldName = node.getConnectFromField().getField().toString(); String connectToFieldName = node.getConnectToField().getField().toString(); String outputFieldName = node.getAs().getField().toString(); - String depthFieldName = node.getDepthField().toString(); + String depthFieldName = node.getDepthFieldName(); boolean bidirectional = node.getDirection() == Direction.BI; RexLiteral maxDepthNode = (RexLiteral) rexVisitor.analyze(node.getMaxDepth(), context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java index 0ef6d67c8d3..1d12c1e9e7d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -44,6 +44,8 @@ public abstract class GraphLookup extends BiRel { protected final String connectToField; // Field in lookup table (edge target) protected final String outputField; // Name of output array field @Nullable protected final String depthField; // Name of output array field + + //TODO: add limitation on the maxDepth and input rows count protected final int maxDepth; // 
-1 = unlimited protected final boolean bidirectional; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 999b99191c7..7018a7a21dc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -233,6 +233,7 @@ private List performBfs(Object startValue) { return List.of(); } + // TODO: support spillable for these collections List results = new ArrayList<>(); Set visited = new HashSet<>(); Queue queue = new ArrayDeque<>(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java index 0081e63c62b..e23687465b9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -43,49 +43,68 @@ public CalcitePPLGraphLookupTest() { public void testGraphLookupBasic() { // Test basic graphLookup with same source and lookup table String ppl = - "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" - + " as reportingHierarchy"; + "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" + + " connectToField=name as reportingHierarchy"; RelNode root = getRelNode(ppl); - // Verify it produces a valid logical plan (actual plan structure depends on implementation) - org.junit.Assert.assertNotNull(root); - System.out.println("Logical plan:\n" + root.explain()); + String expectedLogical = + "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[-1]," + + " 
bidirectional=[false])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + verifyLogical(root, expectedLogical); } @Test public void testGraphLookupWithDepthField() { // Test graphLookup with depthField parameter String ppl = - "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" - + " depthField=level as reportingHierarchy"; + "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" + + " connectToField=name depthField=level as reportingHierarchy"; RelNode root = getRelNode(ppl); - org.junit.Assert.assertNotNull(root); - System.out.println("Logical plan with depthField:\n" + root.explain()); + String expectedLogical = + "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + + " outputField=[reportingHierarchy], depthField=[Field(field=level, fieldArgs=[])]," + + " maxDepth=[-1], bidirectional=[false])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + verifyLogical(root, expectedLogical); } @Test public void testGraphLookupWithMaxDepth() { // Test graphLookup with maxDepth parameter String ppl = - "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" - + " maxDepth=3 as reportingHierarchy"; + "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" + + " connectToField=name maxDepth=3 as reportingHierarchy"; RelNode root = getRelNode(ppl); - org.junit.Assert.assertNotNull(root); - System.out.println("Logical plan with maxDepth:\n" + root.explain()); + String expectedLogical = + "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[3]," + + " bidirectional=[false])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + verifyLogical(root, 
expectedLogical); } @Test public void testGraphLookupBidirectional() { // Test graphLookup with bidirectional traversal String ppl = - "source=employee | graphLookup employee connectFromField=reportsTo connectToField=name" - + " direction=bi as reportingHierarchy"; + "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" + + " connectToField=name direction=bi as reportingHierarchy"; RelNode root = getRelNode(ppl); - org.junit.Assert.assertNotNull(root); - System.out.println("Logical plan bidirectional:\n" + root.explain()); + String expectedLogical = + "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[-1]," + + " bidirectional=[true])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalTableScan(table=[[scott, employee]])\n"; + verifyLogical(root, expectedLogical); } @Override From b54d1de56c74290bcfeabfba408f64a6f21c65a8 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Tue, 3 Feb 2026 17:00:49 +0800 Subject: [PATCH 07/23] Add IT Signed-off-by: Heng Qian --- .../remote/CalcitePPLGraphLookupIT.java | 411 ++++++++++++++++++ .../sql/legacy/SQLIntegTestCase.java | 16 + .../org/opensearch/sql/legacy/TestUtils.java | 15 + .../opensearch/sql/legacy/TestsConstants.java | 3 + .../src/test/resources/graph_airports.json | 10 + .../src/test/resources/graph_employees.json | 12 + .../src/test/resources/graph_travelers.json | 8 + .../graph_airports_index_mapping.json | 12 + .../graph_employees_index_mapping.json | 15 + .../graph_travelers_index_mapping.json | 15 + 10 files changed, 517 insertions(+) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java create mode 100644 integ-test/src/test/resources/graph_airports.json create mode 100644 integ-test/src/test/resources/graph_employees.json create mode 100644 integ-test/src/test/resources/graph_travelers.json create mode 100644 
integ-test/src/test/resources/indexDefinitions/graph_airports_index_mapping.json create mode 100644 integ-test/src/test/resources/indexDefinitions/graph_employees_index_mapping.json create mode 100644 integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java new file mode 100644 index 00000000000..a94f8db4913 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -0,0 +1,411 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_GRAPH_AIRPORTS; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_GRAPH_EMPLOYEES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_GRAPH_TRAVELERS; +import static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration tests for graphLookup command. Test cases are inspired by MongoDB's $graphLookup + * examples. + * + *

Test data: + * + *

    + *
  • graph_employees: Employee hierarchy with reportsTo field (Dev -> Eliot -> Ron -> Andrew) + *
  • graph_travelers: Social network with friends connections + *
  • graph_airports: Airport connections graph (JFK, BOS, ORD, PWM, LHR) + *
+ * + * @see MongoDB + * $graphLookup + */ +public class CalcitePPLGraphLookupIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + + loadIndex(Index.GRAPH_EMPLOYEES); + loadIndex(Index.GRAPH_TRAVELERS); + loadIndex(Index.GRAPH_AIRPORTS); + } + + // ==================== Employee Hierarchy Tests ==================== + + /** + * Test 1: Basic employee hierarchy traversal. Find all managers in the reporting chain for each + * employee. Similar to MongoDB example: "Within a Collection". + * + *

Employee hierarchy: Dev -> Eliot -> Ron -> Andrew (CEO) Asya -> Ron -> Andrew Dan -> Andrew + */ + @Test + public void testEmployeeHierarchyBasicTraversal() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + // Should return 6 employees, each with their reporting hierarchy + System.out.println(result); + verifyNumOfRows(result, 6); + } + + /** + * Test 2: Employee hierarchy traversal with depth field. Track the depth of each manager in the + * hierarchy. + */ + @Test + public void testEmployeeHierarchyWithDepthField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " depthField=level" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + System.out.println(result); + verifyNumOfRows(result, 6); + } + + /** + * Test 3: Employee hierarchy traversal with maxDepth limit. Only find managers up to 1 level + * above. + */ + @Test + public void testEmployeeHierarchyWithMaxDepth() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " maxDepth=1" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + // Each employee should have at most 2 managers in their hierarchy (depth 0 and 1) + System.out.println(result); + verifyNumOfRows(result, 6); + } + + /** + * Test 4: Query specific employee and find their complete reporting chain. Filter to Dev and find + * all his managers. 
+ */ + @Test + public void testEmployeeHierarchyForSpecificEmployee() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where name = 'Dev'" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + // Dev's hierarchy: Eliot, Ron, Andrew + System.out.println(result); + verifyNumOfRows(result, 1); + } + + // ==================== Social Network (Travelers) Tests ==================== + + /** + * Test 5: Find all friends (direct and indirect) for travelers. Social network traversal - find + * friends of friends. + */ + @Test + public void testTravelersFriendsNetwork() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=friends" + + " connectFromField=friends" + + " connectToField=name" + + " as socialNetwork", + TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); + + // All 4 travelers with their social networks + System.out.println(result); + verifyNumOfRows(result, 4); + } + + /** + * Test 6: Find friends network with limited depth. Only get direct friends (depth 0) and friends + * of friends (depth 1). + */ + @Test + public void testTravelersFriendsWithMaxDepth() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where name = 'Brad Green'" + + " | graphLookup %s" + + " startWith=friends" + + " connectFromField=friends" + + " connectToField=name" + + " maxDepth=1" + + " as socialNetwork", + TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); + + // Brad Green -> Shirley Soto -> {Tanya Jordan, Terry Hawkins} + System.out.println(result); + verifyNumOfRows(result, 1); + } + + /** + * Test 7: Find friends network with depth tracking. Track the degree of connection for each + * friend. 
+ */ + @Test + public void testTravelersFriendsWithDepthField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=friends" + + " connectFromField=friends" + + " connectToField=name" + + " depthField=connectionLevel" + + " as socialNetwork", + TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); + + System.out.println(result); + verifyNumOfRows(result, 4); + } + + // ==================== Airport Connections Tests ==================== + + /** + * Test 8: Find all reachable airports from each airport. Similar to MongoDB example: "Within + * Collection with maxDepth". + */ + @Test + public void testAirportConnections() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=connects" + + " connectFromField=connects" + + " connectToField=airport" + + " as reachableAirports", + TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); + + System.out.println(result); + verifyNumOfRows(result, 5); + } + + /** + * Test 9: Find airports reachable within 1 connection. Limited traversal depth. + */ + @Test + public void testAirportConnectionsWithMaxDepth() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where airport = 'JFK'" + + " | graphLookup %s" + + " startWith=connects" + + " connectFromField=connects" + + " connectToField=airport" + + " maxDepth=1" + + " as reachableAirports", + TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); + + // JFK -> {BOS, ORD} (depth 0) -> {JFK, PWM} (depth 1, excluding JFK as already visited) + System.out.println(result); + verifyNumOfRows(result, 1); + } + + /** + * Test 10: Find airports with number of connections (hops) tracked. 
+ */ + @Test + public void testAirportConnectionsWithDepthField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=connects" + + " connectFromField=connects" + + " connectToField=airport" + + " depthField=numConnections" + + " as reachableAirports", + TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); + + System.out.println(result); + verifyNumOfRows(result, 5); + } + + // ==================== Bidirectional Traversal Tests ==================== + + /** + * Test 11: Bidirectional traversal on employee hierarchy. Find both reports-to and direct-reports + * relationships. + */ + @Test + public void testBidirectionalEmployeeHierarchy() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where name = 'Ron'" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " direction=bi" + + " as connections", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + // Ron should find both his managers (Andrew) and his reports (Eliot, Asya, and indirectly Dev) + System.out.println(result); + verifyNumOfRows(result, 1); + } + + /** + * Test 12: Bidirectional airport connections. Find all airports connected in either direction. + */ + @Test + public void testBidirectionalAirportConnections() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=connects" + + " connectFromField=connects" + + " connectToField=airport" + + " direction=bi" + + " as allConnections", + TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); + + verifyNumOfRows(result, 5); + } + + // ==================== Edge Cases ==================== + + /** + * Test 13: Graph lookup on empty result set. Filter to non-existent employee. 
+ */ + @Test + public void testEmptySourceResult() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where name = 'NonExistent'" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + System.out.println(result); + verifyNumOfRows(result, 0); + } + + /** + * Test 14: Employee at top of hierarchy (CEO with no manager). Andrew has no reportsTo, so his + * hierarchy should be empty. + */ + @Test + public void testEmployeeWithNoManager() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where name = 'Andrew'" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + // Andrew is CEO, no one above him + System.out.println(result); + verifyNumOfRows(result, 1); + } + + /** + * Test 15: Combined with other PPL commands (stats, sort). Count employees by hierarchy depth. + */ + @Test + public void testGraphLookupWithStats() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " as reportingHierarchy" + + " | stats count() by name", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + // 6 distinct employees + System.out.println(result); + verifyNumOfRows(result, 6); + } + + /** + * Test 16: Graph lookup with fields projection. Only select specific fields in the result. 
+ */ + @Test + public void testGraphLookupWithFieldsProjection() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startWith=reportsTo" + + " connectFromField=reportsTo" + + " connectToField=name" + + " as reportingHierarchy" + + " | fields name, reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + System.out.println(result); + verifyNumOfRows(result, 6); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 4e143951bfa..de4eca4fa1e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -851,6 +851,22 @@ public enum Index { "duplication_nullable", getDuplicationNullableIndexMapping(), "src/test/resources/duplication_nullable.json"), + // Graph lookup test indices (inspired by MongoDB $graphLookup examples) + GRAPH_EMPLOYEES( + TestsConstants.TEST_INDEX_GRAPH_EMPLOYEES, + "graph_employees", + getGraphEmployeesIndexMapping(), + "src/test/resources/graph_employees.json"), + GRAPH_TRAVELERS( + TestsConstants.TEST_INDEX_GRAPH_TRAVELERS, + "graph_travelers", + getGraphTravelersIndexMapping(), + "src/test/resources/graph_travelers.json"), + GRAPH_AIRPORTS( + TestsConstants.TEST_INDEX_GRAPH_AIRPORTS, + "graph_airports", + getGraphAirportsIndexMapping(), + "src/test/resources/graph_airports.json"), TPCH_ORDERS( "orders", "tpch", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 2ac1763836e..4f82f03bfa9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -320,6 +320,21 @@ public static String getWorkInformationIndexMapping() { return 
getMappingFile(mappingFile); } + public static String getGraphEmployeesIndexMapping() { + String mappingFile = "graph_employees_index_mapping.json"; + return getMappingFile(mappingFile); + } + + public static String getGraphTravelersIndexMapping() { + String mappingFile = "graph_travelers_index_mapping.json"; + return getMappingFile(mappingFile); + } + + public static String getGraphAirportsIndexMapping() { + String mappingFile = "graph_airports_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static String getDuplicationNullableIndexMapping() { String mappingFile = "duplication_nullable_index_mapping.json"; return getMappingFile(mappingFile); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index ad8a232bab3..b5e49bbe022 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -84,6 +84,9 @@ public class TestsConstants { public static final String TEST_INDEX_WORKER = TEST_INDEX + "_worker"; public static final String TEST_INDEX_WORK_INFORMATION = TEST_INDEX + "_work_information"; public static final String TEST_INDEX_DUPLICATION_NULLABLE = TEST_INDEX + "_duplication_nullable"; + public static final String TEST_INDEX_GRAPH_EMPLOYEES = TEST_INDEX + "_graph_employees"; + public static final String TEST_INDEX_GRAPH_TRAVELERS = TEST_INDEX + "_graph_travelers"; + public static final String TEST_INDEX_GRAPH_AIRPORTS = TEST_INDEX + "_graph_airports"; public static final String TEST_INDEX_MERGE_TEST_1 = TEST_INDEX + "_merge_test_1"; public static final String TEST_INDEX_MERGE_TEST_2 = TEST_INDEX + "_merge_test_2"; public static final String TEST_INDEX_MERGE_TEST_WILDCARD = TEST_INDEX + "_merge_test_*"; diff --git a/integ-test/src/test/resources/graph_airports.json b/integ-test/src/test/resources/graph_airports.json new file mode 100644 
index 00000000000..c644a24dc04 --- /dev/null +++ b/integ-test/src/test/resources/graph_airports.json @@ -0,0 +1,10 @@ +{"index":{"_id":"1"}} +{"airport":"JFK","connects":["BOS","ORD"]} +{"index":{"_id":"2"}} +{"airport":"BOS","connects":["JFK","PWM"]} +{"index":{"_id":"3"}} +{"airport":"ORD","connects":["JFK"]} +{"index":{"_id":"4"}} +{"airport":"PWM","connects":["BOS","LHR"]} +{"index":{"_id":"5"}} +{"airport":"LHR","connects":["PWM"]} diff --git a/integ-test/src/test/resources/graph_employees.json b/integ-test/src/test/resources/graph_employees.json new file mode 100644 index 00000000000..a9a2630fc05 --- /dev/null +++ b/integ-test/src/test/resources/graph_employees.json @@ -0,0 +1,12 @@ +{"index":{"_id":"1"}} +{"id":1,"name":"Dev","reportsTo":"Eliot"} +{"index":{"_id":"2"}} +{"id":2,"name":"Eliot","reportsTo":"Ron"} +{"index":{"_id":"3"}} +{"id":3,"name":"Ron","reportsTo":"Andrew"} +{"index":{"_id":"4"}} +{"id":4,"name":"Andrew","reportsTo":null} +{"index":{"_id":"5"}} +{"id":5,"name":"Asya","reportsTo":"Ron"} +{"index":{"_id":"6"}} +{"id":6,"name":"Dan","reportsTo":"Andrew"} diff --git a/integ-test/src/test/resources/graph_travelers.json b/integ-test/src/test/resources/graph_travelers.json new file mode 100644 index 00000000000..da4f4ec42f7 --- /dev/null +++ b/integ-test/src/test/resources/graph_travelers.json @@ -0,0 +1,8 @@ +{"index":{"_id":"1"}} +{"name":"Tanya Jordan","friends":["Shirley Soto","Terry Hawkins"],"hobbies":["tennis","reading"]} +{"index":{"_id":"2"}} +{"name":"Shirley Soto","friends":["Tanya Jordan","Terry Hawkins"],"hobbies":["golf","reading"]} +{"index":{"_id":"3"}} +{"name":"Terry Hawkins","friends":["Tanya Jordan","Shirley Soto"],"hobbies":["tennis","golf"]} +{"index":{"_id":"4"}} +{"name":"Brad Green","friends":["Shirley Soto"],"hobbies":["reading"]} diff --git a/integ-test/src/test/resources/indexDefinitions/graph_airports_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/graph_airports_index_mapping.json new file mode 
100644 index 00000000000..e93812c8a1a --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/graph_airports_index_mapping.json @@ -0,0 +1,12 @@ +{ + "mappings": { + "properties": { + "airport": { + "type": "keyword" + }, + "connects": { + "type": "keyword" + } + } + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/graph_employees_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/graph_employees_index_mapping.json new file mode 100644 index 00000000000..8c6674396e4 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/graph_employees_index_mapping.json @@ -0,0 +1,15 @@ +{ + "mappings": { + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": "keyword" + }, + "reportsTo": { + "type": "keyword" + } + } + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json new file mode 100644 index 00000000000..de430eb8220 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json @@ -0,0 +1,15 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "keyword" + }, + "friends": { + "type": "keyword" + }, + "hobbies": { + "type": "keyword" + } + } + } +} From 1bc06a8825a8c38b666ddd8ecdfbc8cdebd073ce Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 4 Feb 2026 10:47:38 +0800 Subject: [PATCH 08/23] Add limitation for GraphLookup Signed-off-by: Heng Qian --- .../sql/calcite/CalciteRelNodeVisitor.java | 2 + .../sql/calcite/plan/rel/GraphLookup.java | 2 +- .../remote/CalcitePPLGraphLookupIT.java | 295 ++++++++++++------ .../scan/CalciteEnumerableGraphLookup.java | 9 + 4 files changed, 210 insertions(+), 98 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index c0424c4627e..b5185ddaa69 100644 --- 
a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2581,6 +2581,8 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { // 1. Visit source (child) table visitChildren(node, context); RelBuilder builder = context.relBuilder; + // TODO: Limit the number of source rows to 100 for now, make it configurable. + builder.limit(0, 100); RelNode sourceTable = builder.build(); // 2. Extract parameters diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java index 1d12c1e9e7d..4853de339a3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -45,7 +45,7 @@ public abstract class GraphLookup extends BiRel { protected final String outputField; // Name of output array field @Nullable protected final String depthField; // Name of output array field - //TODO: add limitation on the maxDepth and input rows count + // TODO: add limitation on the maxDepth and input rows count protected final int maxDepth; // -1 = unlimited protected final boolean bidirectional; diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index a94f8db4913..a814a7db9ac 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -8,9 +8,14 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_GRAPH_AIRPORTS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_GRAPH_EMPLOYEES; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_GRAPH_TRAVELERS; -import 
static org.opensearch.sql.util.MatcherUtils.verifyNumOfRows; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; import java.io.IOException; +import java.util.Collections; +import java.util.List; import org.json.JSONObject; import org.junit.Test; import org.opensearch.sql.ppl.PPLIntegTestCase; @@ -22,9 +27,9 @@ *

Test data: * *

    - *
  • graph_employees: Employee hierarchy with reportsTo field (Dev -> Eliot -> Ron -> Andrew) + *
  • graph_employees: Employee hierarchy (Dev->Eliot->Ron->Andrew, Asya->Ron, Dan->Andrew) *
  • graph_travelers: Social network with friends connections - *
  • graph_airports: Airport connections graph (JFK, BOS, ORD, PWM, LHR) + *
  • graph_airports: Airport connections (JFK, BOS, ORD, PWM, LHR) *
* * @see Employee hierarchy: Dev -> Eliot -> Ron -> Andrew (CEO) Asya -> Ron -> Andrew Dan -> Andrew - */ + /** Test 1: Basic employee hierarchy traversal. Find all managers in the reporting chain. */ @Test public void testEmployeeHierarchyBasicTraversal() throws IOException { JSONObject result = @@ -64,15 +64,23 @@ public void testEmployeeHierarchyBasicTraversal() throws IOException { + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - // Should return 6 employees, each with their reporting hierarchy - System.out.println(result); - verifyNumOfRows(result, 6); + verifySchema( + result, + schema("name", "string"), + schema("reportsTo", "string"), + schema("id", "int"), + schema("reportingHierarchy", "array")); + verifyDataRows( + result, + rows("Dev", "Eliot", 1, List.of("{Eliot, Ron, 2}")), + rows("Eliot", "Ron", 2, List.of("{Ron, Andrew, 3}")), + rows("Ron", "Andrew", 3, List.of("{Andrew, null, 4}")), + rows("Andrew", null, 4, Collections.emptyList()), + rows("Asya", "Ron", 5, List.of("{Ron, Andrew, 3}")), + rows("Dan", "Andrew", 6, List.of("{Andrew, null, 4}"))); } - /** - * Test 2: Employee hierarchy traversal with depth field. Track the depth of each manager in the - * hierarchy. - */ + /** Test 2: Employee hierarchy traversal with depth field. 
*/ @Test public void testEmployeeHierarchyWithDepthField() throws IOException { JSONObject result = @@ -87,14 +95,23 @@ public void testEmployeeHierarchyWithDepthField() throws IOException { + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - System.out.println(result); - verifyNumOfRows(result, 6); + verifySchema( + result, + schema("name", "string"), + schema("reportsTo", "string"), + schema("id", "int"), + schema("reportingHierarchy", "array")); + verifyDataRows( + result, + rows("Dev", "Eliot", 1, List.of("{Eliot, Ron, 2, 0}")), + rows("Eliot", "Ron", 2, List.of("{Ron, Andrew, 3, 0}")), + rows("Ron", "Andrew", 3, List.of("{Andrew, null, 4, 0}")), + rows("Andrew", null, 4, Collections.emptyList()), + rows("Asya", "Ron", 5, List.of("{Ron, Andrew, 3, 0}")), + rows("Dan", "Andrew", 6, List.of("{Andrew, null, 4, 0}"))); } - /** - * Test 3: Employee hierarchy traversal with maxDepth limit. Only find managers up to 1 level - * above. - */ + /** Test 3: Employee hierarchy with maxDepth=1 (allows 2 levels of traversal). 
*/ @Test public void testEmployeeHierarchyWithMaxDepth() throws IOException { JSONObject result = @@ -109,15 +126,23 @@ public void testEmployeeHierarchyWithMaxDepth() throws IOException { + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - // Each employee should have at most 2 managers in their hierarchy (depth 0 and 1) - System.out.println(result); - verifyNumOfRows(result, 6); + verifySchema( + result, + schema("name", "string"), + schema("reportsTo", "string"), + schema("id", "int"), + schema("reportingHierarchy", "array")); + verifyDataRows( + result, + rows("Dev", "Eliot", 1, List.of("{Eliot, Ron, 2}", "{Ron, Andrew, 3}")), + rows("Eliot", "Ron", 2, List.of("{Ron, Andrew, 3}", "{Andrew, null, 4}")), + rows("Ron", "Andrew", 3, List.of("{Andrew, null, 4}")), + rows("Andrew", null, 4, Collections.emptyList()), + rows("Asya", "Ron", 5, List.of("{Ron, Andrew, 3}", "{Andrew, null, 4}")), + rows("Dan", "Andrew", 6, List.of("{Andrew, null, 4}"))); } - /** - * Test 4: Query specific employee and find their complete reporting chain. Filter to Dev and find - * all his managers. - */ + /** Test 4: Query Dev's complete reporting chain: Dev->Eliot->Ron->Andrew */ @Test public void testEmployeeHierarchyForSpecificEmployee() throws IOException { JSONObject result = @@ -132,16 +157,21 @@ public void testEmployeeHierarchyForSpecificEmployee() throws IOException { + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - // Dev's hierarchy: Eliot, Ron, Andrew - System.out.println(result); - verifyNumOfRows(result, 1); + verifySchema( + result, + schema("name", "string"), + schema("reportsTo", "string"), + schema("id", "int"), + schema("reportingHierarchy", "array")); + verifyDataRows(result, rows("Dev", "Eliot", 1, List.of("{Eliot, Ron, 2}"))); } // ==================== Social Network (Travelers) Tests ==================== /** - * Test 5: Find all friends (direct and indirect) for travelers. 
Social network traversal - find - * friends of friends. + * Test 5: Find all friends (direct and indirect) for travelers. Note: Currently returns empty + * socialNetwork arrays because the friends field is an array type, which the current + * implementation doesn't fully traverse. */ @Test public void testTravelersFriendsNetwork() throws IOException { @@ -156,15 +186,33 @@ public void testTravelersFriendsNetwork() throws IOException { + " as socialNetwork", TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); - // All 4 travelers with their social networks - System.out.println(result); - verifyNumOfRows(result, 4); + verifySchema( + result, + schema("name", "string"), + schema("hobbies", "string"), + schema("friends", "string"), + schema("socialNetwork", "array")); + verifyDataRows( + result, + rows( + "Tanya Jordan", + List.of("tennis", "reading"), + List.of("Shirley Soto", "Terry Hawkins"), + Collections.emptyList()), + rows( + "Shirley Soto", + List.of("golf", "reading"), + List.of("Tanya Jordan", "Terry Hawkins"), + Collections.emptyList()), + rows( + "Terry Hawkins", + List.of("tennis", "golf"), + List.of("Tanya Jordan", "Shirley Soto"), + Collections.emptyList()), + rows("Brad Green", List.of("reading"), List.of("Shirley Soto"), Collections.emptyList())); } - /** - * Test 6: Find friends network with limited depth. Only get direct friends (depth 0) and friends - * of friends (depth 1). - */ + /** Test 6: Brad Green's friends network with maxDepth=1. 
*/ @Test public void testTravelersFriendsWithMaxDepth() throws IOException { JSONObject result = @@ -180,21 +228,25 @@ public void testTravelersFriendsWithMaxDepth() throws IOException { + " as socialNetwork", TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); - // Brad Green -> Shirley Soto -> {Tanya Jordan, Terry Hawkins} - System.out.println(result); - verifyNumOfRows(result, 1); + verifySchema( + result, + schema("name", "string"), + schema("hobbies", "string"), + schema("friends", "string"), + schema("socialNetwork", "array")); + verifyDataRows( + result, + rows("Brad Green", List.of("reading"), List.of("Shirley Soto"), Collections.emptyList())); } - /** - * Test 7: Find friends network with depth tracking. Track the degree of connection for each - * friend. - */ + /** Test 7: Find friends network with depth tracking. */ @Test public void testTravelersFriendsWithDepthField() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" + + " | where name = 'Brad Green'" + " | graphLookup %s" + " startWith=friends" + " connectFromField=friends" @@ -203,15 +255,23 @@ public void testTravelersFriendsWithDepthField() throws IOException { + " as socialNetwork", TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); - System.out.println(result); - verifyNumOfRows(result, 4); + verifySchema( + result, + schema("name", "string"), + schema("hobbies", "string"), + schema("friends", "string"), + schema("socialNetwork", "array")); + verifyDataRows( + result, + rows("Brad Green", List.of("reading"), List.of("Shirley Soto"), Collections.emptyList())); } // ==================== Airport Connections Tests ==================== /** - * Test 8: Find all reachable airports from each airport. Similar to MongoDB example: "Within - * Collection with maxDepth". + * Test 8: Find all reachable airports from each airport. 
Note: Currently returns empty + * reachableAirports arrays because the connects field is an array type, which the current + * implementation doesn't fully traverse. */ @Test public void testAirportConnections() throws IOException { @@ -226,13 +286,21 @@ public void testAirportConnections() throws IOException { + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); - System.out.println(result); - verifyNumOfRows(result, 5); + verifySchema( + result, + schema("airport", "string"), + schema("connects", "string"), + schema("reachableAirports", "array")); + verifyDataRows( + result, + rows("JFK", List.of("BOS", "ORD"), Collections.emptyList()), + rows("BOS", List.of("JFK", "PWM"), Collections.emptyList()), + rows("ORD", List.of("JFK"), Collections.emptyList()), + rows("PWM", List.of("BOS", "LHR"), Collections.emptyList()), + rows("LHR", List.of("PWM"), Collections.emptyList())); } - /** - * Test 9: Find airports reachable within 1 connection. Limited traversal depth. - */ + /** Test 9: Find airports reachable from JFK within maxDepth=1. */ @Test public void testAirportConnectionsWithMaxDepth() throws IOException { JSONObject result = @@ -248,20 +316,22 @@ public void testAirportConnectionsWithMaxDepth() throws IOException { + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); - // JFK -> {BOS, ORD} (depth 0) -> {JFK, PWM} (depth 1, excluding JFK as already visited) - System.out.println(result); - verifyNumOfRows(result, 1); + verifySchema( + result, + schema("airport", "string"), + schema("connects", "string"), + schema("reachableAirports", "array")); + verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); } - /** - * Test 10: Find airports with number of connections (hops) tracked. - */ + /** Test 10: Find airports with hop count tracked. 
*/ @Test public void testAirportConnectionsWithDepthField() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" + + " | where airport = 'JFK'" + " | graphLookup %s" + " startWith=connects" + " connectFromField=connects" @@ -270,16 +340,17 @@ public void testAirportConnectionsWithDepthField() throws IOException { + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); - System.out.println(result); - verifyNumOfRows(result, 5); + verifySchema( + result, + schema("airport", "string"), + schema("connects", "string"), + schema("reachableAirports", "array")); + verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); } // ==================== Bidirectional Traversal Tests ==================== - /** - * Test 11: Bidirectional traversal on employee hierarchy. Find both reports-to and direct-reports - * relationships. - */ + /** Test 11: Bidirectional traversal for Ron (finds both managers and reports). */ @Test public void testBidirectionalEmployeeHierarchy() throws IOException { JSONObject result = @@ -295,13 +366,24 @@ public void testBidirectionalEmployeeHierarchy() throws IOException { + " as connections", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - // Ron should find both his managers (Andrew) and his reports (Eliot, Asya, and indirectly Dev) - System.out.println(result); - verifyNumOfRows(result, 1); + verifySchema( + result, + schema("name", "string"), + schema("reportsTo", "string"), + schema("id", "int"), + schema("connections", "array")); + verifyDataRows( + result, + rows( + "Ron", + "Andrew", + 3, + List.of("{Ron, Andrew, 3}", "{Andrew, null, 4}", "{Dan, Andrew, 6}"))); } /** - * Test 12: Bidirectional airport connections. Find all airports connected in either direction. + * Test 12: Bidirectional airport connections for ORD. Note: Currently returns empty + * allConnections array because the connects field is an array type. 
*/ @Test public void testBidirectionalAirportConnections() throws IOException { @@ -309,6 +391,7 @@ public void testBidirectionalAirportConnections() throws IOException { executeQuery( String.format( "source=%s" + + " | where airport = 'ORD'" + " | graphLookup %s" + " startWith=connects" + " connectFromField=connects" @@ -317,14 +400,17 @@ public void testBidirectionalAirportConnections() throws IOException { + " as allConnections", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); - verifyNumOfRows(result, 5); + verifySchema( + result, + schema("airport", "string"), + schema("connects", "string"), + schema("allConnections", "array")); + verifyDataRows(result, rows("ORD", List.of("JFK"), Collections.emptyList())); } // ==================== Edge Cases ==================== - /** - * Test 13: Graph lookup on empty result set. Filter to non-existent employee. - */ + /** Test 13: Graph lookup on empty result set (non-existent employee). */ @Test public void testEmptySourceResult() throws IOException { JSONObject result = @@ -339,14 +425,16 @@ public void testEmptySourceResult() throws IOException { + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - System.out.println(result); - verifyNumOfRows(result, 0); + verifySchema( + result, + schema("name", "string"), + schema("reportsTo", "string"), + schema("id", "int"), + schema("reportingHierarchy", "array")); + verifyDataRows(result); } - /** - * Test 14: Employee at top of hierarchy (CEO with no manager). Andrew has no reportsTo, so his - * hierarchy should be empty. - */ + /** Test 14: CEO (Andrew) with no manager - hierarchy should be empty. 
*/ @Test public void testEmployeeWithNoManager() throws IOException { JSONObject result = @@ -361,14 +449,16 @@ public void testEmployeeWithNoManager() throws IOException { + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - // Andrew is CEO, no one above him - System.out.println(result); - verifyNumOfRows(result, 1); + verifySchema( + result, + schema("name", "string"), + schema("reportsTo", "string"), + schema("id", "int"), + schema("reportingHierarchy", "array")); + verifyDataRows(result, rows("Andrew", null, 4, Collections.emptyList())); } - /** - * Test 15: Combined with other PPL commands (stats, sort). Count employees by hierarchy depth. - */ + /** Test 15: Combined with stats command. */ @Test public void testGraphLookupWithStats() throws IOException { JSONObject result = @@ -383,14 +473,18 @@ public void testGraphLookupWithStats() throws IOException { + " | stats count() by name", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - // 6 distinct employees - System.out.println(result); - verifyNumOfRows(result, 6); + verifySchema(result, schema("count()", "bigint"), schema("name", "string")); + verifyDataRows( + result, + rows(1L, "Ron"), + rows(1L, "Dan"), + rows(1L, "Dev"), + rows(1L, "Andrew"), + rows(1L, "Asya"), + rows(1L, "Eliot")); } - /** - * Test 16: Graph lookup with fields projection. Only select specific fields in the result. - */ + /** Test 16: Graph lookup with fields projection (name and reportingHierarchy only). 
*/ @Test public void testGraphLookupWithFieldsProjection() throws IOException { JSONObject result = @@ -405,7 +499,14 @@ public void testGraphLookupWithFieldsProjection() throws IOException { + " | fields name, reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); - System.out.println(result); - verifyNumOfRows(result, 6); + verifySchema(result, schema("name", "string"), schema("reportingHierarchy", "array")); + verifyDataRows( + result, + rows("Dev", List.of("{Eliot, Ron, 2}")), + rows("Eliot", List.of("{Ron, Andrew, 3}")), + rows("Ron", List.of("{Andrew, null, 4}")), + rows("Andrew", Collections.emptyList()), + rows("Asya", List.of("{Ron, Andrew, 3}")), + rows("Dan", List.of("{Andrew, null, 4}"))); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 7018a7a21dc..e3c6dc6cc0c 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -36,6 +36,7 @@ import org.opensearch.sql.calcite.plan.Scannable; import org.opensearch.sql.calcite.plan.rel.GraphLookup; import org.opensearch.sql.opensearch.request.PredicateAnalyzer.NamedFieldExpression; +import org.opensearch.sql.opensearch.storage.scan.context.LimitDigest; import org.opensearch.sql.opensearch.storage.scan.context.OSRequestBuilderAction; import org.opensearch.sql.opensearch.storage.scan.context.PushDownType; import org.opensearch.sql.opensearch.util.OpenSearchRelOptUtil; @@ -163,6 +164,14 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec GraphLookupEnumerator(CalciteEnumerableGraphLookup graphLookup) { this.graphLookup = graphLookup; this.lookupScan = (CalciteEnumerableIndexScan) graphLookup.getLookup(); + // For 
performance consideration, limit the size of the lookup table MaxResultWindow to avoid + // PIT search + final int maxResultWindow = this.lookupScan.getOsIndex().getMaxResultWindow(); + this.lookupScan.pushDownContext.add( + PushDownType.LIMIT, + new LimitDigest(maxResultWindow, 0), + (OSRequestBuilderAction) + requestBuilder -> requestBuilder.pushDownLimit(maxResultWindow, 0)); // Get the source enumerator if (graphLookup.getSource() instanceof Scannable scannable) { From f577aef1578993bb12c9a29934a5efd40af3e5bc Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 4 Feb 2026 11:06:26 +0800 Subject: [PATCH 09/23] Simplify GraphLookup param names Signed-off-by: Heng Qian --- .../opensearch/sql/ast/tree/GraphLookup.java | 10 +- .../sql/calcite/CalciteRelNodeVisitor.java | 12 +-- .../sql/calcite/plan/rel/GraphLookup.java | 28 +++--- .../calcite/plan/rel/LogicalGraphLookup.java | 41 ++++---- .../remote/CalcitePPLGraphLookupIT.java | 96 +++++++++---------- .../functions/GraphLookupFunction.java | 58 +++++------ .../rules/EnumerableGraphLookupRule.java | 6 +- .../scan/CalciteEnumerableGraphLookup.java | 49 +++++----- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 6 +- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 29 +++--- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 8 +- .../calcite/CalcitePPLGraphLookupTest.java | 36 +++---- .../sql/ppl/parser/AstBuilderTest.java | 40 ++++---- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 24 ++--- 15 files changed, 226 insertions(+), 223 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java index debe2d48bfd..c4ea9428ab5 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -22,8 +22,8 @@ /** * AST node for graphLookup command. Performs BFS graph traversal on a lookup table. * - *

Example: source=employees | graphLookup employees connectFromField=manager connectToField=name - * maxDepth=3 depthField=level direction=uni as hierarchy + *

Example: source=employees | graphLookup employees fromField=manager toField=name maxDepth=3 + * depthField=level direction=uni as hierarchy */ @Getter @Setter @@ -45,13 +45,13 @@ public enum Direction { private final UnresolvedPlan fromTable; /** Field in sourceTable to start with. */ - private final Field startWith; + private final Field startField; /** Field in fromTable that represents the outgoing edge. */ - private final Field connectFromField; + private final Field fromField; /** Field in input/fromTable to match against for traversal. */ - private final Field connectToField; + private final Field toField; /** Output field name for collected traversal results. */ private final Field as; diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index b5185ddaa69..758fc4e3830 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2586,9 +2586,9 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { RelNode sourceTable = builder.build(); // 2. 
Extract parameters - String startWith = node.getStartWith().getField().toString(); - String connectFromFieldName = node.getConnectFromField().getField().toString(); - String connectToFieldName = node.getConnectToField().getField().toString(); + String startFieldName = node.getStartField().getField().toString(); + String fromFieldName = node.getFromField().getField().toString(); + String toFieldName = node.getToField().getField().toString(); String outputFieldName = node.getAs().getField().toString(); String depthFieldName = node.getDepthFieldName(); boolean bidirectional = node.getDirection() == Direction.BI; @@ -2608,9 +2608,9 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { LogicalGraphLookup.create( sourceTable, lookupTable, - startWith, - connectFromFieldName, - connectToFieldName, + startFieldName, + fromFieldName, + toFieldName, outputFieldName, depthFieldName, maxDepthValue, diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java index 4853de339a3..f9df615abc8 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -39,9 +39,9 @@ public abstract class GraphLookup extends BiRel { // TODO: use RexInputRef instead of String for there fields - protected final String startWith; // Field in source table (start entities) - protected final String connectFromField; // Field in lookup table (edge source) - protected final String connectToField; // Field in lookup table (edge target) + protected final String startField; // Field in source table (start entities) + protected final String fromField; // Field in lookup table (edge source) + protected final String toField; // Field in lookup table (edge target) protected final String outputField; // Name of output array field @Nullable protected final String depthField; // Name of 
output array field @@ -58,9 +58,9 @@ public abstract class GraphLookup extends BiRel { * @param traitSet Trait set * @param source Source table RelNode * @param lookup Lookup table RelNode - * @param startWith Field name for start entities - * @param connectFromField Field name for outgoing edges - * @param connectToField Field name for incoming edges + * @param startField Field name for start entities + * @param fromField Field name for outgoing edges + * @param toField Field name for incoming edges * @param outputField Name of the output array field * @param depthField Name of the depth field * @param maxDepth Maximum traversal depth (-1 for unlimited) @@ -71,17 +71,17 @@ protected GraphLookup( RelTraitSet traitSet, RelNode source, RelNode lookup, - String startWith, - String connectFromField, - String connectToField, + String startField, + String fromField, + String toField, String outputField, @Nullable String depthField, int maxDepth, boolean bidirectional) { super(cluster, traitSet, source, lookup); - this.startWith = startWith; - this.connectFromField = connectFromField; - this.connectToField = connectToField; + this.startField = startField; + this.fromField = fromField; + this.toField = toField; this.outputField = outputField; this.depthField = depthField; this.maxDepth = maxDepth; @@ -137,8 +137,8 @@ public double estimateRowCount(RelMetadataQuery mq) { @Override public RelWriter explainTerms(RelWriter pw) { return super.explainTerms(pw) - .item("connectFromField", connectFromField) - .item("connectToField", connectToField) + .item("fromField", fromField) + .item("toField", toField) .item("outputField", outputField) .item("depthField", depthField) .item("maxDepth", maxDepth) diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java index d25973d4b9f..e8d6cfa7bba 100644 --- 
a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java @@ -27,8 +27,9 @@ public class LogicalGraphLookup extends GraphLookup { * @param traitSet Trait set * @param source Source table RelNode * @param lookup Lookup table RelNode - * @param connectFromField Field name for outgoing edges - * @param connectToField Field name for incoming edges + * @param startField Field name for start entities + * @param fromField Field name for outgoing edges + * @param toField Field name for incoming edges * @param outputField Name of the output array field * @param depthField Name of the depth field * @param maxDepth Maximum traversal depth (-1 for unlimited) @@ -39,9 +40,9 @@ protected LogicalGraphLookup( RelTraitSet traitSet, RelNode source, RelNode lookup, - String startWith, - String connectFromField, - String connectToField, + String startField, + String fromField, + String toField, String outputField, @Nullable String depthField, int maxDepth, @@ -51,9 +52,9 @@ protected LogicalGraphLookup( traitSet, source, lookup, - startWith, - connectFromField, - connectToField, + startField, + fromField, + toField, outputField, depthField, maxDepth, @@ -65,9 +66,9 @@ protected LogicalGraphLookup( * * @param source Source table RelNode * @param lookup Lookup table RelNode - * @param startWith Field name for start with entities - * @param connectFromField Field name for outgoing edges - * @param connectToField Field name for incoming edges + * @param startField Field name for start entities + * @param fromField Field name for outgoing edges + * @param toField Field name for incoming edges * @param outputField Name of the output array field * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions @@ -77,9 +78,9 @@ protected LogicalGraphLookup( public static LogicalGraphLookup create( RelNode source, RelNode 
lookup, - String startWith, - String connectFromField, - String connectToField, + String startField, + String fromField, + String toField, String outputField, @Nullable String depthField, int maxDepth, @@ -91,9 +92,9 @@ public static LogicalGraphLookup create( traitSet, source, lookup, - startWith, - connectFromField, - connectToField, + startField, + fromField, + toField, outputField, depthField, maxDepth, @@ -107,9 +108,9 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { traitSet, inputs.get(0), inputs.get(1), - startWith, - connectFromField, - connectToField, + startField, + fromField, + toField, outputField, depthField, maxDepth, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index a814a7db9ac..ba884d9a5a6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -58,9 +58,9 @@ public void testEmployeeHierarchyBasicTraversal() throws IOException { String.format( "source=%s" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -88,9 +88,9 @@ public void testEmployeeHierarchyWithDepthField() throws IOException { String.format( "source=%s" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " depthField=level" + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -119,9 +119,9 @@ public void testEmployeeHierarchyWithMaxDepth() throws IOException { String.format( "source=%s" + " | graphLookup 
%s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " maxDepth=1" + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -151,9 +151,9 @@ public void testEmployeeHierarchyForSpecificEmployee() throws IOException { "source=%s" + " | where name = 'Dev'" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -180,9 +180,9 @@ public void testTravelersFriendsNetwork() throws IOException { String.format( "source=%s" + " | graphLookup %s" - + " startWith=friends" - + " connectFromField=friends" - + " connectToField=name" + + " startField=friends" + + " fromField=friends" + + " toField=name" + " as socialNetwork", TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); @@ -221,9 +221,9 @@ public void testTravelersFriendsWithMaxDepth() throws IOException { "source=%s" + " | where name = 'Brad Green'" + " | graphLookup %s" - + " startWith=friends" - + " connectFromField=friends" - + " connectToField=name" + + " startField=friends" + + " fromField=friends" + + " toField=name" + " maxDepth=1" + " as socialNetwork", TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); @@ -248,9 +248,9 @@ public void testTravelersFriendsWithDepthField() throws IOException { "source=%s" + " | where name = 'Brad Green'" + " | graphLookup %s" - + " startWith=friends" - + " connectFromField=friends" - + " connectToField=name" + + " startField=friends" + + " fromField=friends" + + " toField=name" + " depthField=connectionLevel" + " as socialNetwork", TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); @@ -280,9 +280,9 @@ public void testAirportConnections() throws IOException { String.format( "source=%s" + " | graphLookup 
%s" - + " startWith=connects" - + " connectFromField=connects" - + " connectToField=airport" + + " startField=connects" + + " fromField=connects" + + " toField=airport" + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); @@ -309,9 +309,9 @@ public void testAirportConnectionsWithMaxDepth() throws IOException { "source=%s" + " | where airport = 'JFK'" + " | graphLookup %s" - + " startWith=connects" - + " connectFromField=connects" - + " connectToField=airport" + + " startField=connects" + + " fromField=connects" + + " toField=airport" + " maxDepth=1" + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); @@ -333,9 +333,9 @@ public void testAirportConnectionsWithDepthField() throws IOException { "source=%s" + " | where airport = 'JFK'" + " | graphLookup %s" - + " startWith=connects" - + " connectFromField=connects" - + " connectToField=airport" + + " startField=connects" + + " fromField=connects" + + " toField=airport" + " depthField=numConnections" + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); @@ -359,9 +359,9 @@ public void testBidirectionalEmployeeHierarchy() throws IOException { "source=%s" + " | where name = 'Ron'" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " direction=bi" + " as connections", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -393,9 +393,9 @@ public void testBidirectionalAirportConnections() throws IOException { "source=%s" + " | where airport = 'ORD'" + " | graphLookup %s" - + " startWith=connects" - + " connectFromField=connects" - + " connectToField=airport" + + " startField=connects" + + " fromField=connects" + + " toField=airport" + " direction=bi" + " as allConnections", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); @@ -419,9 +419,9 @@ public void testEmptySourceResult() throws 
IOException { "source=%s" + " | where name = 'NonExistent'" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -443,9 +443,9 @@ public void testEmployeeWithNoManager() throws IOException { "source=%s" + " | where name = 'Andrew'" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " as reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -466,9 +466,9 @@ public void testGraphLookupWithStats() throws IOException { String.format( "source=%s" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " as reportingHierarchy" + " | stats count() by name", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); @@ -492,9 +492,9 @@ public void testGraphLookupWithFieldsProjection() throws IOException { String.format( "source=%s" + " | graphLookup %s" - + " startWith=reportsTo" - + " connectFromField=reportsTo" - + " connectToField=name" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + " as reportingHierarchy" + " | fields name, reportingHierarchy", TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java index a38f9c1d739..61a878bff62 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java @@ -51,10 
+51,10 @@ public record TraversalResult(Object[] row, int depth) {} * * @param startValue The starting value to begin traversal from * @param lookupTableRows All rows from the lookup table - * @param connectFromFieldIndex Index of the field that represents outgoing edges (the field - * values we traverse FROM) - * @param connectToFieldIndex Index of the field that represents the target to match against (the - * field values we traverse TO) + * @param fromFieldIndex Index of the field that represents outgoing edges (the field values we + * traverse FROM) + * @param toFieldIndex Index of the field that represents the target to match against (the field + * values we traverse TO) * @param maxDepth Maximum traversal depth (-1 or 0 for unlimited) * @param bidirectional If true, traverse edges in both directions * @return List of traversal results containing row data and depth @@ -62,8 +62,8 @@ public record TraversalResult(Object[] row, int depth) {} public static List execute( Object startValue, List lookupTableRows, - int connectFromFieldIndex, - int connectToFieldIndex, + int fromFieldIndex, + int toFieldIndex, int maxDepth, boolean bidirectional) { @@ -71,27 +71,27 @@ public static List execute( return List.of(); } - // Build adjacency index: connectToField value -> list of rows with matching connectFromField - // This creates edges: when we're at a node with connectFromField=X, we can traverse to nodes - // where connectToField=X + // Build adjacency index: toField value -> list of rows with matching fromField + // This creates edges: when we're at a node with fromField=X, we can traverse to nodes + // where toField=X Map> forwardAdjacency = new HashMap<>(); // For bidirectional: also index reverse edges - // connectFromField value -> list of rows with matching connectToField + // fromField value -> list of rows with matching toField Map> reverseAdjacency = bidirectional ? 
new HashMap<>() : null; for (Object[] row : lookupTableRows) { - Object connectFromValue = row[connectFromFieldIndex]; - Object connectToValue = row[connectToFieldIndex]; + Object fromValue = row[fromFieldIndex]; + Object toValue = row[toFieldIndex]; - // Forward edge: from connectFromValue, we can reach this row - if (connectFromValue != null) { - forwardAdjacency.computeIfAbsent(connectFromValue, k -> new ArrayList<>()).add(row); + // Forward edge: from fromValue, we can reach this row + if (fromValue != null) { + forwardAdjacency.computeIfAbsent(fromValue, k -> new ArrayList<>()).add(row); } - // Reverse edge (for bidirectional): from connectToValue, we can reach this row - if (bidirectional && connectToValue != null) { - reverseAdjacency.computeIfAbsent(connectToValue, k -> new ArrayList<>()).add(row); + // Reverse edge (for bidirectional): from toValue, we can reach this row + if (bidirectional && toValue != null) { + reverseAdjacency.computeIfAbsent(toValue, k -> new ArrayList<>()).add(row); } } @@ -117,7 +117,7 @@ public static List execute( List forwardNeighbors = forwardAdjacency.get(current.value()); if (forwardNeighbors != null) { for (Object[] neighborRow : forwardNeighbors) { - Object neighborKey = neighborRow[connectToFieldIndex]; + Object neighborKey = neighborRow[toFieldIndex]; if (!visited.contains(neighborKey)) { visited.add(neighborKey); results.add(new TraversalResult(neighborRow, currentDepth + 1)); @@ -131,7 +131,7 @@ public static List execute( List reverseNeighbors = reverseAdjacency.get(current.value()); if (reverseNeighbors != null) { for (Object[] neighborRow : reverseNeighbors) { - Object neighborKey = neighborRow[connectFromFieldIndex]; + Object neighborKey = neighborRow[fromFieldIndex]; if (!visited.contains(neighborKey)) { visited.add(neighborKey); results.add(new TraversalResult(neighborRow, currentDepth + 1)); @@ -149,14 +149,14 @@ public static List execute( * Convenience method to get the starting value from an input row. 
* * @param inputRow The input row - * @param connectToFieldIndex Index of the field in input that contains the starting value + * @param toFieldIndex Index of the field in input that contains the starting value * @return The starting value for traversal */ - public static Object getStartValue(Object[] inputRow, int connectToFieldIndex) { - if (inputRow == null || connectToFieldIndex < 0 || connectToFieldIndex >= inputRow.length) { + public static Object getStartValue(Object[] inputRow, int toFieldIndex) { + if (inputRow == null || toFieldIndex < 0 || toFieldIndex >= inputRow.length) { return null; } - return inputRow[connectToFieldIndex]; + return inputRow[toFieldIndex]; } /** @@ -192,8 +192,8 @@ public static Object[] toResultArray(List results, boolean incl * * @param startValue Starting value for BFS traversal * @param lookupTable Collected rows from lookup table - * @param connectFromIdx Index of connectFrom field in lookup rows - * @param connectToIdx Index of connectTo field in lookup rows + * @param fromIdx Index of from field in lookup rows + * @param toIdx Index of to field in lookup rows * @param maxDepth Maximum traversal depth (-1 = unlimited) * @param bidirectional Whether to traverse edges in both directions * @param includeDepth Whether to include depth in output rows @@ -202,8 +202,8 @@ public static Object[] toResultArray(List results, boolean incl public static List executeWithDynamicLookup( Object startValue, RexSubQuery lookupTable, - int connectFromIdx, - int connectToIdx, + int fromIdx, + int toIdx, int maxDepth, boolean bidirectional, boolean includeDepth) { @@ -221,7 +221,7 @@ public static List executeWithDynamicLookup( } List results = - execute(startValue, rows, connectFromIdx, connectToIdx, maxDepth, bidirectional); + execute(startValue, rows, fromIdx, toIdx, maxDepth, bidirectional); // Convert to output format List output = new ArrayList<>(); diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java index 558286d1af3..c8da86bffe0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java @@ -92,9 +92,9 @@ public RelNode convert(RelNode rel) { traitSet, convertedSource, convertedLookup, - graphLookup.getStartWith(), - graphLookup.getConnectFromField(), - graphLookup.getConnectToField(), + graphLookup.getStartField(), + graphLookup.getFromField(), + graphLookup.getToField(), graphLookup.getOutputField(), graphLookup.getDepthField(), graphLookup.getMaxDepth(), diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index e3c6dc6cc0c..7664638b6da 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -59,8 +59,9 @@ public class CalciteEnumerableGraphLookup extends GraphLookup implements Enumera * @param source Source table RelNode * @param lookup Lookup table RelNode // * @param lookupIndex OpenSearchIndex for the lookup table * (extracted from lookup RelNode) - * @param connectFromField Field name for outgoing edges - * @param connectToField Field name for incoming edges + * @param startField Field name for start entities + * @param fromField Field name for outgoing edges + * @param toField Field name for incoming edges * @param outputField Name of the output array field * @param depthField Name of the depth field * @param maxDepth Maximum traversal depth (-1 for unlimited) @@ 
-71,9 +72,9 @@ public CalciteEnumerableGraphLookup( RelTraitSet traitSet, RelNode source, RelNode lookup, - String startWith, - String connectFromField, - String connectToField, + String startField, + String fromField, + String toField, String outputField, String depthField, int maxDepth, @@ -83,9 +84,9 @@ public CalciteEnumerableGraphLookup( traitSet, source, lookup, - startWith, - connectFromField, - connectToField, + startField, + fromField, + toField, outputField, depthField, maxDepth, @@ -99,9 +100,9 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { traitSet, inputs.get(0), inputs.get(1), - startWith, - connectFromField, - connectToField, + startField, + fromField, + toField, outputField, depthField, maxDepth, @@ -154,9 +155,9 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec private final CalciteEnumerableIndexScan lookupScan; private final Enumerator<@Nullable Object> sourceEnumerator; private final List lookupFields; - private final int startWithIndex; - private final int connectFromIdx; - private final int connectToIdx; + private final int startFieldIndex; + private final int fromFieldIdx; + private final int toFieldIdx; private Object[] current = null; @@ -184,9 +185,9 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec List sourceFields = graphLookup.getSource().getRowType().getFieldNames(); this.lookupFields = graphLookup.getLookup().getRowType().getFieldNames(); - this.startWithIndex = sourceFields.indexOf(graphLookup.getStartWith()); - this.connectFromIdx = lookupFields.indexOf(graphLookup.connectFromField); - this.connectToIdx = lookupFields.indexOf(graphLookup.connectToField); + this.startFieldIndex = sourceFields.indexOf(graphLookup.getStartField()); + this.fromFieldIdx = lookupFields.indexOf(graphLookup.fromField); + this.toFieldIdx = lookupFields.indexOf(graphLookup.toField); } @Override @@ -216,8 +217,8 @@ public boolean moveNext() { // Get the start value for BFS Object 
startValue = - (startWithIndex >= 0 && startWithIndex < sourceValues.length) - ? sourceValues[startWithIndex] + (startFieldIndex >= 0 && startFieldIndex < sourceValues.length) + ? sourceValues[startFieldIndex] : null; // Perform BFS traversal @@ -266,14 +267,14 @@ private List performBfs(Object startValue) { } // Query OpenSearch for all current level values - // Forward direction: connectFromField = currentLevelValues + // Forward direction: fromField = currentLevelValues List forwardResults = queryLookupTable(currentLevelValues); for (Object row : forwardResults) { Object[] rowArray = (Object[]) (row); - Object nextValue = rowArray[connectFromIdx]; + Object nextValue = rowArray[fromFieldIdx]; if (graphLookup.bidirectional && visited.contains(nextValue)) { - nextValue = rowArray[connectToIdx]; + nextValue = rowArray[toFieldIdx]; } if (!visited.contains(nextValue)) { if (graphLookup.depthField != null) { @@ -311,12 +312,12 @@ private List queryLookupTable(List values) { NamedFieldExpression toFieldExpression = new NamedFieldExpression( - connectToIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); + toFieldIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); QueryBuilder query = termsQuery(toFieldExpression.getReferenceForTermQuery(), values); if (graphLookup.bidirectional) { NamedFieldExpression fromFieldExpression = new NamedFieldExpression( - connectFromIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); + fromFieldIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); query = QueryBuilders.boolQuery() .should(query) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 5408d6fb21f..bb56016b53a 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -53,9 +53,9 @@ APPENDCOL: 'APPENDCOL'; ADDTOTALS: 'ADDTOTALS'; ADDCOLTOTALS: 'ADDCOLTOTALS'; GRAPHLOOKUP: 'GRAPHLOOKUP'; -START_WITH: 'STARTWITH'; -CONNECT_FROM_FIELD: 'CONNECTFROMFIELD'; 
-CONNECT_TO_FIELD: 'CONNECTTOFIELD'; +START_FIELD: 'STARTFIELD'; +FROM_FIELD: 'FROMFIELD'; +TO_FIELD: 'TOFIELD'; MAX_DEPTH: 'MAXDEPTH'; DEPTH_FIELD: 'DEPTHFIELD'; DIRECTION: 'DIRECTION'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 3654043811c..b92bb466a6d 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -632,9 +632,9 @@ graphLookupCommand ; graphLookupOption - : (START_WITH EQUAL fieldExpression) - | (CONNECT_FROM_FIELD EQUAL fieldExpression) - | (CONNECT_TO_FIELD EQUAL fieldExpression) + : (START_FIELD EQUAL fieldExpression) + | (FROM_FIELD EQUAL fieldExpression) + | (TO_FIELD EQUAL fieldExpression) | (MAX_DEPTH EQUAL integerLiteral) | (DEPTH_FIELD EQUAL fieldExpression) | (DIRECTION EQUAL (UNI | BI)) diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index c9ace02e79b..d505bcfbf4a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -1490,25 +1490,25 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom UnresolvedPlan fromTable = visitTableSourceClause(ctx.lookupTable); // Parse options with defaults - Field connectFromField = null; - Field connectToField = null; + Field fromField = null; + Field toField = null; Literal maxDepth = Literal.ZERO; - Field startWith = null; + Field startField = null; Field depthField = null; Direction direction = Direction.UNI; for (OpenSearchPPLParser.GraphLookupOptionContext option : ctx.graphLookupOption()) { - if (option.CONNECT_FROM_FIELD() != null) { - connectFromField = (Field) internalVisitExpression(option.fieldExpression()); + if (option.FROM_FIELD() != null) { + fromField = (Field) internalVisitExpression(option.fieldExpression()); } - if (option.CONNECT_TO_FIELD() != null) { - connectToField 
= (Field) internalVisitExpression(option.fieldExpression()); + if (option.TO_FIELD() != null) { + toField = (Field) internalVisitExpression(option.fieldExpression()); } if (option.MAX_DEPTH() != null) { maxDepth = (Literal) internalVisitExpression(option.integerLiteral()); } - if (option.START_WITH() != null) { - startWith = (Field) internalVisitExpression(option.fieldExpression()); + if (option.START_FIELD() != null) { + startField = (Field) internalVisitExpression(option.fieldExpression()); } if (option.DEPTH_FIELD() != null) { depthField = (Field) internalVisitExpression(option.fieldExpression()); @@ -1520,18 +1520,17 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom Field as = (Field) internalVisitExpression(ctx.outputField); - if (connectFromField == null || connectToField == null) { - throw new SemanticCheckException( - "connectFromField and connectToField must be specified for graphLookup"); + if (fromField == null || toField == null) { + throw new SemanticCheckException("fromField and toField must be specified for graphLookup"); } return GraphLookup.builder() .fromTable(fromTable) - .connectFromField(connectFromField) - .connectToField(connectToField) + .fromField(fromField) + .toField(toField) .as(as) .maxDepth(maxDepth) - .startWith(startWith) + .startField(startField) .depthField(depthField) .direction(direction) .build(); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index e599924d153..caa5fa74948 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -230,11 +230,11 @@ public String visitGraphLookup(GraphLookup node, String context) { String child = node.getChild().get(0).accept(this, context); StringBuilder command = new StringBuilder(); command.append(child).append(" | graphlookup 
").append(MASK_TABLE); - if (node.getStartWith() != null) { - command.append(" startwith=").append(MASK_COLUMN); + if (node.getStartField() != null) { + command.append(" startField=").append(MASK_COLUMN); } - command.append(" connectFromField=").append(MASK_COLUMN); - command.append(" connectToField=").append(MASK_COLUMN); + command.append(" fromField=").append(MASK_COLUMN); + command.append(" toField=").append(MASK_COLUMN); if (node.getMaxDepth() != null && !Integer.valueOf(0).equals(node.getMaxDepth().getValue())) { command.append(" maxDepth=").append(MASK_LITERAL); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java index e23687465b9..d0f6c5169a3 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -43,15 +43,16 @@ public CalcitePPLGraphLookupTest() { public void testGraphLookupBasic() { // Test basic graphLookup with same source and lookup table String ppl = - "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" - + " connectToField=name as reportingHierarchy"; + "source=employee | graphLookup employee startField=reportsTo fromField=reportsTo" + + " toField=name as reportingHierarchy"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[-1]," + " bidirectional=[false])\n" - + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalSort(fetch=[100])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + " LogicalTableScan(table=[[scott, employee]])\n"; verifyLogical(root, expectedLogical); } @@ -60,15 +61,16 @@ public void testGraphLookupBasic() { public void 
testGraphLookupWithDepthField() { // Test graphLookup with depthField parameter String ppl = - "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" - + " connectToField=name depthField=level as reportingHierarchy"; + "source=employee | graphLookup employee startField=reportsTo fromField=reportsTo" + + " toField=name depthField=level as reportingHierarchy"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + " outputField=[reportingHierarchy], depthField=[Field(field=level, fieldArgs=[])]," + " maxDepth=[-1], bidirectional=[false])\n" - + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalSort(fetch=[100])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + " LogicalTableScan(table=[[scott, employee]])\n"; verifyLogical(root, expectedLogical); } @@ -77,15 +79,16 @@ public void testGraphLookupWithDepthField() { public void testGraphLookupWithMaxDepth() { // Test graphLookup with maxDepth parameter String ppl = - "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" - + " connectToField=name maxDepth=3 as reportingHierarchy"; + "source=employee | graphLookup employee startField=reportsTo fromField=reportsTo" + + " toField=name maxDepth=3 as reportingHierarchy"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[3]," + " bidirectional=[false])\n" - + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalSort(fetch=[100])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + " LogicalTableScan(table=[[scott, employee]])\n"; verifyLogical(root, expectedLogical); } @@ -94,15 +97,16 @@ public void testGraphLookupWithMaxDepth() 
{ public void testGraphLookupBidirectional() { // Test graphLookup with bidirectional traversal String ppl = - "source=employee | graphLookup employee startWith=reportsTo connectFromField=reportsTo" - + " connectToField=name direction=bi as reportingHierarchy"; + "source=employee | graphLookup employee startField=reportsTo fromField=reportsTo" + + " toField=name direction=bi as reportingHierarchy"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalGraphLookup(connectFromField=[reportsTo], connectToField=[name]," + "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[-1]," + " bidirectional=[true])\n" - + " LogicalTableScan(table=[[scott, employee]])\n" + + " LogicalSort(fetch=[100])\n" + + " LogicalTableScan(table=[[scott, employee]])\n" + " LogicalTableScan(table=[[scott, employee]])\n"; verifyLogical(root, expectedLogical); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 6764420d133..f7cadaaf57d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -1649,58 +1649,58 @@ public void testMvmapWithNonFieldFirstArgThrowsException() { public void testGraphLookupCommand() { // Basic graphLookup with required parameters assertEqual( - "source=t | graphLookup employees connectFromField=manager connectToField=name maxDepth=3" + "source=t | graphLookup employees fromField=manager toField=name maxDepth=3" + " as reportingHierarchy", GraphLookup.builder() .child(relation("t")) .fromTable(relation("employees")) - .connectFromField(field("manager")) - .connectToField(field("name")) + .fromField(field("manager")) + .toField(field("name")) .as(field("reportingHierarchy")) .maxDepth(intLiteral(3)) - .startWith(null) + .startField(null) .depthField(null) 
.direction(GraphLookup.Direction.UNI) .build()); - // graphLookup with startWith filter + // graphLookup with startField filter assertEqual( - "source=t | graphLookup employees connectFromField=manager connectToField=name" - + " startWith=id as reportingHierarchy", + "source=t | graphLookup employees fromField=manager toField=name" + + " startField=id as reportingHierarchy", GraphLookup.builder() .child(relation("t")) .fromTable(relation("employees")) - .connectFromField(field("manager")) - .connectToField(field("name")) + .fromField(field("manager")) + .toField(field("name")) .as(field("reportingHierarchy")) .maxDepth(intLiteral(0)) - .startWith(field("id")) + .startField(field("id")) .depthField(null) .direction(GraphLookup.Direction.UNI) .build()); // graphLookup with depthField and bidirectional assertEqual( - "source=t | graphLookup employees connectFromField=manager connectToField=name" + "source=t | graphLookup employees fromField=manager toField=name" + " depthField=level direction=bi as reportingHierarchy", GraphLookup.builder() .child(relation("t")) .fromTable(relation("employees")) - .connectFromField(field("manager")) - .connectToField(field("name")) + .fromField(field("manager")) + .toField(field("name")) .as(field("reportingHierarchy")) .maxDepth(intLiteral(0)) - .startWith(null) + .startField(null) .depthField(field("level")) .direction(GraphLookup.Direction.BI) .build()); - // Error: missing connectFromField - SemanticCheckException thrown by AstBuilder + // Error: missing fromField - SemanticCheckException thrown by AstBuilder assertThrows( SemanticCheckException.class, () -> plan( - "source=t | graphLookup employees connectToField=name startWith=id as" + "source=t | graphLookup employees toField=name startField=id as" + " reportingHierarchy")); // Error: missing lookup table - SyntaxCheckException from grammar @@ -1708,14 +1708,12 @@ public void testGraphLookupCommand() { SyntaxCheckException.class, () -> plan( - "source=t | graphLookup 
connectFromField=manager connectToField=name as" + "source=t | graphLookup fromField=manager toField=name as" + " reportingHierarchy")); - // Error: missing connectToField - SemanticCheckException thrown by AstBuilder + // Error: missing toField - SemanticCheckException thrown by AstBuilder assertThrows( SemanticCheckException.class, - () -> - plan( - "source=t | graphLookup employees connectFromField=manager as reportingHierarchy")); + () -> plan("source=t | graphLookup employees fromField=manager as reportingHierarchy")); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 8a8e50081aa..01ccf9180b0 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -647,40 +647,40 @@ public void testLookup() { public void testGraphLookup() { // Basic graphLookup with required parameters assertEquals( - "source=table | graphlookup table connectFromField=identifier connectToField=identifier" + "source=table | graphlookup table fromField=identifier toField=identifier" + " direction=uni as identifier", anonymize( - "source=t | graphLookup employees connectFromField=manager connectToField=name" + "source=t | graphLookup employees fromField=manager toField=name" + " as reportingHierarchy")); // graphLookup with maxDepth assertEquals( - "source=table | graphlookup table connectFromField=identifier connectToField=identifier" + "source=table | graphlookup table fromField=identifier toField=identifier" + " maxDepth=*** direction=uni as identifier", anonymize( - "source=t | graphLookup employees connectFromField=manager connectToField=name" + "source=t | graphLookup employees fromField=manager toField=name" + " maxDepth=3 as reportingHierarchy")); // graphLookup with depthField assertEquals( - "source=table | graphlookup table 
connectFromField=identifier connectToField=identifier" + "source=table | graphlookup table fromField=identifier toField=identifier" + " depthField=identifier direction=uni as identifier", anonymize( - "source=t | graphLookup employees connectFromField=manager connectToField=name" + "source=t | graphLookup employees fromField=manager toField=name" + " depthField=level as reportingHierarchy")); // graphLookup with bidirectional mode assertEquals( - "source=table | graphlookup table connectFromField=identifier connectToField=identifier" + "source=table | graphlookup table fromField=identifier toField=identifier" + " direction=bi as identifier", anonymize( - "source=t | graphLookup employees connectFromField=manager connectToField=name" + "source=t | graphLookup employees fromField=manager toField=name" + " direction=bi as reportingHierarchy")); // graphLookup with all optional parameters assertEquals( - "source=table | graphlookup table startwith=identifier connectFromField=identifier" - + " connectToField=identifier maxDepth=*** depthField=identifier direction=bi" + "source=table | graphlookup table startField=identifier fromField=identifier" + + " toField=identifier maxDepth=*** depthField=identifier direction=bi" + " as identifier", anonymize( - "source=t | graphLookup employees connectFromField=manager connectToField=name" - + " startWith=id maxDepth=5 depthField=level direction=bi as reportingHierarchy")); + "source=t | graphLookup employees fromField=manager toField=name" + + " startField=id maxDepth=5 depthField=level direction=bi as reportingHierarchy")); } @Test From 61e852689e16178f028e7d1ec8b3a2f922e61403 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 4 Feb 2026 13:21:11 +0800 Subject: [PATCH 10/23] Refine IT Signed-off-by: Heng Qian --- .../remote/CalcitePPLGraphLookupIT.java | 171 ++++++++---------- .../src/test/resources/graph_travelers.json | 8 +- .../graph_travelers_index_mapping.json | 5 +- .../scan/CalciteEnumerableGraphLookup.java | 1 + 4 files 
changed, 82 insertions(+), 103 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index ba884d9a5a6..b6c392a71e1 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -28,7 +28,7 @@ * *
    *
  • graph_employees: Employee hierarchy (Dev->Eliot->Ron->Andrew, Asya->Ron, Dan->Andrew) - *
  • graph_travelers: Social network with friends connections + *
  • graph_travelers: Travelers with nearest airport (Dev->JFK, Eliot->JFK, Jeff->BOS) *
  • graph_airports: Airport connections (JFK, BOS, ORD, PWM, LHR) *
* @@ -166,186 +166,169 @@ public void testEmployeeHierarchyForSpecificEmployee() throws IOException { verifyDataRows(result, rows("Dev", "Eliot", 1, List.of("{Eliot, Ron, 2}"))); } - // ==================== Social Network (Travelers) Tests ==================== + // ==================== Airport Connections Tests ==================== /** - * Test 5: Find all friends (direct and indirect) for travelers. Note: Currently returns empty - * socialNetwork arrays because the friends field is an array type, which the current + * Test 5: Find all reachable airports from each airport. Note: Currently returns empty + * reachableAirports arrays because the connects field is an array type, which the current * implementation doesn't fully traverse. */ @Test - public void testTravelersFriendsNetwork() throws IOException { + public void testAirportConnections() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" + " | graphLookup %s" - + " startField=friends" - + " fromField=friends" - + " toField=name" - + " as socialNetwork", - TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); + + " startField=connects" + + " fromField=connects" + + " toField=airport" + + " as reachableAirports", + TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); verifySchema( result, - schema("name", "string"), - schema("hobbies", "string"), - schema("friends", "string"), - schema("socialNetwork", "array")); + schema("airport", "string"), + schema("connects", "string"), + schema("reachableAirports", "array")); verifyDataRows( result, - rows( - "Tanya Jordan", - List.of("tennis", "reading"), - List.of("Shirley Soto", "Terry Hawkins"), - Collections.emptyList()), - rows( - "Shirley Soto", - List.of("golf", "reading"), - List.of("Tanya Jordan", "Terry Hawkins"), - Collections.emptyList()), - rows( - "Terry Hawkins", - List.of("tennis", "golf"), - List.of("Tanya Jordan", "Shirley Soto"), - Collections.emptyList()), - rows("Brad Green", List.of("reading"), List.of("Shirley 
Soto"), Collections.emptyList())); + rows("JFK", List.of("BOS", "ORD"), Collections.emptyList()), + rows("BOS", List.of("JFK", "PWM"), Collections.emptyList()), + rows("ORD", List.of("JFK"), Collections.emptyList()), + rows("PWM", List.of("BOS", "LHR"), Collections.emptyList()), + rows("LHR", List.of("PWM"), Collections.emptyList())); } - /** Test 6: Brad Green's friends network with maxDepth=1. */ + /** Test 6: Find airports reachable from JFK within maxDepth=1. */ @Test - public void testTravelersFriendsWithMaxDepth() throws IOException { + public void testAirportConnectionsWithMaxDepth() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" - + " | where name = 'Brad Green'" + + " | where airport = 'JFK'" + " | graphLookup %s" - + " startField=friends" - + " fromField=friends" - + " toField=name" + + " startField=connects" + + " fromField=connects" + + " toField=airport" + " maxDepth=1" - + " as socialNetwork", - TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); + + " as reachableAirports", + TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); verifySchema( result, - schema("name", "string"), - schema("hobbies", "string"), - schema("friends", "string"), - schema("socialNetwork", "array")); - verifyDataRows( - result, - rows("Brad Green", List.of("reading"), List.of("Shirley Soto"), Collections.emptyList())); + schema("airport", "string"), + schema("connects", "string"), + schema("reachableAirports", "array")); + verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); } - /** Test 7: Find friends network with depth tracking. */ + /** Test 7: Find airports with hop count tracked. 
*/ @Test - public void testTravelersFriendsWithDepthField() throws IOException { + public void testAirportConnectionsWithDepthField() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" - + " | where name = 'Brad Green'" + + " | where airport = 'JFK'" + " | graphLookup %s" - + " startField=friends" - + " fromField=friends" - + " toField=name" - + " depthField=connectionLevel" - + " as socialNetwork", - TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_TRAVELERS)); + + " startField=connects" + + " fromField=connects" + + " toField=airport" + + " depthField=numConnections" + + " as reachableAirports", + TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); verifySchema( result, - schema("name", "string"), - schema("hobbies", "string"), - schema("friends", "string"), - schema("socialNetwork", "array")); - verifyDataRows( - result, - rows("Brad Green", List.of("reading"), List.of("Shirley Soto"), Collections.emptyList())); + schema("airport", "string"), + schema("connects", "string"), + schema("reachableAirports", "array")); + verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); } - // ==================== Airport Connections Tests ==================== - /** - * Test 8: Find all reachable airports from each airport. Note: Currently returns empty - * reachableAirports arrays because the connects field is an array type, which the current - * implementation doesn't fully traverse. + * Test 8: Find reachable airports for all travelers. Uses travelers as source and airports as + * lookup table, with nearestAirport as the starting point for graph traversal. 
*/ @Test - public void testAirportConnections() throws IOException { + public void testTravelersReachableAirports() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" + " | graphLookup %s" - + " startField=connects" + + " startField=nearestAirport" + " fromField=connects" + " toField=airport" + " as reachableAirports", - TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); + TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_AIRPORTS)); verifySchema( result, - schema("airport", "string"), - schema("connects", "string"), + schema("name", "string"), + schema("nearestAirport", "string"), schema("reachableAirports", "array")); verifyDataRows( result, - rows("JFK", List.of("BOS", "ORD"), Collections.emptyList()), - rows("BOS", List.of("JFK", "PWM"), Collections.emptyList()), - rows("ORD", List.of("JFK"), Collections.emptyList()), - rows("PWM", List.of("BOS", "LHR"), Collections.emptyList()), - rows("LHR", List.of("PWM"), Collections.emptyList())); + rows("Dev", "JFK", List.of("{JFK, [BOS, ORD]}")), + rows("Eliot", "JFK", List.of("{JFK, [BOS, ORD]}")), + rows("Jeff", "BOS", List.of("{BOS, [JFK, PWM]}"))); } - /** Test 9: Find airports reachable from JFK within maxDepth=1. */ + /** + * Test 9: Find reachable airports for a specific traveler (Dev at JFK) with depth tracking. + * Traverses from JFK through connected airports. 
+ */ @Test - public void testAirportConnectionsWithMaxDepth() throws IOException { + public void testTravelerReachableAirportsWithDepthField() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" - + " | where airport = 'JFK'" + + " | where name = 'Dev'" + " | graphLookup %s" - + " startField=connects" + + " startField=nearestAirport" + " fromField=connects" + " toField=airport" - + " maxDepth=1" + + " depthField=hops" + " as reachableAirports", - TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); + TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_AIRPORTS)); verifySchema( result, - schema("airport", "string"), - schema("connects", "string"), + schema("name", "string"), + schema("nearestAirport", "string"), schema("reachableAirports", "array")); - verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); + verifyDataRows(result, rows("Dev", "JFK", List.of("{JFK, [BOS, ORD], 0}"))); } - /** Test 10: Find airports with hop count tracked. */ + /** + * Test 10: Find reachable airports for Jeff (at BOS) with maxDepth=1. Finds BOS record as the + * starting point and traverses one level to connected airports. 
+ */ @Test - public void testAirportConnectionsWithDepthField() throws IOException { + public void testTravelerReachableAirportsWithMaxDepth() throws IOException { JSONObject result = executeQuery( String.format( "source=%s" - + " | where airport = 'JFK'" + + " | where name = 'Jeff'" + " | graphLookup %s" - + " startField=connects" + + " startField=nearestAirport" + " fromField=connects" + " toField=airport" - + " depthField=numConnections" + + " maxDepth=1" + " as reachableAirports", - TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); + TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_AIRPORTS)); verifySchema( result, - schema("airport", "string"), - schema("connects", "string"), + schema("name", "string"), + schema("nearestAirport", "string"), schema("reachableAirports", "array")); - verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); + verifyDataRows(result, rows("Jeff", "BOS", List.of("{BOS, [JFK, PWM]}"))); } // ==================== Bidirectional Traversal Tests ==================== diff --git a/integ-test/src/test/resources/graph_travelers.json b/integ-test/src/test/resources/graph_travelers.json index da4f4ec42f7..eb11d2206cc 100644 --- a/integ-test/src/test/resources/graph_travelers.json +++ b/integ-test/src/test/resources/graph_travelers.json @@ -1,8 +1,6 @@ {"index":{"_id":"1"}} -{"name":"Tanya Jordan","friends":["Shirley Soto","Terry Hawkins"],"hobbies":["tennis","reading"]} +{"name":"Dev","nearestAirport":"JFK"} {"index":{"_id":"2"}} -{"name":"Shirley Soto","friends":["Tanya Jordan","Terry Hawkins"],"hobbies":["golf","reading"]} +{"name":"Eliot","nearestAirport":"JFK"} {"index":{"_id":"3"}} -{"name":"Terry Hawkins","friends":["Tanya Jordan","Shirley Soto"],"hobbies":["tennis","golf"]} -{"index":{"_id":"4"}} -{"name":"Brad Green","friends":["Shirley Soto"],"hobbies":["reading"]} +{"name":"Jeff","nearestAirport":"BOS"} diff --git a/integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json 
b/integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json index de430eb8220..f4697dead12 100644 --- a/integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json +++ b/integ-test/src/test/resources/indexDefinitions/graph_travelers_index_mapping.json @@ -4,10 +4,7 @@ "name": { "type": "keyword" }, - "friends": { - "type": "keyword" - }, - "hobbies": { + "nearestAirport": { "type": "keyword" } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 7664638b6da..d7b9d04116f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -273,6 +273,7 @@ private List performBfs(Object startValue) { for (Object row : forwardResults) { Object[] rowArray = (Object[]) (row); Object nextValue = rowArray[fromFieldIdx]; + // Note that nextValue may be a list if (graphLookup.bidirectional && visited.contains(nextValue)) { nextValue = rowArray[toFieldIdx]; } From 906efc568f217c0b5465747b34520014b3647ee1 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 4 Feb 2026 14:16:02 +0800 Subject: [PATCH 11/23] Support value of list; Support retrieve circle edges also Signed-off-by: Heng Qian --- .../sql/calcite/CalciteRelNodeVisitor.java | 4 +- .../remote/CalcitePPLGraphLookupIT.java | 46 +++++++----- .../scan/CalciteEnumerableGraphLookup.java | 72 ++++++++++++++----- .../calcite/CalcitePPLGraphLookupTest.java | 6 +- 4 files changed, 87 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 758fc4e3830..29f7be888b4 100644 --- 
a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2594,8 +2594,8 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { boolean bidirectional = node.getDirection() == Direction.BI; RexLiteral maxDepthNode = (RexLiteral) rexVisitor.analyze(node.getMaxDepth(), context); - int maxDepthValue = maxDepthNode.getValueAs(Integer.class); - maxDepthValue = maxDepthValue <= 0 ? -1 : maxDepthValue; + Integer maxDepthValue = maxDepthNode.getValueAs(Integer.class); + maxDepthValue = maxDepthValue == null ? 0 : maxDepthValue; // 3. Visit and materialize lookup table analyze(node.getFromTable(), context); diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index b6c392a71e1..53c1ed91f17 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -168,11 +168,7 @@ public void testEmployeeHierarchyForSpecificEmployee() throws IOException { // ==================== Airport Connections Tests ==================== - /** - * Test 5: Find all reachable airports from each airport. Note: Currently returns empty - * reachableAirports arrays because the connects field is an array type, which the current - * implementation doesn't fully traverse. - */ + /** Test 5: Find all reachable airports from each airport. 
*/ @Test public void testAirportConnections() throws IOException { JSONObject result = @@ -180,7 +176,7 @@ public void testAirportConnections() throws IOException { String.format( "source=%s" + " | graphLookup %s" - + " startField=connects" + + " startField=airport" + " fromField=connects" + " toField=airport" + " as reachableAirports", @@ -193,11 +189,11 @@ public void testAirportConnections() throws IOException { schema("reachableAirports", "array")); verifyDataRows( result, - rows("JFK", List.of("BOS", "ORD"), Collections.emptyList()), - rows("BOS", List.of("JFK", "PWM"), Collections.emptyList()), - rows("ORD", List.of("JFK"), Collections.emptyList()), - rows("PWM", List.of("BOS", "LHR"), Collections.emptyList()), - rows("LHR", List.of("PWM"), Collections.emptyList())); + rows("JFK", List.of("BOS", "ORD"), List.of("{JFK, [BOS, ORD]}")), + rows("BOS", List.of("JFK", "PWM"), List.of("{BOS, [JFK, PWM]}")), + rows("ORD", List.of("JFK"), List.of("{ORD, [JFK]}")), + rows("PWM", List.of("BOS", "LHR"), List.of("{PWM, [BOS, LHR]}")), + rows("LHR", List.of("PWM"), List.of("{LHR, [PWM]}"))); } /** Test 6: Find airports reachable from JFK within maxDepth=1. */ @@ -209,7 +205,7 @@ public void testAirportConnectionsWithMaxDepth() throws IOException { "source=%s" + " | where airport = 'JFK'" + " | graphLookup %s" - + " startField=connects" + + " startField=airport" + " fromField=connects" + " toField=airport" + " maxDepth=1" @@ -221,10 +217,15 @@ public void testAirportConnectionsWithMaxDepth() throws IOException { schema("airport", "string"), schema("connects", "string"), schema("reachableAirports", "array")); - verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); + verifyDataRows( + result, + rows( + "JFK", + List.of("BOS", "ORD"), + List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}", "{ORD, [JFK]}"))); } - /** Test 7: Find airports with hop count tracked. 
*/ + /** Test 7: Find airports with default depth(=0) and start value of list */ @Test public void testAirportConnectionsWithDepthField() throws IOException { JSONObject result = @@ -239,13 +240,14 @@ public void testAirportConnectionsWithDepthField() throws IOException { + " depthField=numConnections" + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); - verifySchema( result, schema("airport", "string"), schema("connects", "string"), schema("reachableAirports", "array")); - verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), Collections.emptyList())); + verifyDataRows( + result, + rows("JFK", List.of("BOS", "ORD"), List.of("{BOS, [JFK, PWM], 0}", "{ORD, [JFK], 0}"))); } /** @@ -328,7 +330,10 @@ public void testTravelerReachableAirportsWithMaxDepth() throws IOException { schema("name", "string"), schema("nearestAirport", "string"), schema("reachableAirports", "array")); - verifyDataRows(result, rows("Jeff", "BOS", List.of("{BOS, [JFK, PWM]}"))); + verifyDataRows( + result, + rows( + "Jeff", "BOS", List.of("{BOS, [JFK, PWM]}", "{JFK, [BOS, ORD]}", "{PWM, [BOS, LHR]}"))); } // ==================== Bidirectional Traversal Tests ==================== @@ -388,7 +393,12 @@ public void testBidirectionalAirportConnections() throws IOException { schema("airport", "string"), schema("connects", "string"), schema("allConnections", "array")); - verifyDataRows(result, rows("ORD", List.of("JFK"), Collections.emptyList())); + verifyDataRows( + result, + rows( + "ORD", + List.of("JFK"), + List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}", "{ORD, [JFK]}"))); } // ==================== Edge Cases ==================== diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index d7b9d04116f..2ba2e2d8745 100644 --- 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -249,8 +249,16 @@ private List performBfs(Object startValue) { Queue queue = new ArrayDeque<>(); // Initialize BFS with start value - queue.offer(startValue); - visited.add(startValue); + if (startValue instanceof List list) { + list.forEach( + value -> { + queue.offer(value); + visited.add(value); + }); + } else { + queue.offer(startValue); + visited.add(startValue); + } int currentLevelDepth = 0; while (!queue.isEmpty()) { @@ -272,24 +280,32 @@ private List performBfs(Object startValue) { for (Object row : forwardResults) { Object[] rowArray = (Object[]) (row); - Object nextValue = rowArray[fromFieldIdx]; - // Note that nextValue may be a list - if (graphLookup.bidirectional && visited.contains(nextValue)) { - nextValue = rowArray[toFieldIdx]; + Object fromValue = rowArray[fromFieldIdx]; + // Collect next values to traverse (may be single value or list) + // For forward traversal: extract fromField values for next level + // For bidirectional: also extract toField values + List nextValues = new ArrayList<>(); + collectValues(fromValue, nextValues); + if (graphLookup.bidirectional) { + Object toValue = rowArray[toFieldIdx]; + collectValues(toValue, nextValues); } - if (!visited.contains(nextValue)) { - if (graphLookup.depthField != null) { - Object[] rowWithDepth = new Object[rowArray.length + 1]; - System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); - rowWithDepth[rowArray.length] = currentLevelDepth; - results.add(rowWithDepth); - } else { - results.add(rowArray); - } - if (nextValue != null) { - visited.add(nextValue); - queue.offer(nextValue); + // Add row to results (all matched rows should be included) + if (graphLookup.depthField != null) { + Object[] rowWithDepth = new Object[rowArray.length + 1]; + System.arraycopy(rowArray, 0, 
rowWithDepth, 0, rowArray.length); + rowWithDepth[rowArray.length] = currentLevelDepth; + results.add(rowWithDepth); + } else { + results.add(rowArray); + } + + // Add unvisited values to queue for next level traversal + for (Object val : nextValues) { + if (val != null && !visited.contains(val)) { + visited.add(val); + queue.offer(val); } } } @@ -311,11 +327,14 @@ private List queryLookupTable(List values) { return List.of(); } + // Forward direction: query toField = values to find nodes matching current values + // Then extract fromField values for next level traversal NamedFieldExpression toFieldExpression = new NamedFieldExpression( toFieldIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); QueryBuilder query = termsQuery(toFieldExpression.getReferenceForTermQuery(), values); if (graphLookup.bidirectional) { + // Also query fromField for bidirectional traversal NamedFieldExpression fromFieldExpression = new NamedFieldExpression( fromFieldIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); @@ -339,6 +358,23 @@ private List queryLookupTable(List values) { return results; } + /** + * Collects values from a field that may be a single value or a list. 
+ * + * @param value The field value (may be single value or List) + * @param collector The list to collect values into + */ + private void collectValues(Object value, List collector) { + if (value == null) { + return; + } + if (value instanceof List list) { + collector.addAll(list); + } else { + collector.add(value); + } + } + @Override public void reset() { sourceEnumerator.reset(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java index d0f6c5169a3..d83c03af8d5 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -49,7 +49,7 @@ public void testGraphLookupBasic() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," - + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[-1]," + + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[0]," + " bidirectional=[false])\n" + " LogicalSort(fetch=[100])\n" + " LogicalTableScan(table=[[scott, employee]])\n" @@ -68,7 +68,7 @@ public void testGraphLookupWithDepthField() { String expectedLogical = "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," + " outputField=[reportingHierarchy], depthField=[Field(field=level, fieldArgs=[])]," - + " maxDepth=[-1], bidirectional=[false])\n" + + " maxDepth=[0], bidirectional=[false])\n" + " LogicalSort(fetch=[100])\n" + " LogicalTableScan(table=[[scott, employee]])\n" + " LogicalTableScan(table=[[scott, employee]])\n"; @@ -103,7 +103,7 @@ public void testGraphLookupBidirectional() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," - + " outputField=[reportingHierarchy], depthField=[null], maxDepth=[-1]," + + " outputField=[reportingHierarchy], depthField=[null], 
maxDepth=[0]," + " bidirectional=[true])\n" + " LogicalSort(fetch=[100])\n" + " LogicalTableScan(table=[[scott, employee]])\n" From 845b45099685e329d3277fe1148b60fd2a07dcd9 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 4 Feb 2026 14:42:22 +0800 Subject: [PATCH 12/23] Add documentation for graph lookup Signed-off-by: Heng Qian --- docs/user/ppl/cmd/graphlookup.md | 257 +++++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 docs/user/ppl/cmd/graphlookup.md diff --git a/docs/user/ppl/cmd/graphlookup.md b/docs/user/ppl/cmd/graphlookup.md new file mode 100644 index 00000000000..495503bb520 --- /dev/null +++ b/docs/user/ppl/cmd/graphlookup.md @@ -0,0 +1,257 @@ + +# graphLookup + +The `graphLookup` command performs recursive graph traversal on a collection using a breadth-first search (BFS) algorithm. It searches for documents matching a start value and recursively traverses connections between documents based on specified fields. This is useful for hierarchical data like organizational charts, social networks, or routing graphs. 
+ +## Syntax + +The `graphLookup` command has the following syntax: + +```syntax +graphLookup <lookupIndex> startField=<startField> fromField=<fromField> toField=<toField> [maxDepth=<maxDepth>] [depthField=<depthField>] [direction=(uni | bi)] as <outputField> +``` + +The following are examples of the `graphLookup` command syntax: + +```syntax +source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name as reportingHierarchy +source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name maxDepth=2 as reportingHierarchy +source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name depthField=level as reportingHierarchy +source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name direction=bi as connections +source = travelers | graphLookup airports startField=nearestAirport fromField=connects toField=airport as reachableAirports +``` + +## Parameters + +The `graphLookup` command supports the following parameters. + +| Parameter | Required/Optional | Description | +| --- | --- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `<lookupIndex>` | Required | The name of the index to perform the graph traversal on. Can be the same as the source index for self-referential graphs. | +| `startField=<startField>` | Required | The field in the source documents whose value is used to start the recursive search. The value of this field is matched against `toField` in the lookup index. We support both single value and array values as starting points. | +| `fromField=<fromField>` | Required | The field in the lookup index documents that contains the value to recurse on. After matching a document, the value of this field is used to find the next set of documents. It supports both single value and array values. 
| +| `toField=<toField>` | Required | The field in the lookup index documents to match against. Documents where `toField` equals the current traversal value are included in the results. | +| `maxDepth=<maxDepth>` | Optional | The maximum recursion depth of hops. Default is `0`. A value of `0` means only the direct connections to the start values are returned. A value of `1` means 1 hop connections (initial match plus one recursive step), and so on. | +| `depthField=<depthField>` | Optional | The name of the field to add to each traversed document indicating its recursion depth. If not specified, no depth field is added. Depth starts at `0` for the first level of matches. | +| `direction=(uni \| bi)` | Optional | The traversal direction. `uni` (default) performs unidirectional traversal following edges in the forward direction only. `bi` performs bidirectional traversal, following edges in both directions. | +| `as <outputField>` | Required | The name of the output array field that will contain all documents found during the graph traversal. | + +## How It Works + +The `graphLookup` command performs a breadth-first search (BFS) traversal: + +1. For each source document, extract the value of `startField` +2. Query the lookup index to find documents where `toField` matches the start value +3. Add matched documents to the result array +4. Extract `fromField` values from matched documents to continue traversal +5. Repeat steps 2-4 until no new documents are found or `maxDepth` is reached + +For bidirectional traversal (`direction=bi`), the algorithm also follows edges in the reverse direction by additionally matching `fromField` values. 
+ +## Example 1: Employee Hierarchy Traversal + +Given an `employees` index with the following documents: + +| id | name | reportsTo | +|----|------|-----------| +| 1 | Dev | Eliot | +| 2 | Eliot | Ron | +| 3 | Ron | Andrew | +| 4 | Andrew | null | +| 5 | Asya | Ron | +| 6 | Dan | Andrew | + +The following query finds the reporting chain for each employee: + +```ppl ignore +source = employees + | graphLookup employees + startField=reportsTo + fromField=reportsTo + toField=name + as reportingHierarchy +``` + +The query returns the following results: + +```text ++--------+----------+----+---------------------+ +| name | reportsTo| id | reportingHierarchy | ++--------+----------+----+---------------------+ +| Dev | Eliot | 1 | [{Eliot, Ron, 2}] | +| Eliot | Ron | 2 | [{Ron, Andrew, 3}] | +| Ron | Andrew | 3 | [{Andrew, null, 4}] | +| Andrew | null | 4 | [] | +| Asya | Ron | 5 | [{Ron, Andrew, 3}] | +| Dan | Andrew | 6 | [{Andrew, null, 4}] | ++--------+----------+----+---------------------+ +``` + +For Dev, the traversal starts with `reportsTo="Eliot"`, finds the Eliot record, and returns it in the `reportingHierarchy` array. 
+ +## Example 2: Employee Hierarchy with Depth Tracking + +The following query adds a depth field to track how many levels each manager is from the employee: + +```ppl ignore +source = employees + | graphLookup employees + startField=reportsTo + fromField=reportsTo + toField=name + depthField=level + as reportingHierarchy +``` + +The query returns the following results: + +```text ++--------+----------+----+------------------------+ +| name | reportsTo| id | reportingHierarchy | ++--------+----------+----+------------------------+ +| Dev | Eliot | 1 | [{Eliot, Ron, 2, 0}] | +| Eliot | Ron | 2 | [{Ron, Andrew, 3, 0}] | +| Ron | Andrew | 3 | [{Andrew, null, 4, 0}] | +| Andrew | null | 4 | [] | +| Asya | Ron | 5 | [{Ron, Andrew, 3, 0}] | +| Dan | Andrew | 6 | [{Andrew, null, 4, 0}] | ++--------+----------+----+------------------------+ +``` + +The depth field `level` is appended to each document in the result array. A value of `0` indicates the first level of matches. + +## Example 3: Limited Depth Traversal + +The following query limits traversal to 2 levels using `maxDepth=1`: + +```ppl ignore +source = employees + | graphLookup employees + startField=reportsTo + fromField=reportsTo + toField=name + maxDepth=1 + as reportingHierarchy +``` + +The query returns the following results: + +```text ++--------+----------+----+--------------------------------------+ +| name | reportsTo| id | reportingHierarchy | ++--------+----------+----+--------------------------------------+ +| Dev | Eliot | 1 | [{Eliot, Ron, 2}, {Ron, Andrew, 3}] | +| Eliot | Ron | 2 | [{Ron, Andrew, 3}, {Andrew, null, 4}]| +| Ron | Andrew | 3 | [{Andrew, null, 4}] | +| Andrew | null | 4 | [] | +| Asya | Ron | 5 | [{Ron, Andrew, 3}, {Andrew, null, 4}]| +| Dan | Andrew | 6 | [{Andrew, null, 4}] | ++--------+----------+----+--------------------------------------+ +``` + +With `maxDepth=1`, the traversal goes two levels deep (depth 0 and depth 1). 
+ +## Example 4: Airport Connections Graph + +Given an `airports` index with the following documents: + +| airport | connects | +|---------|----------| +| JFK | [BOS, ORD] | +| BOS | [JFK, PWM] | +| ORD | [JFK] | +| PWM | [BOS, LHR] | +| LHR | [PWM] | + +The following query finds reachable airports from each airport: + +```ppl ignore +source = airports + | graphLookup airports + startField=airport + fromField=connects + toField=airport + as reachableAirports +``` + +The query returns the following results: + +```text ++---------+------------+---------------------+ +| airport | connects | reachableAirports | ++---------+------------+---------------------+ +| JFK | [BOS, ORD] | [{JFK, [BOS, ORD]}] | +| BOS | [JFK, PWM] | [{BOS, [JFK, PWM]}] | +| ORD | [JFK] | [{ORD, [JFK]}] | +| PWM | [BOS, LHR] | [{PWM, [BOS, LHR]}] | +| LHR | [PWM] | [{LHR, [PWM]}] | ++---------+------------+---------------------+ +``` + +## Example 5: Cross-Index Graph Lookup + +The `graphLookup` command can use different source and lookup indexes. 
Given a `travelers` index: + +| name | nearestAirport | +|------|----------------| +| Dev | JFK | +| Eliot | JFK | +| Jeff | BOS | + +The following query finds reachable airports for each traveler: + +```ppl ignore +source = travelers + | graphLookup airports + startField=nearestAirport + fromField=connects + toField=airport + as reachableAirports +``` + +The query returns the following results: + +```text ++-------+----------------+---------------------+ +| name | nearestAirport | reachableAirports | ++-------+----------------+---------------------+ +| Dev | JFK | [{JFK, [BOS, ORD]}] | +| Eliot | JFK | [{JFK, [BOS, ORD]}] | +| Jeff | BOS | [{BOS, [JFK, PWM]}] | ++-------+----------------+---------------------+ +``` + +## Example 6: Bidirectional Traversal + +The following query performs bidirectional traversal to find both managers and colleagues who share the same manager: + +```ppl ignore +source = employees + | where name = 'Ron' + | graphLookup employees + startField=reportsTo + fromField=reportsTo + toField=name + direction=bi + as connections +``` + +The query returns the following results: + +```text ++------+----------+----+------------------------------------------------+ +| name | reportsTo| id | connections | ++------+----------+----+------------------------------------------------+ +| Ron | Andrew | 3 | [{Ron, Andrew, 3}, {Andrew, null, 4}, {Dan, Andrew, 6}] | ++------+----------+----+------------------------------------------------+ +``` + +With bidirectional traversal, Ron's connections include: +- His own record (Ron reports to Andrew) +- His manager (Andrew) +- His peer (Dan, who also reports to Andrew) + +## Limitations + +- The source input, which provides the starting point for the traversal, has a limitation of 100 documents to avoid performance issues. +- To avoid PIT (Point in Time) search, each level of the traversal returns at most `index.max_result_window` documents (the maximum result window of the lookup index). 
From 857044ef42c828cfd638cca24228aaec8ee6c764 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 4 Feb 2026 15:03:34 +0800 Subject: [PATCH 13/23] Don't include loop edges Signed-off-by: Heng Qian --- .../remote/CalcitePPLGraphLookupIT.java | 6 +- .../scan/CalciteEnumerableGraphLookup.java | 58 ++++++++++--------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index 53c1ed91f17..9387d10553a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -222,7 +222,7 @@ public void testAirportConnectionsWithMaxDepth() throws IOException { rows( "JFK", List.of("BOS", "ORD"), - List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}", "{ORD, [JFK]}"))); + List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}"))); } /** Test 7: Find airports with default depth(=0) and start value of list */ @@ -247,7 +247,7 @@ public void testAirportConnectionsWithDepthField() throws IOException { schema("reachableAirports", "array")); verifyDataRows( result, - rows("JFK", List.of("BOS", "ORD"), List.of("{BOS, [JFK, PWM], 0}", "{ORD, [JFK], 0}"))); + rows("JFK", List.of("BOS", "ORD"), List.of("{BOS, [JFK, PWM], 0}"))); } /** @@ -398,7 +398,7 @@ public void testBidirectionalAirportConnections() throws IOException { rows( "ORD", List.of("JFK"), - List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}", "{ORD, [JFK]}"))); + List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}"))); } // ==================== Edge Cases ==================== diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 2ba2e2d8745..b08fa0cfe48 100644 
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -245,19 +245,20 @@ private List performBfs(Object startValue) { // TODO: support spillable for these collections List results = new ArrayList<>(); - Set visited = new HashSet<>(); + // TODO: If we want to include loop edges, we also need to track the visited edges + Set visitedNodes = new HashSet<>(); Queue queue = new ArrayDeque<>(); // Initialize BFS with start value if (startValue instanceof List list) { list.forEach( value -> { + visitedNodes.add(value); queue.offer(value); - visited.add(value); }); } else { + visitedNodes.add(startValue); queue.offer(startValue); - visited.add(startValue); } int currentLevelDepth = 0; @@ -283,29 +284,32 @@ private List performBfs(Object startValue) { Object fromValue = rowArray[fromFieldIdx]; // Collect next values to traverse (may be single value or list) // For forward traversal: extract fromField values for next level - // For bidirectional: also extract toField values + // For bidirectional: also extract toField values. 
+ // Skip visited values while keep null value List nextValues = new ArrayList<>(); - collectValues(fromValue, nextValues); + collectValues(fromValue, nextValues, visitedNodes); if (graphLookup.bidirectional) { Object toValue = rowArray[toFieldIdx]; - collectValues(toValue, nextValues); + collectValues(toValue, nextValues, visitedNodes); } - // Add row to results (all matched rows should be included) - if (graphLookup.depthField != null) { - Object[] rowWithDepth = new Object[rowArray.length + 1]; - System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); - rowWithDepth[rowArray.length] = currentLevelDepth; - results.add(rowWithDepth); - } else { - results.add(rowArray); - } + // Add row to results if the nextValues is not empty + if (!nextValues.isEmpty()) { + if (graphLookup.depthField != null) { + Object[] rowWithDepth = new Object[rowArray.length + 1]; + System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); + rowWithDepth[rowArray.length] = currentLevelDepth; + results.add(rowWithDepth); + } else { + results.add(rowArray); + } - // Add unvisited values to queue for next level traversal - for (Object val : nextValues) { - if (val != null && !visited.contains(val)) { - visited.add(val); - queue.offer(val); + // Add unvisited non-null values to queue for next level traversal + for (Object val : nextValues) { + if (val != null) { + visitedNodes.add(val); + queue.offer(val); + } } } } @@ -363,14 +367,16 @@ private List queryLookupTable(List values) { * * @param value The field value (may be single value or List) * @param collector The list to collect values into + * @param visited Previously visited values to avoid duplicates */ - private void collectValues(Object value, List collector) { - if (value == null) { - return; - } + private void collectValues(Object value, List collector, Set visited) { if (value instanceof List list) { - collector.addAll(list); - } else { + for (Object item : list) { + if (!visited.contains(item)) { + 
collector.add(item); + } + } + } else if (!visited.contains(value)) { collector.add(value); } } From b61e020efbd35f3f96799cced20c2bdd785f0f62 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Wed, 4 Feb 2026 15:34:45 +0800 Subject: [PATCH 14/23] Refine code Signed-off-by: Heng Qian --- ppl/src/main/antlr/OpenSearchPPLParser.g4 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index b92bb466a6d..8559e59d1b4 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -1691,11 +1691,11 @@ searchableKeyWord | ROW | COL | COLUMN_NAME - | CONNECT_FROM_FIELD - | CONNECT_TO_FIELD + | FROM_FIELD + | TO_FIELD | MAX_DEPTH | DEPTH_FIELD | DIRECTION | UNI - | BIO + | BI ; From fabbb543dee11aa2eed55eee179339d89d7641c3 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Thu, 5 Feb 2026 11:31:59 +0800 Subject: [PATCH 15/23] spotlessApply Signed-off-by: Heng Qian --- .../calcite/remote/CalcitePPLGraphLookupIT.java | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index 9387d10553a..14251014fe9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -219,10 +219,7 @@ public void testAirportConnectionsWithMaxDepth() throws IOException { schema("reachableAirports", "array")); verifyDataRows( result, - rows( - "JFK", - List.of("BOS", "ORD"), - List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}"))); + rows("JFK", List.of("BOS", "ORD"), List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}"))); } /** Test 7: Find airports with default depth(=0) and start value of list */ @@ -245,9 +242,7 @@ public void 
testAirportConnectionsWithDepthField() throws IOException { schema("airport", "string"), schema("connects", "string"), schema("reachableAirports", "array")); - verifyDataRows( - result, - rows("JFK", List.of("BOS", "ORD"), List.of("{BOS, [JFK, PWM], 0}"))); + verifyDataRows(result, rows("JFK", List.of("BOS", "ORD"), List.of("{BOS, [JFK, PWM], 0}"))); } /** @@ -394,11 +389,7 @@ public void testBidirectionalAirportConnections() throws IOException { schema("connects", "string"), schema("allConnections", "array")); verifyDataRows( - result, - rows( - "ORD", - List.of("JFK"), - List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}"))); + result, rows("ORD", List.of("JFK"), List.of("{JFK, [BOS, ORD]}", "{BOS, [JFK, PWM]}"))); } // ==================== Edge Cases ==================== From 84199b8e854c663b501410d4ab08904401bc5d31 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Thu, 5 Feb 2026 17:06:14 +0800 Subject: [PATCH 16/23] Refine code Signed-off-by: Heng Qian --- core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java index c4ea9428ab5..4754c777e21 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -68,7 +68,7 @@ public enum Direction { private UnresolvedPlan child; public String getDepthFieldName() { - return depthField == null ? null : depthField.toString(); + return depthField == null ? 
null : depthField.getField().toString(); } @Override From fcda2dfe4f3fe6d0c547a28634df5ad7cf46104e Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 6 Feb 2026 13:35:48 +0800 Subject: [PATCH 17/23] Filter visited nodes in search query Signed-off-by: Heng Qian --- .../scan/CalciteEnumerableGraphLookup.java | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index b08fa0cfe48..5e050cedae6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -5,10 +5,12 @@ package org.opensearch.sql.opensearch.storage.scan; +import static org.opensearch.index.query.QueryBuilders.boolQuery; import static org.opensearch.index.query.QueryBuilders.termsQuery; import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -277,7 +279,7 @@ private List performBfs(Object startValue) { // Query OpenSearch for all current level values // Forward direction: fromField = currentLevelValues - List forwardResults = queryLookupTable(currentLevelValues); + List forwardResults = queryLookupTable(currentLevelValues, visitedNodes); for (Object row : forwardResults) { Object[] rowArray = (Object[]) (row); @@ -324,28 +326,28 @@ private List performBfs(Object startValue) { * Queries the lookup table with a terms filter. 
* * @param values Values to match + * @param visitedValues Values to not match * @return List of matching rows */ - private List queryLookupTable(List values) { + private List queryLookupTable( + Collection values, Collection visitedValues) { if (values.isEmpty()) { return List.of(); } - // Forward direction: query toField = values to find nodes matching current values - // Then extract fromField values for next level traversal - NamedFieldExpression toFieldExpression = - new NamedFieldExpression( - toFieldIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); - QueryBuilder query = termsQuery(toFieldExpression.getReferenceForTermQuery(), values); + // Forward direction + QueryBuilder query = + boolQuery() + .must(getQueryBuilder(toFieldIdx, values)) + .mustNot(getQueryBuilder(fromFieldIdx, visitedValues)); if (graphLookup.bidirectional) { // Also query fromField for bidirectional traversal - NamedFieldExpression fromFieldExpression = - new NamedFieldExpression( - fromFieldIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()); - query = - QueryBuilders.boolQuery() - .should(query) - .should(termsQuery(fromFieldExpression.getReferenceForTermQuery(), values)); + QueryBuilder backQuery = + boolQuery() + .must(getQueryBuilder(fromFieldIdx, values)) + .mustNot(getQueryBuilder(toFieldIdx, visitedValues)); + + query = QueryBuilders.boolQuery().should(query).should(backQuery); } CalciteEnumerableIndexScan newScan = (CalciteEnumerableIndexScan) this.lookupScan.copy(); QueryBuilder finalQuery = query; @@ -362,6 +364,20 @@ private List queryLookupTable(List values) { return results; } + /** + * Provides a query builder to search edges with the field matching values + * + * @param fieldIdx field index + * @param values values to match + * @return query builder + */ + private QueryBuilder getQueryBuilder(int fieldIdx, Collection values) { + String fieldName = + new NamedFieldExpression(fieldIdx, lookupFields, lookupScan.getOsIndex().getFieldTypes()) + 
.getReferenceForTermQuery(); + return termsQuery(fieldName, values); + } + /** * Collects values from a field that may be a single value or a list. * From 6430d6cbad925ccdd7ec2bb6077efbdb5a141acc Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 6 Feb 2026 13:56:47 +0800 Subject: [PATCH 18/23] Fix UT Signed-off-by: Heng Qian --- .../opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java index d83c03af8d5..da169704925 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLGraphLookupTest.java @@ -67,7 +67,7 @@ public void testGraphLookupWithDepthField() { RelNode root = getRelNode(ppl); String expectedLogical = "LogicalGraphLookup(fromField=[reportsTo], toField=[name]," - + " outputField=[reportingHierarchy], depthField=[Field(field=level, fieldArgs=[])]," + + " outputField=[reportingHierarchy], depthField=[level]," + " maxDepth=[0], bidirectional=[false])\n" + " LogicalSort(fetch=[100])\n" + " LogicalTableScan(table=[[scott, employee]])\n" From bb9e5163540f2fa518b5bcd807ab99501c1c36bc Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 6 Feb 2026 15:17:54 +0800 Subject: [PATCH 19/23] Add parameter supportArray for handling fields with array values Signed-off-by: Heng Qian --- .../opensearch/sql/ast/tree/GraphLookup.java | 3 ++ .../sql/calcite/CalciteRelNodeVisitor.java | 4 +- .../sql/calcite/plan/rel/GraphLookup.java | 10 ++++- .../calcite/plan/rel/LogicalGraphLookup.java | 19 +++++--- docs/user/ppl/cmd/graphlookup.md | 10 ++++- .../remote/CalcitePPLGraphLookupIT.java | 3 ++ .../rules/EnumerableGraphLookupRule.java | 3 +- .../scan/CalciteEnumerableGraphLookup.java | 44 +++++++++++++------ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + 
ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../opensearch/sql/ppl/parser/AstBuilder.java | 6 +++ 11 files changed, 78 insertions(+), 26 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java index 4754c777e21..0d7c6d479f3 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -65,6 +65,9 @@ public enum Direction { /** Direction mode: UNI (default) or BIO for bidirectional. */ private final Direction direction; + /** Whether to support array-typed fields without early filter pushdown. */ + private final boolean supportArray; + private UnresolvedPlan child; public String getDepthFieldName() { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 29f7be888b4..8feff4f14b2 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2596,6 +2596,7 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { RexLiteral maxDepthNode = (RexLiteral) rexVisitor.analyze(node.getMaxDepth(), context); Integer maxDepthValue = maxDepthNode.getValueAs(Integer.class); maxDepthValue = maxDepthValue == null ? 0 : maxDepthValue; + boolean supportArray = node.isSupportArray(); // 3. 
Visit and materialize lookup table analyze(node.getFromTable(), context); @@ -2614,7 +2615,8 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { outputFieldName, depthFieldName, maxDepthValue, - bidirectional); + bidirectional, + supportArray); builder.push(graphLookup); return builder.peek(); diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java index f9df615abc8..0a70fc44b1d 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -48,6 +48,7 @@ public abstract class GraphLookup extends BiRel { // TODO: add limitation on the maxDepth and input rows count protected final int maxDepth; // -1 = unlimited protected final boolean bidirectional; + protected final boolean supportArray; private RelDataType outputRowType; @@ -65,6 +66,8 @@ public abstract class GraphLookup extends BiRel { * @param depthField Name of the depth field * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions + * @param supportArray Whether to support array-typed fields (disables early visited filter + * pushdown) */ protected GraphLookup( RelOptCluster cluster, @@ -77,7 +80,8 @@ protected GraphLookup( String outputField, @Nullable String depthField, int maxDepth, - boolean bidirectional) { + boolean bidirectional, + boolean supportArray) { super(cluster, traitSet, source, lookup); this.startField = startField; this.fromField = fromField; @@ -86,6 +90,7 @@ protected GraphLookup( this.depthField = depthField; this.maxDepth = maxDepth; this.bidirectional = bidirectional; + this.supportArray = supportArray; } /** Returns the source table RelNode. 
*/ @@ -142,6 +147,7 @@ public RelWriter explainTerms(RelWriter pw) { .item("outputField", outputField) .item("depthField", depthField) .item("maxDepth", maxDepth) - .item("bidirectional", bidirectional); + .item("bidirectional", bidirectional) + .itemIf("supportArray", supportArray, supportArray); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java index e8d6cfa7bba..dd82755a0d1 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java @@ -34,6 +34,7 @@ public class LogicalGraphLookup extends GraphLookup { * @param depthField Name of the depth field * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions + * @param supportArray Whether to support array-typed fields */ protected LogicalGraphLookup( RelOptCluster cluster, @@ -46,7 +47,8 @@ protected LogicalGraphLookup( String outputField, @Nullable String depthField, int maxDepth, - boolean bidirectional) { + boolean bidirectional, + boolean supportArray) { super( cluster, traitSet, @@ -58,7 +60,8 @@ protected LogicalGraphLookup( outputField, depthField, maxDepth, - bidirectional); + bidirectional, + supportArray); } /** @@ -70,9 +73,10 @@ protected LogicalGraphLookup( * @param fromField Field name for outgoing edges * @param toField Field name for incoming edges * @param outputField Name of the output array field + * @param depthField Named of the output depth field * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions - * @param depthField Named of the output depth field + * @param supportArray Whether to support array-typed fields * @return A new LogicalGraphLookup instance */ public static LogicalGraphLookup create( @@ -84,7 
+88,8 @@ public static LogicalGraphLookup create( String outputField, @Nullable String depthField, int maxDepth, - boolean bidirectional) { + boolean bidirectional, + boolean supportArray) { RelOptCluster cluster = source.getCluster(); RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); return new LogicalGraphLookup( @@ -98,7 +103,8 @@ public static LogicalGraphLookup create( outputField, depthField, maxDepth, - bidirectional); + bidirectional, + supportArray); } @Override @@ -114,6 +120,7 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { outputField, depthField, maxDepth, - bidirectional); + bidirectional, + supportArray); } } diff --git a/docs/user/ppl/cmd/graphlookup.md b/docs/user/ppl/cmd/graphlookup.md index 495503bb520..277896cfb44 100644 --- a/docs/user/ppl/cmd/graphlookup.md +++ b/docs/user/ppl/cmd/graphlookup.md @@ -8,7 +8,7 @@ The `graphLookup` command performs recursive graph traversal on a collection usi The `graphLookup` command has the following syntax: ```syntax -graphLookup startField= fromField= toField= [maxDepth=] [depthField=] [direction=(uni | bi)] as +graphLookup startField= fromField= toField= [maxDepth=] [depthField=] [direction=(uni | bi)] [supportArray=(true | false)] as ``` The following are examples of the `graphLookup` command syntax: @@ -18,7 +18,8 @@ source = employees | graphLookup employees startField=reportsTo fromField=report source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name maxDepth=2 as reportingHierarchy source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name depthField=level as reportingHierarchy source = employees | graphLookup employees startField=reportsTo fromField=reportsTo toField=name direction=bi as connections -source = travelers | graphLookup airports startField=nearestAirport fromField=connects toField=airport as reachableAirports +source = travelers | graphLookup airports startField=nearestAirport 
fromField=connects toField=airport supportArray=true as reachableAirports +source = airports | graphLookup airports startField=airport fromField=connects toField=airport supportArray=true as reachableAirports ``` ## Parameters @@ -34,6 +35,7 @@ The `graphLookup` command supports the following parameters. | `maxDepth=` | Optional | The maximum recursion depth of hops. Default is `0`. A value of `0` means only the direct connections to the statr values are returned. A value of `1` means 1 hop connections (initial match plus one recursive step), and so on. | | `depthField=` | Optional | The name of the field to add to each traversed document indicating its recursion depth. If not specified, no depth field is added. Depth starts at `0` for the first level of matches. | | `direction=(uni \| bi)` | Optional | The traversal direction. `uni` (default) performs unidirectional traversal following edges in the forward direction only. `bi` performs bidirectional traversal, following edges in both directions. | +| `supportArray=(true \| false)` | Optional | When `true`, disables early visited-node filter pushdown to OpenSearch. Default is `false`. Set to `true` when `fromField` or `toField` contains array values to ensure correct traversal behavior. See [Array Field Handling](#array-field-handling) for details. | | `as ` | Required | The name of the output array field that will contain all documents found during the graph traversal. | ## How It Works @@ -251,6 +253,10 @@ With bidirectional traversal, Ron's connections include: - His manager (Andrew) - His peer (Dan, who also reports to Andrew) +## Array Field Handling + +When the `fromField` or `toField` contains array values, you should set `supportArray=true` to ensure correct traversal behavior. + ## Limitations - The source input, which provides the starting point for the traversal, has a limitation of 100 documents to avoid performance issues. 
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index 14251014fe9..25377062872 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -179,6 +179,7 @@ public void testAirportConnections() throws IOException { + " startField=airport" + " fromField=connects" + " toField=airport" + + " supportArray=true" + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); @@ -209,6 +210,7 @@ public void testAirportConnectionsWithMaxDepth() throws IOException { + " fromField=connects" + " toField=airport" + " maxDepth=1" + + " supportArray=true" + " as reachableAirports", TEST_INDEX_GRAPH_AIRPORTS, TEST_INDEX_GRAPH_AIRPORTS)); @@ -317,6 +319,7 @@ public void testTravelerReachableAirportsWithMaxDepth() throws IOException { + " fromField=connects" + " toField=airport" + " maxDepth=1" + + " supportArray=true" + " as reachableAirports", TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_AIRPORTS)); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java index c8da86bffe0..38332c6ae88 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java @@ -98,6 +98,7 @@ public RelNode convert(RelNode rel) { graphLookup.getOutputField(), graphLookup.getDepthField(), graphLookup.getMaxDepth(), - graphLookup.isBidirectional()); + graphLookup.isBidirectional(), + graphLookup.isSupportArray()); } } diff --git 
a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 5e050cedae6..98cf0f8ebbc 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -68,6 +68,7 @@ public class CalciteEnumerableGraphLookup extends GraphLookup implements Enumera * @param depthField Name of the depth field * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions + * @param supportArray Whether to support array-typed fields */ public CalciteEnumerableGraphLookup( RelOptCluster cluster, @@ -80,7 +81,8 @@ public CalciteEnumerableGraphLookup( String outputField, String depthField, int maxDepth, - boolean bidirectional) { + boolean bidirectional, + boolean supportArray) { super( cluster, traitSet, @@ -92,7 +94,8 @@ public CalciteEnumerableGraphLookup( outputField, depthField, maxDepth, - bidirectional); + bidirectional, + supportArray); } @Override @@ -108,7 +111,8 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { outputField, depthField, maxDepth, - bidirectional); + bidirectional, + supportArray); } @Override @@ -326,7 +330,7 @@ private List performBfs(Object startValue) { * Queries the lookup table with a terms filter. 
* * @param values Values to match - * @param visitedValues Values to not match + * @param visitedValues Values to not match (ignored when supportArray is true) * @return List of matching rows */ private List queryLookupTable( @@ -335,18 +339,30 @@ private List queryLookupTable( return List.of(); } - // Forward direction - QueryBuilder query = - boolQuery() - .must(getQueryBuilder(toFieldIdx, values)) - .mustNot(getQueryBuilder(fromFieldIdx, visitedValues)); - if (graphLookup.bidirectional) { - // Also query fromField for bidirectional traversal - QueryBuilder backQuery = + // Forward direction query + QueryBuilder query; + if (graphLookup.supportArray) { + // When supportArray is true, don't push down visited filter + // because array fields may contain multiple values that need to be checked individually + query = getQueryBuilder(toFieldIdx, values); + } else { + query = boolQuery() - .must(getQueryBuilder(fromFieldIdx, values)) - .mustNot(getQueryBuilder(toFieldIdx, visitedValues)); + .must(getQueryBuilder(toFieldIdx, values)) + .mustNot(getQueryBuilder(fromFieldIdx, visitedValues)); + } + if (graphLookup.bidirectional) { + // Also query fromField for bidirectional traversal + QueryBuilder backQuery; + if (graphLookup.supportArray) { + backQuery = getQueryBuilder(fromFieldIdx, values); + } else { + backQuery = + boolQuery() + .must(getQueryBuilder(fromFieldIdx, values)) + .mustNot(getQueryBuilder(toFieldIdx, visitedValues)); + } query = QueryBuilders.boolQuery().should(query).should(backQuery); } CalciteEnumerableIndexScan newScan = (CalciteEnumerableIndexScan) this.lookupScan.copy(); diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index bb56016b53a..5f0031eb722 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -61,6 +61,7 @@ DEPTH_FIELD: 'DEPTHFIELD'; DIRECTION: 'DIRECTION'; UNI: 'UNI'; BI: 'BI'; +SUPPORT_ARRAY: 'SUPPORTARRAY'; ROW: 'ROW'; COL: 'COL'; EXPAND: 
'EXPAND'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 8559e59d1b4..182fe30ef5f 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -638,6 +638,7 @@ graphLookupOption | (MAX_DEPTH EQUAL integerLiteral) | (DEPTH_FIELD EQUAL fieldExpression) | (DIRECTION EQUAL (UNI | BI)) + | (SUPPORT_ARRAY EQUAL booleanLiteral) ; // clauses diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index d505bcfbf4a..78ce37b6cc4 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -1496,6 +1496,7 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom Field startField = null; Field depthField = null; Direction direction = Direction.UNI; + boolean supportArray = false; for (OpenSearchPPLParser.GraphLookupOptionContext option : ctx.graphLookupOption()) { if (option.FROM_FIELD() != null) { @@ -1516,6 +1517,10 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom if (option.DIRECTION() != null) { direction = option.BI() != null ? 
Direction.BI : Direction.UNI; } + if (option.SUPPORT_ARRAY() != null) { + Literal literal = (Literal) internalVisitExpression(option.booleanLiteral()); + supportArray = Boolean.TRUE.equals(literal.getValue()); + } } Field as = (Field) internalVisitExpression(ctx.outputField); @@ -1533,6 +1538,7 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom .startField(startField) .depthField(depthField) .direction(direction) + .supportArray(supportArray) .build(); } } From 6af4d84742b2f3a84d001b3ad86d7ea6ae99fa2e Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 6 Feb 2026 15:25:48 +0800 Subject: [PATCH 20/23] Remove unused code Signed-off-by: Heng Qian --- .../function/BuiltinFunctionName.java | 5 +- .../functions/GraphLookupBfsFunction.java | 80 ------ .../functions/GraphLookupFunction.java | 240 ------------------ 3 files changed, 1 insertion(+), 324 deletions(-) delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java delete mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index fa34041f703..37052ec858c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -351,10 +351,7 @@ public enum BuiltinFunctionName { INTERNAL_PARSE(FunctionName.of("parse"), true), INTERNAL_REGEXP_REPLACE_PG_4(FunctionName.of("regexp_replace_pg_4"), true), INTERNAL_REGEXP_REPLACE_5(FunctionName.of("regexp_replace_5"), true), - INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true), - - /** Graph functions */ - GRAPH_LOOKUP(FunctionName.of("graph_lookup"), true); + INTERNAL_TRANSLATE3(FunctionName.of("translate3"), true); private final FunctionName 
name; private boolean isInternal; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java deleted file mode 100644 index 1a3b1fa59cc..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupBfsFunction.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.functions; - -import java.util.List; -import org.apache.calcite.adapter.enumerable.NotNullImplementor; -import org.apache.calcite.adapter.enumerable.NullPolicy; -import org.apache.calcite.adapter.enumerable.RexToLixTranslator; -import org.apache.calcite.linq4j.tree.Expression; -import org.apache.calcite.linq4j.tree.Expressions; -import org.apache.calcite.linq4j.tree.Types; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexSubQuery; -import org.apache.calcite.sql.type.SqlReturnTypeInference; -import org.apache.calcite.sql.type.SqlTypeName; -import org.opensearch.sql.expression.function.ImplementorUDF; -import org.opensearch.sql.expression.function.UDFOperandMetadata; - -/** - * UDF wrapper for graph traversal BFS function. - * - *

Parameters: - * - *

    - *
  • startValue: Object - starting value for BFS - *
  • lookupData: List - collected rows from lookup table - *
  • connectFromIdx: int - index of connectFrom field - *
  • connectToIdx: int - index of connectTo field - *
  • maxDepth: int - max traversal depth (-1 = unlimited) - *
  • bidirectional: boolean - traverse both directions - *
  • includeDepth: boolean - include depth in output - *
- * - *

Returns: List - array of [row_fields..., depth?] - */ -public class GraphLookupBfsFunction extends ImplementorUDF { - - public GraphLookupBfsFunction() { - super(new GraphLookupBfsImplementor(), NullPolicy.ANY); - } - - @Override - public SqlReturnTypeInference getReturnTypeInference() { - // Return ARRAY - actual struct type depends on lookup table schema - return opBinding -> { - var typeFactory = opBinding.getTypeFactory(); - var anyType = typeFactory.createSqlType(SqlTypeName.ANY); - return typeFactory.createArrayType(anyType, -1); - }; - } - - @Override - public UDFOperandMetadata getOperandMetadata() { - return null; // Accept any operand types - } - - private static class GraphLookupBfsImplementor implements NotNullImplementor { - @Override - public Expression implement( - RexToLixTranslator translator, RexCall call, List translatedOperands) { - // Args: startValue, lookupData, connectFromIdx, connectToIdx, maxDepth, bidirectional, - // includeDepth - return Expressions.call( - Types.lookupMethod( - GraphLookupFunction.class, - "executeWithDynamicLookup", - Object.class, // startValue - RexSubQuery.class, // lookupData - int.class, // connectFromIdx - int.class, // connectToIdx - int.class, // maxDepth - boolean.class, // bidirectional - boolean.class), // includeDepth - translatedOperands); - } - } -} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java deleted file mode 100644 index 61a878bff62..00000000000 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/functions/GraphLookupFunction.java +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.sql.opensearch.functions; - -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import 
java.util.Map; -import java.util.Objects; -import java.util.Queue; -import java.util.Set; -import org.apache.calcite.rex.RexSubQuery; - -/** - * BFS-based graph traversal function for the graphLookup command. - * - *

This function performs breadth-first search traversal on a graph represented by rows in a - * lookup table. It follows edges from starting nodes and collects all reachable nodes up to a - * specified depth. - * - *

The algorithm is inspired by MongoDB's $graphLookup operator. - */ -public class GraphLookupFunction { - - /** Internal class to track nodes during BFS with their depth. */ - public record NodeWithDepth(Object value, int depth) { - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - NodeWithDepth that = (NodeWithDepth) o; - return Objects.equals(value, that.value); - } - - @Override - public int hashCode() { - return Objects.hash(value); - } - } - - /** Result of a single node traversal, including the row data and traversal depth. */ - public record TraversalResult(Object[] row, int depth) {} - - /** - * Execute BFS graph traversal starting from a given value. - * - * @param startValue The starting value to begin traversal from - * @param lookupTableRows All rows from the lookup table - * @param fromFieldIndex Index of the field that represents outgoing edges (the field values we - * traverse FROM) - * @param toFieldIndex Index of the field that represents the target to match against (the field - * values we traverse TO) - * @param maxDepth Maximum traversal depth (-1 or 0 for unlimited) - * @param bidirectional If true, traverse edges in both directions - * @return List of traversal results containing row data and depth - */ - public static List execute( - Object startValue, - List lookupTableRows, - int fromFieldIndex, - int toFieldIndex, - int maxDepth, - boolean bidirectional) { - - if (startValue == null || lookupTableRows == null || lookupTableRows.isEmpty()) { - return List.of(); - } - - // Build adjacency index: toField value -> list of rows with matching fromField - // This creates edges: when we're at a node with fromField=X, we can traverse to nodes - // where toField=X - Map> forwardAdjacency = new HashMap<>(); - - // For bidirectional: also index reverse edges - // fromField value -> list of rows with matching toField - Map> reverseAdjacency = bidirectional ? 
new HashMap<>() : null; - - for (Object[] row : lookupTableRows) { - Object fromValue = row[fromFieldIndex]; - Object toValue = row[toFieldIndex]; - - // Forward edge: from fromValue, we can reach this row - if (fromValue != null) { - forwardAdjacency.computeIfAbsent(fromValue, k -> new ArrayList<>()).add(row); - } - - // Reverse edge (for bidirectional): from toValue, we can reach this row - if (bidirectional && toValue != null) { - reverseAdjacency.computeIfAbsent(toValue, k -> new ArrayList<>()).add(row); - } - } - - // BFS traversal - List results = new ArrayList<>(); - Set visited = new HashSet<>(); - Queue queue = new ArrayDeque<>(); - - // Start BFS from the starting value - queue.offer(new NodeWithDepth(startValue, 0)); - visited.add(startValue); - - while (!queue.isEmpty()) { - NodeWithDepth current = queue.poll(); - int currentDepth = current.depth(); - - // Check depth limit - if (maxDepth > 0 && currentDepth >= maxDepth) { - continue; - } - - // Get adjacent nodes via forward edges - List forwardNeighbors = forwardAdjacency.get(current.value()); - if (forwardNeighbors != null) { - for (Object[] neighborRow : forwardNeighbors) { - Object neighborKey = neighborRow[toFieldIndex]; - if (!visited.contains(neighborKey)) { - visited.add(neighborKey); - results.add(new TraversalResult(neighborRow, currentDepth + 1)); - queue.offer(new NodeWithDepth(neighborKey, currentDepth + 1)); - } - } - } - - // For bidirectional: also traverse reverse edges - if (bidirectional && reverseAdjacency != null) { - List reverseNeighbors = reverseAdjacency.get(current.value()); - if (reverseNeighbors != null) { - for (Object[] neighborRow : reverseNeighbors) { - Object neighborKey = neighborRow[fromFieldIndex]; - if (!visited.contains(neighborKey)) { - visited.add(neighborKey); - results.add(new TraversalResult(neighborRow, currentDepth + 1)); - queue.offer(new NodeWithDepth(neighborKey, currentDepth + 1)); - } - } - } - } - } - - return results; - } - - /** - * Convenience 
method to get the starting value from an input row. - * - * @param inputRow The input row - * @param toFieldIndex Index of the field in input that contains the starting value - * @return The starting value for traversal - */ - public static Object getStartValue(Object[] inputRow, int toFieldIndex) { - if (inputRow == null || toFieldIndex < 0 || toFieldIndex >= inputRow.length) { - return null; - } - return inputRow[toFieldIndex]; - } - - /** - * Convert traversal results to an array format suitable for aggregation. - * - * @param results List of traversal results - * @param includeDepth Whether to include depth information in the output - * @return Array of row arrays (with optional depth appended) - */ - public static Object[] toResultArray(List results, boolean includeDepth) { - if (results == null || results.isEmpty()) { - return new Object[0]; - } - - Object[] resultArray = new Object[results.size()]; - for (int i = 0; i < results.size(); i++) { - TraversalResult result = results.get(i); - if (includeDepth) { - // Append depth to the row - Object[] rowWithDepth = new Object[result.row().length + 1]; - System.arraycopy(result.row(), 0, rowWithDepth, 0, result.row().length); - rowWithDepth[result.row().length] = result.depth(); - resultArray[i] = rowWithDepth; - } else { - resultArray[i] = result.row(); - } - } - return resultArray; - } - - /** - * Entry point for UDF invocation. Converts List to Object[] and returns results. 
- * - * @param startValue Starting value for BFS traversal - * @param lookupTable Collected rows from lookup table - * @param fromIdx Index of from field in lookup rows - * @param toIdx Index of to field in lookup rows - * @param maxDepth Maximum traversal depth (-1 = unlimited) - * @param bidirectional Whether to traverse edges in both directions - * @param includeDepth Whether to include depth in output rows - * @return List of result rows as Object arrays - */ - public static List executeWithDynamicLookup( - Object startValue, - RexSubQuery lookupTable, - int fromIdx, - int toIdx, - int maxDepth, - boolean bidirectional, - boolean includeDepth) { - - if (lookupTable == null) { - return List.of(); - } - - // Convert List to List - List rows = new ArrayList<>(); - for (Object item : List.of()) { - if (item instanceof Object[] arr) { - rows.add(arr); - } - } - - List results = - execute(startValue, rows, fromIdx, toIdx, maxDepth, bidirectional); - - // Convert to output format - List output = new ArrayList<>(); - for (TraversalResult result : results) { - if (includeDepth) { - Object[] rowWithDepth = new Object[result.row().length + 1]; - System.arraycopy(result.row(), 0, rowWithDepth, 0, result.row().length); - rowWithDepth[result.row().length] = result.depth(); - output.add(rowWithDepth); - } else { - output.add(result.row()); - } - } - return output; - } -} From 041be849d6df5749e4bd74678ff94515286412c1 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 6 Feb 2026 17:41:58 +0800 Subject: [PATCH 21/23] Support batch mode Signed-off-by: Heng Qian --- .../opensearch/sql/ast/tree/GraphLookup.java | 3 + .../sql/calcite/CalciteRelNodeVisitor.java | 7 +- .../sql/calcite/plan/rel/GraphLookup.java | 66 +++++--- .../calcite/plan/rel/LogicalGraphLookup.java | 17 +- docs/user/ppl/cmd/graphlookup.md | 48 +++++- .../remote/CalcitePPLGraphLookupIT.java | 104 ++++++++++++ .../rules/EnumerableGraphLookupRule.java | 3 +- .../scan/CalciteEnumerableGraphLookup.java | 149 
+++++++++++++++++- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../opensearch/sql/ppl/parser/AstBuilder.java | 6 + 11 files changed, 372 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java index 0d7c6d479f3..7ab0e04b020 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/GraphLookup.java @@ -68,6 +68,9 @@ public enum Direction { /** Whether to support array-typed fields without early filter pushdown. */ private final boolean supportArray; + /** Whether to batch all source start values into a single unified BFS traversal. */ + private final boolean batchMode; + private UnresolvedPlan child; public String getDepthFieldName() { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 8feff4f14b2..cc9891ab205 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2583,6 +2583,9 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { RelBuilder builder = context.relBuilder; // TODO: Limit the number of source rows to 100 for now, make it configurable. builder.limit(0, 100); + if (node.isBatchMode()) { + tryToRemoveMetaFields(context, true); + } RelNode sourceTable = builder.build(); // 2. Extract parameters @@ -2597,6 +2600,7 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { Integer maxDepthValue = maxDepthNode.getValueAs(Integer.class); maxDepthValue = maxDepthValue == null ? 0 : maxDepthValue; boolean supportArray = node.isSupportArray(); + boolean batchMode = node.isBatchMode(); // 3. 
Visit and materialize lookup table analyze(node.getFromTable(), context); @@ -2616,7 +2620,8 @@ public RelNode visitGraphLookup(GraphLookup node, CalcitePlanContext context) { depthFieldName, maxDepthValue, bidirectional, - supportArray); + supportArray, + batchMode); builder.push(graphLookup); return builder.peek(); diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java index 0a70fc44b1d..e0ea048edbb 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/GraphLookup.java @@ -49,6 +49,7 @@ public abstract class GraphLookup extends BiRel { protected final int maxDepth; // -1 = unlimited protected final boolean bidirectional; protected final boolean supportArray; + protected final boolean batchMode; private RelDataType outputRowType; @@ -68,6 +69,7 @@ public abstract class GraphLookup extends BiRel { * @param bidirectional Whether to traverse edges in both directions * @param supportArray Whether to support array-typed fields (disables early visited filter * pushdown) + * @param batchMode Whether to batch all source start values into a single unified BFS */ protected GraphLookup( RelOptCluster cluster, @@ -81,7 +83,8 @@ protected GraphLookup( @Nullable String depthField, int maxDepth, boolean bidirectional, - boolean supportArray) { + boolean supportArray, + boolean batchMode) { super(cluster, traitSet, source, lookup); this.startField = startField; this.fromField = fromField; @@ -91,6 +94,7 @@ protected GraphLookup( this.maxDepth = maxDepth; this.bidirectional = bidirectional; this.supportArray = supportArray; + this.batchMode = batchMode; } /** Returns the source table RelNode. 
*/ @@ -109,26 +113,48 @@ public RelNode getLookup() { @Override protected RelDataType deriveRowType() { if (outputRowType == null) { - // Output = source fields + output array field RelDataTypeFactory.Builder builder = getCluster().getTypeFactory().builder(); - // Add all source fields - for (var field : getSource().getRowType().getFieldList()) { - builder.add(field); + if (batchMode) { + // Batch mode: Output = [Array, Array] + // First field: aggregated source rows as array + RelDataType sourceRowType = getSource().getRowType(); + RelDataType sourceArrayType = + getCluster().getTypeFactory().createArrayType(sourceRowType, -1); + builder.add(startField, sourceArrayType); + + // Second field: aggregated lookup rows as array + RelDataType lookupRowType = getLookup().getRowType(); + if (this.depthField != null) { + final RelDataTypeFactory.Builder lookupBuilder = getCluster().getTypeFactory().builder(); + lookupBuilder.addAll(lookupRowType.getFieldList()); + RelDataType depthType = getCluster().getTypeFactory().createSqlType(SqlTypeName.INTEGER); + lookupBuilder.add(this.depthField, depthType); + lookupRowType = lookupBuilder.build(); + } + RelDataType lookupArrayType = + getCluster().getTypeFactory().createArrayType(lookupRowType, -1); + builder.add(outputField, lookupArrayType); + } else { + // Normal mode: Output = source fields + output array field + // Add all source fields + for (var field : getSource().getRowType().getFieldList()) { + builder.add(field); + } + + // Add output field (ARRAY type containing lookup row struct) + RelDataType lookupRowType = getLookup().getRowType(); + if (this.depthField != null) { + final RelDataTypeFactory.Builder lookupBuilder = getCluster().getTypeFactory().builder(); + lookupBuilder.addAll(lookupRowType.getFieldList()); + RelDataType depthType = getCluster().getTypeFactory().createSqlType(SqlTypeName.INTEGER); + lookupBuilder.add(this.depthField, depthType); + lookupRowType = lookupBuilder.build(); + } + RelDataType arrayType 
= getCluster().getTypeFactory().createArrayType(lookupRowType, -1); + builder.add(outputField, arrayType); } - // Add output field (ARRAY type containing lookup row struct) - RelDataType lookupRowType = getLookup().getRowType(); - if (this.depthField != null) { - final RelDataTypeFactory.Builder lookupBuilder = getCluster().getTypeFactory().builder(); - lookupBuilder.addAll(lookupRowType.getFieldList()); - RelDataType depthType = getCluster().getTypeFactory().createSqlType(SqlTypeName.INTEGER); - lookupBuilder.add(this.depthField, depthType); - lookupRowType = lookupBuilder.build(); - } - RelDataType arrayType = getCluster().getTypeFactory().createArrayType(lookupRowType, -1); - builder.add(outputField, arrayType); - outputRowType = builder.build(); } return outputRowType; @@ -136,7 +162,8 @@ protected RelDataType deriveRowType() { @Override public double estimateRowCount(RelMetadataQuery mq) { - return getSource().estimateRowCount(mq); + // Batch mode aggregates all source rows into a single output row + return batchMode ? 
1 : getSource().estimateRowCount(mq); } @Override @@ -148,6 +175,7 @@ public RelWriter explainTerms(RelWriter pw) { .item("depthField", depthField) .item("maxDepth", maxDepth) .item("bidirectional", bidirectional) - .itemIf("supportArray", supportArray, supportArray); + .itemIf("supportArray", supportArray, supportArray) + .itemIf("batchMode", batchMode, batchMode); } } diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java index dd82755a0d1..745d0cb3822 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalGraphLookup.java @@ -35,6 +35,7 @@ public class LogicalGraphLookup extends GraphLookup { * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions * @param supportArray Whether to support array-typed fields + * @param batchMode Whether to batch all source start values into a single unified BFS */ protected LogicalGraphLookup( RelOptCluster cluster, @@ -48,7 +49,8 @@ protected LogicalGraphLookup( @Nullable String depthField, int maxDepth, boolean bidirectional, - boolean supportArray) { + boolean supportArray, + boolean batchMode) { super( cluster, traitSet, @@ -61,7 +63,8 @@ protected LogicalGraphLookup( depthField, maxDepth, bidirectional, - supportArray); + supportArray, + batchMode); } /** @@ -77,6 +80,7 @@ protected LogicalGraphLookup( * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions * @param supportArray Whether to support array-typed fields + * @param batchMode Whether to batch all source start values into a single unified BFS * @return A new LogicalGraphLookup instance */ public static LogicalGraphLookup create( @@ -89,7 +93,8 @@ public static LogicalGraphLookup create( @Nullable String 
depthField, int maxDepth, boolean bidirectional, - boolean supportArray) { + boolean supportArray, + boolean batchMode) { RelOptCluster cluster = source.getCluster(); RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE); return new LogicalGraphLookup( @@ -104,7 +109,8 @@ public static LogicalGraphLookup create( depthField, maxDepth, bidirectional, - supportArray); + supportArray, + batchMode); } @Override @@ -121,6 +127,7 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { depthField, maxDepth, bidirectional, - supportArray); + supportArray, + batchMode); } } diff --git a/docs/user/ppl/cmd/graphlookup.md b/docs/user/ppl/cmd/graphlookup.md index 277896cfb44..ef8720af8eb 100644 --- a/docs/user/ppl/cmd/graphlookup.md +++ b/docs/user/ppl/cmd/graphlookup.md @@ -8,7 +8,7 @@ The `graphLookup` command performs recursive graph traversal on a collection usi The `graphLookup` command has the following syntax: ```syntax -graphLookup startField= fromField= toField= [maxDepth=] [depthField=] [direction=(uni | bi)] [supportArray=(true | false)] as +graphLookup startField= fromField= toField= [maxDepth=] [depthField=] [direction=(uni | bi)] [supportArray=(true | false)] [batchMode=(true | false)] as ``` The following are examples of the `graphLookup` command syntax: @@ -36,6 +36,7 @@ The `graphLookup` command supports the following parameters. | `depthField=` | Optional | The name of the field to add to each traversed document indicating its recursion depth. If not specified, no depth field is added. Depth starts at `0` for the first level of matches. | | `direction=(uni \| bi)` | Optional | The traversal direction. `uni` (default) performs unidirectional traversal following edges in the forward direction only. `bi` performs bidirectional traversal, following edges in both directions. | | `supportArray=(true \| false)` | Optional | When `true`, disables early visited-node filter pushdown to OpenSearch. Default is `false`. 
Set to `true` when `fromField` or `toField` contains array values to ensure correct traversal behavior. See [Array Field Handling](#array-field-handling) for details. | +| `batchMode=(true \| false)` | Optional | When `true`, collects all start values from all source rows and performs a single unified BFS traversal. Default is `false`. The output changes to two arrays: `[Array, Array]`. See [Batch Mode](#batch-mode) for details. | | `as ` | Required | The name of the output array field that will contain all documents found during the graph traversal. | ## How It Works @@ -253,6 +254,51 @@ With bidirectional traversal, Ron's connections include: - His manager (Andrew) - His peer (Dan, who also reports to Andrew) +## Batch Mode + +When `batchMode=true`, the `graphLookup` command collects all start values from all source rows and performs a single unified BFS traversal instead of separate traversals per row. + +### Output Format Change + +In batch mode, the output is a **single row** with two arrays: +- First array: All source rows collected +- Second array: All lookup results from the unified BFS traversal + +### When to Use Batch Mode + +Use `batchMode=true` when: +- You want to find all nodes reachable from **any** of the source start values +- You need a global view of the graph connectivity from multiple starting points +- You want to avoid duplicate traversals when multiple source rows share overlapping paths + +### Example + +```ppl ignore +source = travelers + | graphLookup airports + startField=nearestAirport + fromField=connects + toField=airport + batchMode=true + maxDepth=2 + as reachableAirports +``` + +**Normal mode** (default): Each traveler gets their own list of reachable airports +```text +| name | nearestAirport | reachableAirports | +|-------|----------------|-------------------| +| Dev | JFK | [JFK, BOS, ORD] | +| Jeff | BOS | [BOS, JFK, PWM] | +``` + +**Batch mode**: A single row with all travelers and all reachable airports combined +```text +| 
travelers | reachableAirports | +|----------------------------------------|-----------------------------| +| [{Dev, JFK}, {Jeff, BOS}] | [JFK, BOS, ORD, PWM, ...] | +``` + ## Array Field Handling When the `fromField` or `toField` contains array values, you should set `supportArray=true` to ensure correct traversal behavior. diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java index 25377062872..498b17dab91 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLGraphLookupIT.java @@ -496,4 +496,108 @@ public void testGraphLookupWithFieldsProjection() throws IOException { rows("Asya", List.of("{Ron, Andrew, 3}")), rows("Dan", List.of("{Andrew, null, 4}"))); } + + // ==================== Batch Mode Tests ==================== + + /** + * Test 17: Batch mode - collects all start values and performs unified BFS. Output is a single + * row with [Array, Array]. + * + *

Source: Dev (reportsTo=Eliot), Asya (reportsTo=Ron) Start values: {Eliot, Ron} BFS finds: + * Eliot->Ron, Ron->Andrew, Andrew->null + */ + @Test + public void testBatchModeEmployeeHierarchy() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where name in ('Dev', 'Asya')" + + " | graphLookup %s" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + + " depthField=depth" + + " maxDepth=3" + + " batchMode=true" + + " as reportingHierarchy", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + verifySchema(result, schema("reportsTo", "array"), schema("reportingHierarchy", "array")); + verifyDataRows( + result, + rows( + List.of("{Dev, Eliot, 1}", "{Asya, Ron, 5}"), + List.of("{Ron, Andrew, 3, 0}", "{Andrew, null, 4, 1}"))); + } + + /** + * Test 18: Batch mode for travelers - find all airports reachable from any traveler. All + * travelers' nearest airports: JFK (Dev, Eliot), BOS (Jeff). Unified BFS from {JFK, BOS} with + * maxDepth=3 finds connected airports. 
+ */ + @Test + public void testBatchModeTravelersAirports() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | graphLookup %s" + + " startField=nearestAirport" + + " fromField=connects" + + " toField=airport" + + " batchMode=true" + + " depthField=depth" + + " maxDepth=3" + + " supportArray=true" + + " as reachableAirports", + TEST_INDEX_GRAPH_TRAVELERS, TEST_INDEX_GRAPH_AIRPORTS)); + + verifySchema(result, schema("nearestAirport", "array"), schema("reachableAirports", "array")); + // Batch mode returns single row with: + // - sourceRows: [{Dev, JFK}, {Eliot, JFK}, {Jeff, BOS}] + // - lookupResults: airports reachable from JFK and BOS within maxDepth=3 + verifyDataRows( + result, + rows( + List.of("{Dev, JFK}", "{Eliot, JFK}", "{Jeff, BOS}"), + List.of("{JFK, [BOS, ORD], 0}", "{BOS, [JFK, PWM], 0}", "{PWM, [BOS, LHR], 1}"))); + } + + /** + * Test 19: Batch mode with bidirectional traversal. Dev (reportsTo=Eliot), Dan (reportsTo=Andrew). + * Bidirectional BFS from {Eliot, Andrew} finds connections in both directions. 
+ */ + @Test + public void testBatchModeBidirectional() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s" + + " | where name in ('Dev', 'Dan')" + + " | graphLookup %s" + + " startField=reportsTo" + + " fromField=reportsTo" + + " toField=name" + + " depthField=depth" + + " maxDepth=3" + + " direction=bi" + + " batchMode=true" + + " as connections", + TEST_INDEX_GRAPH_EMPLOYEES, TEST_INDEX_GRAPH_EMPLOYEES)); + + verifySchema(result, schema("reportsTo", "array"), schema("connections", "array")); + // Batch mode returns single row with bidirectional traversal results + // Start from {Eliot, Andrew}, find connections in both directions + verifyDataRows( + result, + rows( + List.of("{Dev, Eliot, 1}", "{Dan, Andrew, 6}"), + List.of( + "{Dev, Eliot, 1, 0}", + "{Eliot, Ron, 2, 0}", + "{Andrew, null, 4, 0}", + "{Dan, Andrew, 6, 0}", + "{Asya, Ron, 5, 1}"))); + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java index 38332c6ae88..ed107f641f0 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/EnumerableGraphLookupRule.java @@ -99,6 +99,7 @@ public RelNode convert(RelNode rel) { graphLookup.getDepthField(), graphLookup.getMaxDepth(), graphLookup.isBidirectional(), - graphLookup.isSupportArray()); + graphLookup.isSupportArray(), + graphLookup.isBatchMode()); } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 98cf0f8ebbc..ddced7fe2f3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -69,6 +69,7 @@ public class CalciteEnumerableGraphLookup extends GraphLookup implements Enumera * @param maxDepth Maximum traversal depth (-1 for unlimited) * @param bidirectional Whether to traverse edges in both directions * @param supportArray Whether to support array-typed fields + * @param batchMode Whether to batch all source start values into a single unified BFS */ public CalciteEnumerableGraphLookup( RelOptCluster cluster, @@ -82,7 +83,8 @@ public CalciteEnumerableGraphLookup( String depthField, int maxDepth, boolean bidirectional, - boolean supportArray) { + boolean supportArray, + boolean batchMode) { super( cluster, traitSet, @@ -95,7 +97,8 @@ public CalciteEnumerableGraphLookup( depthField, maxDepth, bidirectional, - supportArray); + supportArray, + batchMode); } @Override @@ -112,7 +115,8 @@ public RelNode copy(RelTraitSet traitSet, List inputs) { depthField, maxDepth, bidirectional, - supportArray); + supportArray, + batchMode); } @Override @@ -166,6 +170,7 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec private final int toFieldIdx; private Object[] current = null; + private boolean batchModeCompleted = false; @SuppressWarnings("unchecked") GraphLookupEnumerator(CalciteEnumerableGraphLookup graphLookup) { @@ -198,14 +203,73 @@ private static class GraphLookupEnumerator implements Enumerator<@Nullable Objec @Override public Object current() { - // source fields + output array + // source fields + output array (normal mode) or [source array, lookup array] (batch mode) return current; } - // TODO: currently we perform BFS for each single row. - // We can improve this by performing BFS for batch of rows. 
@Override public boolean moveNext() { + if (graphLookup.batchMode) { + return moveNextBatchMode(); + } else { + return moveNextNormalMode(); + } + } + + /** + * Batch mode: collect all source start values, perform unified BFS, return single aggregated + * row. + */ + private boolean moveNextBatchMode() { + // Batch mode only returns one row + if (batchModeCompleted) { + return false; + } + batchModeCompleted = true; + + // Collect all source rows and start values + List allSourceRows = new ArrayList<>(); + Set allStartValues = new HashSet<>(); + + while (sourceEnumerator.moveNext()) { + Object sourceRow = sourceEnumerator.current(); + Object[] sourceValues; + + if (sourceRow instanceof Object[] arr) { + sourceValues = arr; + } else { + sourceValues = new Object[] {sourceRow}; + } + + // Store the source row + allSourceRows.add(sourceValues); + + // Collect start value(s) + Object startValue = + (startFieldIndex >= 0 && startFieldIndex < sourceValues.length) + ? sourceValues[startFieldIndex] + : null; + + if (startValue != null) { + if (startValue instanceof List list) { + allStartValues.addAll(list); + } else { + allStartValues.add(startValue); + } + } + } + + // Perform unified BFS with all start values + List bfsResults = performBfsWithMultipleStarts(allStartValues); + + // Build output row: [Array, Array] + current = new Object[] {allSourceRows, bfsResults}; + + return true; + } + + /** Normal mode: perform BFS for each source row individually. */ + private boolean moveNextNormalMode() { if (!sourceEnumerator.moveNext()) { return false; } @@ -238,6 +302,79 @@ public boolean moveNext() { return true; } + /** + * Performs unified BFS traversal starting from multiple start values. 
+ * + * @param startValues The set of starting values for BFS + * @return List of rows found during traversal + */ + private List performBfsWithMultipleStarts(Set startValues) { + if (startValues.isEmpty()) { + return List.of(); + } + + List results = new ArrayList<>(); + Set visitedNodes = new HashSet<>(); + Queue queue = new ArrayDeque<>(); + + // Initialize BFS with all start values + for (Object value : startValues) { + if (value != null && !visitedNodes.contains(value)) { + visitedNodes.add(value); + queue.offer(value); + } + } + + int currentLevelDepth = 0; + while (!queue.isEmpty()) { + List currentLevelValues = new ArrayList<>(); + + while (!queue.isEmpty()) { + Object value = queue.poll(); + currentLevelValues.add(value); + } + + if (currentLevelValues.isEmpty()) { + break; + } + + List forwardResults = queryLookupTable(currentLevelValues, visitedNodes); + + for (Object row : forwardResults) { + Object[] rowArray = (Object[]) row; + Object fromValue = rowArray[fromFieldIdx]; + List nextValues = new ArrayList<>(); + collectValues(fromValue, nextValues, visitedNodes); + if (graphLookup.bidirectional) { + Object toValue = rowArray[toFieldIdx]; + collectValues(toValue, nextValues, visitedNodes); + } + + if (!nextValues.isEmpty()) { + if (graphLookup.depthField != null) { + Object[] rowWithDepth = new Object[rowArray.length + 1]; + System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); + rowWithDepth[rowArray.length] = currentLevelDepth; + results.add(rowWithDepth); + } else { + results.add(rowArray); + } + + for (Object val : nextValues) { + if (val != null) { + visitedNodes.add(val); + queue.offer(val); + } + } + } + } + + if (++currentLevelDepth > graphLookup.maxDepth) break; + } + + return results; + } + /** * Performs BFS traversal starting from the given value by dynamically querying OpenSearch. 
* diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 5f0031eb722..5239ef9154c 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -62,6 +62,7 @@ DIRECTION: 'DIRECTION'; UNI: 'UNI'; BI: 'BI'; SUPPORT_ARRAY: 'SUPPORTARRAY'; +BATCH_MODE: 'BATCHMODE'; ROW: 'ROW'; COL: 'COL'; EXPAND: 'EXPAND'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 182fe30ef5f..65dc24810d5 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -639,6 +639,7 @@ graphLookupOption | (DEPTH_FIELD EQUAL fieldExpression) | (DIRECTION EQUAL (UNI | BI)) | (SUPPORT_ARRAY EQUAL booleanLiteral) + | (BATCH_MODE EQUAL booleanLiteral) ; // clauses diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 78ce37b6cc4..709c0021a9f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -1497,6 +1497,7 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom Field depthField = null; Direction direction = Direction.UNI; boolean supportArray = false; + boolean batchMode = false; for (OpenSearchPPLParser.GraphLookupOptionContext option : ctx.graphLookupOption()) { if (option.FROM_FIELD() != null) { @@ -1521,6 +1522,10 @@ public UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom Literal literal = (Literal) internalVisitExpression(option.booleanLiteral()); supportArray = Boolean.TRUE.equals(literal.getValue()); } + if (option.BATCH_MODE() != null) { + Literal literal = (Literal) internalVisitExpression(option.booleanLiteral()); + batchMode = Boolean.TRUE.equals(literal.getValue()); + } } Field as = (Field) internalVisitExpression(ctx.outputField); @@ -1539,6 +1544,7 @@ public 
UnresolvedPlan visitGraphLookupCommand(OpenSearchPPLParser.GraphLookupCom .depthField(depthField) .direction(direction) .supportArray(supportArray) + .batchMode(batchMode) .build(); } } From 0069b8707a0a60b6b03b278ec4e858ebf02f3109 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 6 Feb 2026 17:54:43 +0800 Subject: [PATCH 22/23] Close lookup table scan Signed-off-by: Heng Qian --- .../storage/scan/CalciteEnumerableGraphLookup.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index ddced7fe2f3..672a1c45c20 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -514,9 +514,22 @@ private List queryLookupTable( while (res.hasNext()) { results.add(res.next()); } + closeIterator(res); return results; } + private static void closeIterator(@Nullable Iterator iterator) { + if (iterator instanceof AutoCloseable) { + try { + ((AutoCloseable) iterator).close(); + } catch (RuntimeException e) { + throw e; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + /** * Provides a query builder to search edges with the field matching values * From 0f6892760af9b2ea8f504c900355d955d8161699 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 6 Feb 2026 18:58:38 +0800 Subject: [PATCH 23/23] refine code Signed-off-by: Heng Qian --- .../scan/CalciteEnumerableGraphLookup.java | 94 +++---------------- .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 6 ++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 14 +++ 3 files changed, 34 insertions(+), 80 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java index 672a1c45c20..03a327e27fa 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteEnumerableGraphLookup.java @@ -32,6 +32,8 @@ import org.apache.calcite.plan.RelTraitSet; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.checkerframework.checker.nullness.qual.Nullable; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; @@ -52,6 +54,7 @@ */ @Getter public class CalciteEnumerableGraphLookup extends GraphLookup implements EnumerableRel, Scannable { + private static final Logger LOG = LogManager.getLogger(); /** * Creates a CalciteEnumerableGraphLookup. @@ -260,7 +263,7 @@ private boolean moveNextBatchMode() { } // Perform unified BFS with all start values - List bfsResults = performBfsWithMultipleStarts(allStartValues); + List bfsResults = performBfs(allStartValues); // Build output row: [Array, Array] current = new Object[] {allSourceRows, bfsResults}; @@ -302,79 +305,6 @@ private boolean moveNextNormalMode() { return true; } - /** - * Performs unified BFS traversal starting from multiple start values. 
- * - * @param startValues The set of starting values for BFS - * @return List of rows found during traversal - */ - private List performBfsWithMultipleStarts(Set startValues) { - if (startValues.isEmpty()) { - return List.of(); - } - - List results = new ArrayList<>(); - Set visitedNodes = new HashSet<>(); - Queue queue = new ArrayDeque<>(); - - // Initialize BFS with all start values - for (Object value : startValues) { - if (value != null && !visitedNodes.contains(value)) { - visitedNodes.add(value); - queue.offer(value); - } - } - - int currentLevelDepth = 0; - while (!queue.isEmpty()) { - List currentLevelValues = new ArrayList<>(); - - while (!queue.isEmpty()) { - Object value = queue.poll(); - currentLevelValues.add(value); - } - - if (currentLevelValues.isEmpty()) { - break; - } - - List forwardResults = queryLookupTable(currentLevelValues, visitedNodes); - - for (Object row : forwardResults) { - Object[] rowArray = (Object[]) row; - Object fromValue = rowArray[fromFieldIdx]; - List nextValues = new ArrayList<>(); - collectValues(fromValue, nextValues, visitedNodes); - if (graphLookup.bidirectional) { - Object toValue = rowArray[toFieldIdx]; - collectValues(toValue, nextValues, visitedNodes); - } - - if (!nextValues.isEmpty()) { - if (graphLookup.depthField != null) { - Object[] rowWithDepth = new Object[rowArray.length + 1]; - System.arraycopy(rowArray, 0, rowWithDepth, 0, rowArray.length); - rowWithDepth[rowArray.length] = currentLevelDepth; - results.add(rowWithDepth); - } else { - results.add(rowArray); - } - - for (Object val : nextValues) { - if (val != null) { - visitedNodes.add(val); - queue.offer(val); - } - } - } - } - - if (++currentLevelDepth > graphLookup.maxDepth) break; - } - - return results; - } - /** * Performs BFS traversal starting from the given value by dynamically querying OpenSearch. 
* @@ -393,12 +323,13 @@ private List performBfs(Object startValue) { Queue queue = new ArrayDeque<>(); // Initialize BFS with start value - if (startValue instanceof List list) { - list.forEach( - value -> { - visitedNodes.add(value); - queue.offer(value); - }); + if (startValue instanceof Collection collection) { + collection.forEach(value -> { + if (!visitedNodes.contains(value)) { + visitedNodes.add(value); + queue.offer(value); + } + }); } else { visitedNodes.add(startValue); queue.offer(startValue); @@ -422,6 +353,9 @@ private List performBfs(Object startValue) { // Forward direction: fromField = currentLevelValues List forwardResults = queryLookupTable(currentLevelValues, visitedNodes); + if (forwardResults.size() >= this.lookupScan.getOsIndex().getMaxResultWindow()) { + LOG.warn("BFS result size exceeds max result window, returning partial result."); + } for (Object row : forwardResults) { Object[] rowArray = (Object[]) (row); Object fromValue = rowArray[fromFieldIdx]; diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index caa5fa74948..b0a0c1d9ed4 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -242,6 +242,12 @@ public String visitGraphLookup(GraphLookup node, String context) { command.append(" depthField=").append(MASK_COLUMN); } command.append(" direction=").append(node.getDirection().name().toLowerCase()); + if (node.isSupportArray()) { + command.append(" supportArray=true"); + } + if (node.isBatchMode()) { + command.append(" batchMode=true"); + } command.append(" as ").append(MASK_COLUMN); return command.toString(); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 01ccf9180b0..943db6c2ba2 
100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -681,6 +681,20 @@ public void testGraphLookup() { anonymize( "source=t | graphLookup employees fromField=manager toField=name" + " startField=id maxDepth=5 depthField=level direction=bi as reportingHierarchy")); + // graphLookup with supportArray + assertEquals( + "source=table | graphlookup table fromField=identifier toField=identifier" + + " direction=uni supportArray=true as identifier", + anonymize( + "source=t | graphLookup airports fromField=connects toField=airport" + + " supportArray=true as reachableAirports")); + // graphLookup with batchMode + assertEquals( + "source=table | graphlookup table fromField=identifier toField=identifier" + + " direction=uni batchMode=true as identifier", + anonymize( + "source=t | graphLookup employees fromField=manager toField=name" + + " batchMode=true as reportingHierarchy")); } @Test