From 4f5b511877616b38f8e77f0b83e7213d216f0294 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Tue, 12 May 2026 08:49:06 -0700 Subject: [PATCH 01/11] feat: route SQL V2 AST through CalciteRelNodeVisitor for gap analysis Bridge SQL V2 parser output to PPL V3's CalciteRelNodeVisitor path: - QueryService.shouldUseCalcite() accepts QueryType.SQL - CalciteRelNodeVisitor handles Alias expression in project list - OpenSearchExecutionEngine returns Cursor.None instead of null - Disable REST-level V2-to-legacy fallback to expose all gaps - Enable Calcite + disallow fallback in SQL IT test base class These are temporary changes for gap analysis, not production-ready. --- .../org/opensearch/sql/calcite/CalciteRelNodeVisitor.java | 3 +++ .../java/org/opensearch/sql/executor/QueryService.java | 5 +++-- .../java/org/opensearch/sql/legacy/SQLIntegTestCase.java | 5 ++++- .../opensearch/sql/legacy/plugin/RestSQLQueryAction.java | 7 ++----- .../sql/opensearch/executor/OpenSearchExecutionEngine.java | 3 ++- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 1251f51b131..3d5bf4f5a8e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -541,6 +541,9 @@ private List expandProjectFields( .filter(addedFields::add) .forEach(field -> expandedFields.add(context.relBuilder.field(field))); } + case Alias alias -> { + expandedFields.add(rexVisitor.analyze(alias, context)); + } default -> throw new IllegalStateException( "Unexpected expression type in project list: " + expr.getClass().getSimpleName()); diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index fe9d3e55dc1..9e829d68c3a 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -358,9 +358,10 @@ private boolean isCalciteEnabled(Settings settings) { } // TODO https://github.com/opensearch-project/sql/issues/3457 - // Calcite is not available for SQL query now. Maybe release in 3.1.0? + // Route SQL V2 AST through CalciteRelNodeVisitor for gap analysis private boolean shouldUseCalcite(QueryType queryType) { - return isCalciteEnabled(settings) && queryType == QueryType.PPL; + return isCalciteEnabled(settings) + && (queryType == QueryType.PPL || queryType == QueryType.SQL); } private FrameworkConfig buildFrameworkConfig() { diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 099b2f7e0cb..888ff7d6812 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -11,6 +11,8 @@ import static org.opensearch.sql.legacy.plugin.RestSqlAction.EXPLAIN_API_ENDPOINT; import static org.opensearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT; import static org.opensearch.sql.ppl.PPLIntegTestCase.disableCalcite; +import static org.opensearch.sql.ppl.PPLIntegTestCase.disallowCalciteFallback; +import static org.opensearch.sql.ppl.PPLIntegTestCase.enableCalcite; import com.google.common.base.Strings; import java.io.IOException; @@ -193,7 +195,8 @@ protected void resetMaxResultWindow(String indexName) throws IOException { /** Provide for each test to load test index, data and other setup work */ protected void init() throws Exception { - disableCalcite(); + enableCalcite(); + disallowCalciteFallback(); increaseMaxCompilationsRate(); } diff --git a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java index 21badf79412..a54c53e9a7f 100644 --- a/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java +++ b/legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java @@ -131,11 +131,8 @@ public void onResponse(T response) { @Override public void onFailure(Exception e) { - if (e instanceof SyntaxCheckException || e instanceof UnsupportedCursorRequestException) { - fallBackHandler.accept(channel, e); - } else { - next.onFailure(e); - } + // Disable fallback to legacy — surface all failures for gap analysis + next.onFailure(e); } }; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 32b7891d344..2a29b52e837 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -53,6 +53,7 @@ import org.opensearch.sql.executor.ExecutionEngine; import org.opensearch.sql.executor.ExecutionEngine.Schema.Column; import org.opensearch.sql.executor.Explain; +import org.opensearch.sql.executor.pagination.Cursor; import org.opensearch.sql.executor.pagination.PlanSerializer; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; @@ -321,7 +322,7 @@ private QueryResponse buildResultSet( columns.add(new Column(columnName, null, exprType)); } Schema schema = new Schema(columns); - QueryResponse response = new QueryResponse(schema, values, null); + QueryResponse response = new QueryResponse(schema, values, Cursor.None); return response; } From 2d450748794fc4118e0d7b6a268b845a0fb91095 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Tue, 12 May 2026 08:49:13 -0700 Subject: [PATCH 02/11] docs: add SQL V2 to CalciteRelNodeVisitor gap analysis report Comprehensive gap analysis for Mustang SQL design decision: - 572/964 tests pass (59.3%) with V2 AST routed through Calcite - Category A: 13 grammar gaps (JOINs, IN/EXISTS, UNION, etc.) - Category B: 20 visitor gaps (ArgumentMap NPE, AggFunc cast, etc.) - Quick wins identified: ~158 tests unlockable with S-effort fixes - Verified against grammar, AST builder, and visitor source code --- docs/report/sql-v2-calcite-gap-analysis.md | 250 +++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 docs/report/sql-v2-calcite-gap-analysis.md diff --git a/docs/report/sql-v2-calcite-gap-analysis.md b/docs/report/sql-v2-calcite-gap-analysis.md new file mode 100644 index 00000000000..d29322bcfad --- /dev/null +++ b/docs/report/sql-v2-calcite-gap-analysis.md @@ -0,0 +1,250 @@ +# SQL V2 → CalciteRelNodeVisitor: Complete Gap Analysis + +**Date**: 2026-05-12 +**Pass Rate**: 572 / 964 (59.3%) with all fallback paths disabled +**Fixes Applied**: Alias in project list, Cursor.None, REST fallback disabled +**Branch**: https://github.com/dai-chen/sql-1/tree/sql-v2-calcite-gap-analysis + +## Background + +This analysis evaluates the feasibility of connecting the SQL V2 parser/AST builder to the existing PPL Calcite planner (`CalciteRelNodeVisitor`) — bypassing the V2 Analyzer → Planner → PhysicalPlan path entirely. All fallback paths (Calcite → V2, V2 → legacy) were disabled to expose the complete set of gaps. + +The motivation is to provide data for the **Mustang SQL** design decision: integrating the SQL plugin with the Analytics Engine in OpenSearch core on Parquet index **without any fallback path to Lucene index**. One option under consideration is to reuse the V2 parser and AST builder for maximum SQL compatibility, routing the resulting AST through CalciteRelNodeVisitor (which already powers PPL V3) to produce Calcite RelNode plans. This report identifies exactly what works, what doesn't, and what effort is required to close each gap — informing whether this reuse strategy is viable or whether alternative approaches (e.g., Calcite's native SQL parser) are preferable for Mustang. + +### Alternative Path Evaluated: Calcite Native Parser + UnifiedQueryCompiler + +For comparison, we also tested routing all SQL through `RestUnifiedQueryAction` using Calcite's native `SqlParser` → `SqlToRelConverter` → `UnifiedQueryCompiler` (direct Calcite Bindable execution). Result: **11 / 1007 (1.1%) pass rate**. The test cluster crashed mid-run due to an unhandled `ExceptionInInitializerError` (fatal JVM crash from `log(0)` in Calcite Enumerable execution). Before the crash, 198 queries failed at planning (Calcite parser can't handle OpenSearch-specific functions/syntax) and 143 failed at compilation (RelRunner execution failures). This path is fundamentally not ready — it lacks error resilience, doesn't leverage OpenSearch push-down, and has far worse compatibility than the V2+CalciteRelNodeVisitor approach. + +--- + +## Category A: V2 Grammar / Parser Gaps + +These queries **cannot be parsed** by the V2 ANTLR grammar (`OpenSearchSQLParser.g4`). They fail with `SyntaxCheckException` before reaching CalciteRelNodeVisitor. + +| # | Gap | Failures | Example Query | Effort | Solution | +|---|-----|----------|---------------|--------|----------| +| A1 | **JOIN** (INNER, LEFT, cross) | 19 | `SELECT a.firstname FROM bank a INNER JOIN order b ON a.name = b.name` | L | Extend V2 grammar with JOIN clause + AstBuilder visitJoin. `Join` AST node and `CalciteRelNodeVisitor.visitJoin` already exist — only grammar + AstBuilder missing | +| A2 | **SHOW/DESCRIBE with unquoted hyphenated identifiers** | 11 | `SHOW TABLES LIKE opensearch-sql_test_index_account` | S | V2 grammar supports SHOW/DESCRIBE but `tableFilter` expects `STRING_LITERAL`. Tests pass unquoted identifiers. Fix: extend `tableFilter` to accept ID | +| A3 | **Nested FROM syntax** (PartiQL comma-join) | 7 | `SELECT * FROM nested_type e, e.message m` | M | Extend grammar FROM clause to allow comma-separated relations + nested path expressions | +| A4 | **PERCENTILES / STATS / EXTENDED_STATS** | 7 | `SELECT PERCENTILES(age, 25.0, 50.0) FROM account` | M | Tokens exist in lexer but unused in parser. Add parser rules. Note: `PERCENTILE` (singular) IS supported | +| A5 | **Legacy method syntax** (`field=FUNC('arg')`) | 5 | `SELECT * FROM account WHERE address=REGEXP_QUERY('.*')` | M | Only MATCH_QUERY/MATCHPHRASE have `altSingleFieldRelevanceFunction` rule. REGEXP_QUERY does not. Add to grammar or use standard function-call syntax | +| A6 | **Index names with dots** | 3 | `SELECT * FROM test-logs-2025.01.01` | S | Grammar splits on dots into `qualifiedName` parts. `01` fails as ident (starts with digit). Require backtick quoting (already works with backticks) | +| A7 | **IN / EXISTS / NOT EXISTS subquery** | 0 (excluded) | `WHERE age IN (SELECT age FROM bank2 WHERE age > 30)` | L | `IN` predicate only accepts expression list, not subquery. `EXISTS` token defined in lexer but unused in parser. Add `IN '(' querySpecification ')'` and `EXISTS '(' querySpecification ')'` rules | +| A8 | **Qualified wildcard** (`alias.*`) | 2 | `SELECT m.* FROM nested_type e, e.message m` | S | Add qualifiedStar rule to selectElement in grammar | +| A9 | **Multi-table FROM** (comma-join, non-nested) | 1 | `SELECT * FROM dog_index, dogs2_index` | S | Allow comma-separated table list in FROM clause | +| A10 | **date_histogram** | 1 | `SELECT count(*) FROM online GROUP BY date_histogram('field'='insert_time','fixed_interval'='4d')` | S | Add date_histogram to function name tokens in parser | +| A11 | **Index/type syntax** (`/type`) | 1 | `SELECT * FROM account/wrongType` | S | Add optional `/type` suffix to table reference rule | +| A12 | **UNION / INTERSECT / MINUS** | 0 (excluded) | `SELECT name FROM bank UNION ALL SELECT dog_name FROM dog` | L | Add set-operation rules to grammar + AST Append/Union nodes | +| A13 | **Correlated subquery** | 0 (excluded) | `WHERE EXISTS (SELECT * FROM dept d WHERE d.id = e.dept_id)` | L | Requires A7 first + decorrelation support in planner | + +**Total Category A failures: ~57 (active) + 3 hidden gaps with 0 active tests (A7, A12, A13)** + +**Note on subqueries**: V2 grammar supports derived tables (`FROM (SELECT ...) AS alias`) via the `subqueryAsRelation` rule. CalciteRelNodeVisitor has `visitSubqueryAlias` for this. However, `IN (SELECT ...)`, `EXISTS (SELECT ...)`, and correlated subqueries are not in the V2 grammar — the `EXISTS` token is defined in the lexer but unused in any parser rule, and `IN` only accepts expression lists, not nested SELECTs. The legacy `SubqueryIT` (18 tests) is entirely excluded from the build. + +--- + +## Category B: CalciteRelNodeVisitor Execution Gaps + +These queries parse successfully into AST but fail during Calcite visitor traversal or execution. + +| # | Gap | Failures | Example Query | Effort | Solution | +|---|-----|----------|---------------|--------|----------| +| B1 | **Aggregation ArgumentMap NPE** (BUCKET_NULLABLE) | 82 | `SELECT MAX(datetime1), MIN(datetime1) FROM calcs` | S | Null-check `statsArgs.get(Argument.BUCKET_NULLABLE)` at CalciteRelNodeVisitor.java:1612. SQL's `Aggregation` has empty `argExprList` (PPL sets BUCKET_NULLABLE). Default to `false`. | +| B2 | **AggregateFunction → Function cast** | 29 | `SELECT AVG(num3) OVER(PARTITION BY datetime1) FROM calcs` | S | Add `instanceof AggregateFunction` branch at CalciteRexNodeVisitor.java:564. `AggregateFunction` and `Function` are sibling classes (both extend `UnresolvedExpression`), not parent-child. | +| B3 | **Pagination unsupported** (schema mismatch + cursor) | 35 | `SELECT * FROM bank` (with fetchSize) | L | `visitPaginate()` and `visitFetchCursor()` both throw `CalciteUnsupportedException`. Non-paged goes through Calcite; paged falls back to V2 → different schemas. Implement pagination in Calcite or ensure schema consistency. | +| B4 | **Table function unsupported** | 26 | `SELECT v._id FROM vectorSearch(table='t', field='f', vector='[1.0]', option='k=5') AS v` | L | `visitTableFunction()` is a complete stub (single throw). Implement KNN scan translation. | +| B5 | **NESTED function** | 33 | `SELECT nested(message.author.name) FROM multi_nested` | L | NESTED not registered in `PPLFuncImpTable.java`. Requires special handling — not a simple function, needs unnest/correlate pattern for OpenSearch nested field access. | +| B6 | **WILDCARD_QUERY** | 18 | `SELECT body FROM wildcard WHERE wildcard_query(KeywordBody, 't*')` | M | Not registered in `PPLFuncImpTable.java`. Register and map to OpenSearch wildcard query DSL. | +| B7 | **Relevance function aliases** (MATCH_QUERY, MATCHPHRASE, MATCHQUERY) | 14 | `SELECT firstname FROM accounts WHERE match_query(lastname, 'Bates')` | S | `PPLFuncImpTable` registers MATCH, MATCH_PHRASE, MULTI_MATCH but NOT the aliases (MATCH_QUERY, MATCHQUERY, MATCHPHRASE). Add alias registrations pointing to same operators. | +| B8 | **LIMIT not applied** (visitLimit missing) | 9 | `SELECT last_day(date0) FROM calcs LIMIT 3` → returns 17 rows | S | `CalciteRelNodeVisitor` does NOT override `visitLimit()`. Default `visitChildren()` silently skips it. Add `visitLimit(Limit node, ctx)` calling `context.relBuilder.limit(offset, count)`. | +| B9 | **Window functions** (ROW_NUMBER, RANK, DENSE_RANK) | 8 | `SELECT age, ROW_NUMBER() OVER(ORDER BY age DESC) FROM bank` | S | `WINDOW_FUNC_MAPPING` in `BuiltinFunctionName.java` is missing ranking functions. `PlanUtils.makeOver()` already has `case ROW_NUMBER` handler. Just add to mapping. | +| B10 | **Table name resolution** (dot = schema separator) | 6 | `SELECT __age FROM test.twounderscores` → `Table 'test.twounderscores' not found` | S | `visitRelation` passes `QualifiedName.getParts()` (split on dots) to `relBuilder.scan()`. Calcite interprets `["test","twounderscores"]` as schema.table. Fix: pass resolved name as single-element list. | +| B11 | **QUERY function** | 5 | `SELECT Id FROM beer WHERE query('Tags:taste OR Body:taste')` | M | Not registered in `PPLFuncImpTable.java`. Register and map to OpenSearch query_string DSL. | +| B12 | **Schema type mismatch** (keyword vs text) | 5 | `SELECT phrase FROM phrase WHERE match_bool_prefix(phrase, 'quick')` — schema says `keyword` not `text` | S | Calcite schema resolves field type from mapping incorrectly. Fix type resolution in OpenSearchSchema. | +| B13 | **Column name = alias instead of expression** | 6 | `SELECT NULLIF(lastname, 'unknown') AS name` → schema name is `name` not `NULLIF(...)` | S | Calcite uses the AS alias as column label. V2 uses expression text. Align behavior in response formatter. | +| B14 | **ISNULL / IFNULL unresolved** | 5 | `SELECT ISNULL(lastname) FROM accounts` | S | ISNULL (1-arg boolean) not registered. IFNULL fails with NULL literal (UNDEFINED type). Register + fix type resolution. | +| B15 | **Explain format** | 3 | `EXPLAIN SELECT age FROM account WHERE age IS NOT NULL` | S | Calcite explain outputs different JSON structure. Test regex doesn't match. Fix test or add V2-compatible explain format. | +| B16 | **ArgumentMap NPE** (systemic — PI, GROUP BY, HAVING) | 4 | `SELECT PI()` / `SELECT col FROM t GROUP BY col HAVING COUNT(1) > 0` | S | Same root cause as B1 — ArgumentMap access without null-check in multiple visitor methods. Fix alongside B1. | +| B17 | **Pagination fallback tests** | 5 | `SELECT * FROM bank LIMIT 5` (with fetchSize) | M | Tests verify fallback behavior. With fallback disabled, they fail. Test-infrastructure issue. | +| B18 | **Type name mismatch** (SQL standard vs OpenSearch) | 1 | `SELECT typeof(age)` → returns `BIGINT` instead of `LONG` | S | Calcite uses SQL standard type names. Map back to OpenSearch names in typeof() implementation. | +| B19 | **LIMIT off-by-one** | 1 | `SELECT point FROM geopoints LIMIT 5` → returns 6 rows | S | Off-by-one in SystemLimit or LIMIT application. Related to B8 (visitLimit missing). | + +**Total Category B failures: ~295** + +--- + +## Summary Statistics + +| Category | Active Failures | Hidden Gaps (0 tests) | Key Blocker | +|----------|----------------|----------------------|-------------| +| A (Grammar) | ~57 | 3 (IN/EXISTS, UNION, correlated) | JOINs (19) — visitor ready, grammar missing | +| B (Visitor) | ~295 | — | ArgumentMap NPE (86), NESTED (33), AggFunc cast (29) | + +## Effort Legend + +- **S** (Small): < 1 day. Null-check, register function alias, fix config. +- **M** (Medium): 1–3 days. New visitor method, function implementation with DSL mapping. +- **L** (Large): 3+ days. Grammar extension, new execution path, architectural change. + +## Quick Wins (S effort, high impact) + +| Fix | Effort | Tests Unlocked | File | +|-----|--------|----------------|------| +| B1+B16: Null-check BUCKET_NULLABLE in visitAggregation | S | ~86 | CalciteRelNodeVisitor.java:1612 | +| B2: instanceof AggregateFunction in visitWindowFunction | S | ~29 | CalciteRexNodeVisitor.java:564 | +| B8+B19: Add visitLimit() override | S | ~10 | CalciteRelNodeVisitor.java (new method) | +| B9: Add ROW_NUMBER/RANK/DENSE_RANK to WINDOW_FUNC_MAPPING | S | ~8 | BuiltinFunctionName.java | +| B7: Register MATCH_QUERY/MATCHQUERY/MATCHPHRASE aliases | S | ~14 | PPLFuncImpTable.java | +| B10: Pass resolved name as single-element list to scan() | S | ~6 | CalciteRelNodeVisitor.java visitRelation | +| B14: Register ISNULL + fix IFNULL null-type | S | ~5 | PPLFuncImpTable.java | +| **Total quick wins** | **~2 days** | **~158 tests** | | + +## Architectural Notes + +1. **JOIN path is ready**: `Join` AST node exists, `CalciteRelNodeVisitor.visitJoin()` is implemented. Only the V2 SQL grammar rule + AstBuilder visitor method are needed to connect SQL JOINs to the existing Calcite path. + +2. **Pagination is the biggest L-effort gap**: B3 (35 failures) requires implementing cursor-based pagination in the Calcite execution engine — a fundamentally new capability. + +3. **Function registration pattern**: Most unresolved function gaps (B6, B7, B11, B14) follow the same pattern — register in `PPLFuncImpTable.java` and map to appropriate Calcite SqlOperator or custom implementation. + +--- + +## Appendix: Fixes Applied for This Analysis + +The following minimal fixes were applied to enable the gap analysis. They are intentionally temporary and non-production-quality. + +### Fix 1: Route SQL through CalciteRelNodeVisitor + +**File**: `core/src/main/java/org/opensearch/sql/executor/QueryService.java` + +```java +// Before: +private boolean shouldUseCalcite(QueryType queryType) { + return isCalciteEnabled(settings) && queryType == QueryType.PPL; +} + +// After: +private boolean shouldUseCalcite(QueryType queryType) { + return isCalciteEnabled(settings) + && (queryType == QueryType.PPL || queryType == QueryType.SQL); +} +``` + +### Fix 2: Enable Calcite + disable fallback in IT tests + +**File**: `integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java` + +```java +// Before: +protected void init() throws Exception { + disableCalcite(); + increaseMaxCompilationsRate(); +} + +// After: +protected void init() throws Exception { + enableCalcite(); + disallowCalciteFallback(); + increaseMaxCompilationsRate(); +} +``` + +### Fix 3: Handle Alias expression in project list + +**File**: `core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java` + +Added `case Alias` to the switch in `expandProjectFields()`: + +```java +case Alias alias -> { + expandedFields.add(rexVisitor.analyze(alias, context)); +} +``` + +This dispatches to the existing `CalciteRexNodeVisitor.visitAlias()` which correctly unwraps the delegated expression and applies the alias name. + +### Fix 4: Return Cursor.None instead of null + +**File**: `opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java` + +```java +// Before: +QueryResponse response = new QueryResponse(schema, values, null); + +// After: +QueryResponse response = new QueryResponse(schema, values, Cursor.None); +``` + +### Fix 5: Disable REST-level V2 → legacy fallback + +**File**: `legacy/src/main/java/org/opensearch/sql/legacy/plugin/RestSQLQueryAction.java` + +Disabled the `SyntaxCheckException` catch in `fallBackListener.onFailure()` to surface all grammar-level failures instead of silently routing to legacy engine. + +--- + +## Appendix B: Function Registration Status + +**278 total functions** in `BuiltinFunctionName`. Summary: + +| Status | Count | Details | +|--------|-------|---------| +| PPLFuncImpTable (registered in Calcite) | 237 | All math, string, date, arithmetic, boolean, aggregate, collection, JSON, crypto | +| visitCast (handled by cast visitor) | 14 | All CAST_TO_* type conversions | +| makeOver (window function utility) | 2 | ROW_NUMBER, NTH_VALUE | +| Special visitor method | 2 | HIGHLIGHT, SCORE (throw unsupported) | +| External (OpenSearchExecutionEngine) | 2 | GEOIP, DISTINCT_COUNT_APPROX | +| GAP IN REPORT (missing, identified) | 13 | See below | +| NOT REGISTERED (missing, not in report) | 8 | See below | + +### Functions Identified as Gaps (13) + +| Function | SQL Name | Gap ID | +|----------|----------|--------| +| NESTED | `nested` | B5 | +| ISNULL | `isnull` | B14 | +| RANK | `rank` | B9 | +| DENSE_RANK | `dense_rank` | B9 | +| MATCHPHRASE | `matchphrase` | B7 | +| MATCHPHRASEQUERY | `matchphrasequery` | B7 | +| QUERY | `query` | B11 | +| MATCH_QUERY | `match_query` | B7 | +| MATCHQUERY | `matchquery` | B7 | +| MULTIMATCH | `multimatch` | B7 | +| MULTIMATCHQUERY | `multimatchquery` | B7 | +| WILDCARDQUERY | `wildcardquery` | B6 | +| WILDCARD_QUERY | `wildcard_query` | B6 | + +### Functions NOT REGISTERED (8) — Not in Gap Report + +| Function | SQL Name | Impact | Notes | +|----------|----------|--------|-------| +| TAN | `tan` | Low | `SqlStdOperatorTable.TAN` exists, trivial to add | +| NOT_LIKE | `not like` | Negligible | Handled by `NOT` + `LIKE` composition at query level | +| SCOREQUERY | `scorequery` | Low | Legacy alias for SCORE (which throws unsupported) | +| SCORE_QUERY | `score_query` | Low | Legacy alias for SCORE | +| BRAIN | `brain` | None for SQL | Internal PPL patterns command function | +| INTERVAL | `interval` | None | Handled implicitly by date arithmetic literals | +| NONE | `none` | None for SQL | PPL convert command no-op | +| INTERNAL_UNCOLLECT_PATTERNS | `uncollect_patterns` | None | Defined but never referenced anywhere | + +### Registered Functions by Category (237 in PPLFuncImpTable) + +| Category | Count | Examples | +|----------|-------|---------| +| Math | 26 | ABS, CEIL, FLOOR, ROUND, SQRT, LOG, EXP, POW, PI, RAND | +| Trigonometric | 11 | ACOS, ASIN, ATAN, ATAN2, COS, COSH, COT, SIN, SINH, DEGREES, RADIANS | +| Arithmetic | 11 | +, -, *, /, %, MOD, ADD, SUBTRACT, MULTIPLY, DIVIDE | +| Boolean/Comparison | 12 | AND, OR, NOT, XOR, =, !=, <, <=, >, >=, LIKE, ILIKE | +| String | 23 | CONCAT, SUBSTRING, TRIM, UPPER, LOWER, LENGTH, REPLACE, REVERSE, LOCATE | +| Date/Time | 63 | NOW, CURDATE, DATE_FORMAT, DATEDIFF, TIMESTAMPADD, EXTRACT, YEAR, MONTH, DAY | +| Aggregate | 18 | AVG, SUM, COUNT, MIN, MAX, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP, PERCENTILE_APPROX | +| Conditional | 8 | IF, IFNULL, NULLIF, COALESCE, IS_NULL, IS_NOT_NULL, IS_PRESENT, IS_EMPTY | +| Collection | 20 | ARRAY, SPLIT, MVAPPEND, MVJOIN, MVINDEX, JSON_EXTRACT, JSON_KEYS | +| JSON | 12 | JSON_VALID, JSON_OBJECT, JSON_ARRAY, JSON_SET, JSON_DELETE | +| Binning | 4 | SPAN_BUCKET, WIDTH_BUCKET, MINSPAN_BUCKET, RANGE_BUCKET | +| Crypto | 3 | MD5, SHA1, SHA2 | +| Relevance | 7 | MATCH, MATCH_PHRASE, MULTI_MATCH, SIMPLE_QUERY_STRING, QUERY_STRING, MATCH_BOOL_PREFIX, MATCH_PHRASE_PREFIX | +| Convert/Type | 10 | AUTO, NUM, CTIME, MKTIME, TYPEOF, TOSTRING, TONUMBER | +| Internal | 9 | ITEM, PATTERN_PARSER, GROK, PARSE, REGEXP_REPLACE | +| Other | 2 | CIDRMATCH, SPAN | From a0b607035d7f050ab73e8bf71e4e42f297938cf8 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Tue, 12 May 2026 16:03:02 -0700 Subject: [PATCH 03/11] feat(calcite): fix SQL-specific patterns in CalciteRelNodeVisitor - Null-check BUCKET_NULLABLE in visitAggregation (B1) - Handle AggregateFunction in visitWindowFunction (B2) - Add visitLimit override for SQL LIMIT clause (B8) - Add RANK/DENSE_RANK/ROW_NUMBER to WINDOW_FUNC_MAPPING (B9) - Fix table name resolution to pass single-element list (B10) - Register relevance function aliases: MATCH_QUERY, MATCHQUERY, etc (B7) - Register ISNULL, fix IFNULL to use ENHANCED_COALESCE (B14) --- .../sql/calcite/CalciteRelNodeVisitor.java | 14 +++++++++-- .../sql/calcite/CalciteRexNodeVisitor.java | 25 +++++++++++++------ .../function/BuiltinFunctionName.java | 3 +++ .../expression/function/PPLFuncImpTable.java | 20 ++++++++++++++- 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 3d5bf4f5a8e..fc9e6254c96 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -132,6 +132,7 @@ import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; +import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Lookup; import org.opensearch.sql.ast.tree.Lookup.OutputStrategy; import org.opensearch.sql.ast.tree.ML; @@ -246,7 +247,7 @@ public RelNode visitRelation(Relation node, CalcitePlanContext context) { if (nameResolver.getSchemaName().equals(INFORMATION_SCHEMA_NAME)) { throw new CalciteUnsupportedException("information_schema is unsupported in Calcite"); } - context.relBuilder.scan(node.getTableQualifiedName().getParts()); + context.relBuilder.scan(List.of(nameResolver.getIdentifierName())); RelNode scan = context.relBuilder.peek(); // Eagerly push down highlight config to the scan (highlight is a scan hint, not an operator) @@ -766,6 +767,13 @@ public RelNode visitHead(Head node, CalcitePlanContext context) { return context.relBuilder.peek(); } + @Override + public RelNode visitLimit(Limit node, CalcitePlanContext context) { + visitChildren(node, context); + context.relBuilder.limit(node.getOffset(), node.getLimit()); + return context.relBuilder.peek(); + } + /** * Insert a reversed sort node after finding the original sort in the tree. This rebuilds the tree * with the reversed sort inserted right after the original sort. @@ -1624,7 +1632,9 @@ private Pair, List> resolveAttributesForAggregation( @Override public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) { Argument.ArgumentMap statsArgs = Argument.ArgumentMap.of(node.getArgExprList()); - Boolean bucketNullable = (Boolean) statsArgs.get(Argument.BUCKET_NULLABLE).getValue(); + Literal bucketNullableLit = statsArgs.get(Argument.BUCKET_NULLABLE); + Boolean bucketNullable = + bucketNullableLit != null ? (Boolean) bucketNullableLit.getValue() : true; int nGroup = node.getGroupExprList().size() + (Objects.nonNull(node.getSpan()) ? 1 : 0); BitSet nonNullGroupMask = new BitSet(nGroup); if (!bucketNullable) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index c2ce4a740ec..78047b70b51 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -42,6 +42,7 @@ import org.apache.calcite.util.TimestampString; import org.apache.logging.log4j.util.Strings; import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.expression.AggregateFunction; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.And; import org.opensearch.sql.ast.expression.Between; @@ -563,15 +564,27 @@ public RexNode visitFunction(Function node, CalcitePlanContext context) { @Override public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext context) { - Function windowFunction = (Function) node.getFunction(); - List arguments = - windowFunction.getFuncArgs().stream().map(arg -> analyze(arg, context)).toList(); + String funcName; + List arguments; + if (node.getFunction() instanceof AggregateFunction aggFunc) { + funcName = aggFunc.getFuncName(); + List argExprs = new java.util.ArrayList<>(); + if (aggFunc.getField() != null) { + argExprs.add(aggFunc.getField()); + } + argExprs.addAll(aggFunc.getArgList()); + arguments = argExprs.stream().map(arg -> analyze(arg, context)).toList(); + } else { + Function windowFunction = (Function) node.getFunction(); + funcName = windowFunction.getFuncName(); + arguments = windowFunction.getFuncArgs().stream().map(arg -> analyze(arg, context)).toList(); + } List partitions = node.getPartitionByList().stream() .map(arg -> analyze(arg, context)) .map(this::extractRexNodeFromAlias) .toList(); - return BuiltinFunctionName.ofWindowFunction(windowFunction.getFuncName()) + return BuiltinFunctionName.ofWindowFunction(funcName) .map( functionName -> { RexNode field = arguments.isEmpty() ? null : arguments.getFirst(); @@ -601,9 +614,7 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte node.getWindowFrame()); }) .orElseThrow( - () -> - new UnsupportedOperationException( - "Unexpected window function: " + windowFunction.getFuncName())); + () -> new UnsupportedOperationException("Unexpected window function: " + funcName)); } /** extract the expression of Alias from a node */ diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 14f058a75d0..7c6545c5d66 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -425,6 +425,9 @@ public enum BuiltinFunctionName { .put("dc", BuiltinFunctionName.DISTINCT_COUNT_APPROX) .put("distinct_count", BuiltinFunctionName.DISTINCT_COUNT_APPROX) .put("pattern", BuiltinFunctionName.INTERNAL_PATTERN) + .put("row_number", BuiltinFunctionName.ROW_NUMBER) + .put("rank", BuiltinFunctionName.RANK) + .put("dense_rank", BuiltinFunctionName.DENSE_RANK) .build(); public static Optional of(String str) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 849c60fe4eb..dd0d0f3b52c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -92,6 +92,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_PG_4; import static org.opensearch.sql.expression.function.BuiltinFunctionName.INTERNAL_TRANSLATE3; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.ISNULL; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_BLANK; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_EMPTY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.IS_NOT_NULL; @@ -133,9 +134,13 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAP_CONCAT; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAP_REMOVE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCHPHRASE; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCHPHRASEQUERY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCHQUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_BOOL_PREFIX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_PHRASE_PREFIX; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MATCH_QUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MAX; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MD5; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MEDIAN; @@ -154,6 +159,8 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTHNAME; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MONTH_OF_YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MSTIME; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIMATCH; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIMATCHQUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTIPLYFUNCTION; import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH; @@ -178,6 +185,7 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.POW; import static org.opensearch.sql.expression.function.BuiltinFunctionName.POWER; import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUARTER; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.QUERY_STRING; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RADIANS; import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND; @@ -854,7 +862,7 @@ void populate() { registerOperator(SIN, SqlStdOperatorTable.SIN); registerOperator(CBRT, SqlStdOperatorTable.CBRT); - registerOperator(IFNULL, SqlStdOperatorTable.COALESCE); + registerOperator(IFNULL, PPLBuiltinOperators.ENHANCED_COALESCE); registerOperator(EARLIEST, PPLBuiltinOperators.EARLIEST); registerOperator(LATEST, PPLBuiltinOperators.LATEST); registerOperator(COALESCE, PPLBuiltinOperators.ENHANCED_COALESCE); @@ -908,6 +916,14 @@ void populate() { registerOperator(SIMPLE_QUERY_STRING, PPLBuiltinOperators.SIMPLE_QUERY_STRING); registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING); registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH); + // Legacy relevance function aliases + registerOperator(MATCH_QUERY, PPLBuiltinOperators.MATCH); + registerOperator(MATCHQUERY, PPLBuiltinOperators.MATCH); + registerOperator(MATCHPHRASE, PPLBuiltinOperators.MATCH_PHRASE); + registerOperator(MATCHPHRASEQUERY, PPLBuiltinOperators.MATCH_PHRASE); + registerOperator(MULTIMATCH, PPLBuiltinOperators.MULTI_MATCH); + registerOperator(MULTIMATCHQUERY, PPLBuiltinOperators.MULTI_MATCH); + registerOperator(QUERY, PPLBuiltinOperators.QUERY_STRING); registerOperator(REX_EXTRACT, PPLBuiltinOperators.REX_EXTRACT); registerOperator(REX_EXTRACT_MULTI, PPLBuiltinOperators.REX_EXTRACT_MULTI); registerOperator(REX_OFFSET, PPLBuiltinOperators.REX_OFFSET); @@ -1147,6 +1163,8 @@ void populate() { IS_PRESENT, SqlStdOperatorTable.IS_NOT_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE)); registerOperator( IS_NULL, SqlStdOperatorTable.IS_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE)); + registerOperator( + ISNULL, SqlStdOperatorTable.IS_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE)); // Register implementation. // Note, make the implementation an individual class if too complex. From a507b1f86da032b691c74e62563dd765f7d36e4e Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Tue, 12 May 2026 16:03:25 -0700 Subject: [PATCH 04/11] feat(sql): extend V2 grammar with JOIN, UNION, IN/EXISTS subquery - Add joinClause and joinType rules to fromClause - Add UNION ALL / EXCEPT to selectStatement - Add IN subquery predicate alternative - Add EXISTS expression atom - Default AstBuilder throws SyntaxCheckException (preserves legacy fallback) - Add createExpressionBuilder() factory for subclass override - Make visitAstExpression protected for subclass access --- sql/src/main/antlr/OpenSearchSQLParser.g4 | 19 ++++++++- .../opensearch/sql/sql/parser/AstBuilder.java | 41 +++++++++++++++++-- .../sql/sql/parser/AstExpressionBuilder.java | 14 +++++++ 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 6b34507eacc..aa0c4f383ca 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -54,7 +54,9 @@ dmlStatement // Primary DML Statements selectStatement - : querySpecification # simpleSelect + : querySpecification # simpleSelect + | querySpecification UNION ALL? querySpecification (UNION ALL? querySpecification)* # unionSelect + | querySpecification EXCEPT querySpecification # exceptSelect ; adminStatement @@ -104,10 +106,21 @@ selectElement ; fromClause - : FROM relation (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY + : FROM relation joinClause* (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY ; +joinClause + : joinType? JOIN relation (ON expression)? + ; + +joinType + : INNER + | LEFT OUTER? + | RIGHT OUTER? + | CROSS + ; + relation : tableName (AS? alias)? # tableAsRelation | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation @@ -302,6 +315,7 @@ predicate | left = predicate NOT? LIKE right = predicate # likePredicate | left = predicate REGEXP right = predicate # regexpPredicate | predicate NOT? IN '(' expressions ')' # inPredicate + | predicate NOT? IN '(' selectStatement ')' # inSubqueryPredicate ; expressions @@ -313,6 +327,7 @@ expressionAtom | columnName # fullColumnNameExpressionAtom | functionCall # functionCallExpressionAtom | LR_BRACKET expression RR_BRACKET # nestedExpressionAtom + | EXISTS '(' selectStatement ')' # existsExpressionAtom | left = expressionAtom mathOperator = (STAR | SLASH | MODULE) right = expressionAtom # mathExpressionAtom | left = expressionAtom mathOperator = (PLUS | MINUS) right = expressionAtom # mathExpressionAtom ; diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java index 5250ab7fb0f..659e7140232 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java @@ -23,7 +23,6 @@ import java.util.Collections; import java.util.Locale; import java.util.Optional; -import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.expression.Alias; import org.opensearch.sql.ast.expression.AllFields; @@ -32,6 +31,7 @@ import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.tree.DescribeRelation; import org.opensearch.sql.ast.tree.Filter; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Limit; import org.opensearch.sql.ast.tree.Project; import org.opensearch.sql.ast.tree.Relation; @@ -50,10 +50,18 @@ import org.opensearch.sql.sql.parser.context.ParsingContext; /** Abstract syntax tree (AST) builder. */ -@RequiredArgsConstructor public class AstBuilder extends OpenSearchSQLParserBaseVisitor { - private final AstExpressionBuilder expressionBuilder = new AstExpressionBuilder(); + private final AstExpressionBuilder expressionBuilder = createExpressionBuilder(); + + /** Factory method for expression builder. Override to provide subquery-capable builder. */ + protected AstExpressionBuilder createExpressionBuilder() { + return new AstExpressionBuilder(); + } + + public AstBuilder(String query) { + this.query = query; + } /** Parsing context stack that contains context for current query parsing. */ private final ParsingContext context = new ParsingContext(); @@ -139,6 +147,13 @@ public UnresolvedPlan visitLimitClause(OpenSearchSQLParser.LimitClauseContext ct public UnresolvedPlan visitFromClause(FromClauseContext ctx) { UnresolvedPlan result = visit(ctx.relation()); + // Let ANTLR visitor dispatch handle each joinClause + for (var joinCtx : ctx.joinClause()) { + UnresolvedPlan joinPlan = visit(joinCtx); + ((Join) joinPlan).attach(result); + result = joinPlan; + } + if (ctx.whereClause() != null) { result = visit(ctx.whereClause()).attach(result); } @@ -163,6 +178,24 @@ public UnresolvedPlan visitFromClause(FromClauseContext ctx) { return result; } + @Override + public UnresolvedPlan visitJoinClause(OpenSearchSQLParser.JoinClauseContext ctx) { + throw new SyntaxCheckException( + "JOIN is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitUnionSelect(OpenSearchSQLParser.UnionSelectContext ctx) { + throw new SyntaxCheckException( + "UNION is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedPlan visitExceptSelect(OpenSearchSQLParser.ExceptSelectContext ctx) { + throw new SyntaxCheckException( + "EXCEPT is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + /** * Ensure NESTED function is not used in HAVING clause and fallback to legacy engine. Can remove * when support is added for NESTED function in HAVING clause. @@ -261,7 +294,7 @@ protected UnresolvedPlan aggregateResult(UnresolvedPlan aggregate, UnresolvedPla return nextResult != null ? nextResult : aggregate; } - private UnresolvedExpression visitAstExpression(ParseTree tree) { + protected UnresolvedExpression visitAstExpression(ParseTree tree) { return expressionBuilder.visit(tree); } diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index 346ef6660d7..0058cc4369c 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -290,6 +290,20 @@ public UnresolvedExpression visitInPredicate(InPredicateContext ctx) { return ctx.NOT() != null ? AstDSL.not(in) : in; } + @Override + public UnresolvedExpression visitInSubqueryPredicate( + OpenSearchSQLParser.InSubqueryPredicateContext ctx) { + throw new org.opensearch.sql.common.antlr.SyntaxCheckException( + "IN subquery is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + + @Override + public UnresolvedExpression visitExistsExpressionAtom( + OpenSearchSQLParser.ExistsExpressionAtomContext ctx) { + throw new org.opensearch.sql.common.antlr.SyntaxCheckException( + "EXISTS is not supported in the V2 SQL engine. Falling back to legacy engine."); + } + @Override public UnresolvedExpression visitAndExpression(AndExpressionContext ctx) { return new And(visit(ctx.left), visit(ctx.right)); From 04efc5a3fd105a47306dafbab11c0ddb791c72ce Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Tue, 12 May 2026 16:03:34 -0700 Subject: [PATCH 05/11] feat(sql): PoC override AstBuilder for Calcite path (JOIN/UNION/IN/EXISTS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Demonstrates the pattern: override AstBuilder in SQLService to build AST nodes for features gated in the default path. In production, this override moves to the unified query API's CustomVisitorStrategy. QueryService.shouldUseCalcite() temporarily accepts SQL for PoC testing. This flag is not for production — SQL should route through unified query API (RestUnifiedQueryAction) in the final integration. --- .../opensearch/sql/executor/QueryService.java | 6 +- .../org/opensearch/sql/sql/SQLService.java | 92 ++++++++++++++++++- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/executor/QueryService.java b/core/src/main/java/org/opensearch/sql/executor/QueryService.java index 9e829d68c3a..cd91c6b9a6e 100644 --- a/core/src/main/java/org/opensearch/sql/executor/QueryService.java +++ b/core/src/main/java/org/opensearch/sql/executor/QueryService.java @@ -358,10 +358,10 @@ private boolean isCalciteEnabled(Settings settings) { } // TODO https://github.com/opensearch-project/sql/issues/3457 - // Route SQL V2 AST through CalciteRelNodeVisitor for gap analysis + // TODO: PoC only — in production, SQL routes through unified query API (RestUnifiedQueryAction), + // not through this flag. Remove this change when migrating to the unified query path. private boolean shouldUseCalcite(QueryType queryType) { - return isCalciteEnabled(settings) - && (queryType == QueryType.PPL || queryType == QueryType.SQL); + return isCalciteEnabled(settings) && (queryType == QueryType.PPL || queryType == QueryType.SQL); } private FrameworkConfig buildFrameworkConfig() { diff --git a/sql/src/main/java/org/opensearch/sql/sql/SQLService.java b/sql/src/main/java/org/opensearch/sql/sql/SQLService.java index d51fa1c898d..1a5ead6d3f8 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/SQLService.java +++ b/sql/src/main/java/org/opensearch/sql/sql/SQLService.java @@ -9,7 +9,11 @@ import lombok.RequiredArgsConstructor; import org.antlr.v4.runtime.tree.ParseTree; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.UnresolvedExpression; import org.opensearch.sql.ast.statement.Statement; +import org.opensearch.sql.ast.tree.Join; +import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.common.response.ResponseListener; import org.opensearch.sql.executor.ExecutionEngine.ExplainResponse; import org.opensearch.sql.executor.ExecutionEngine.QueryResponse; @@ -20,6 +24,7 @@ import org.opensearch.sql.sql.antlr.SQLSyntaxParser; import org.opensearch.sql.sql.domain.SQLQueryRequest; import org.opensearch.sql.sql.parser.AstBuilder; +import org.opensearch.sql.sql.parser.AstExpressionBuilder; import org.opensearch.sql.sql.parser.AstStatementBuilder; /** SQL service. */ @@ -95,7 +100,92 @@ private AbstractPlan plan( Statement statement = cst.accept( new AstStatementBuilder( - new AstBuilder(request.getQuery()), + new AstBuilder(request.getQuery()) { + @Override + protected AstExpressionBuilder createExpressionBuilder() { + final var planBuilder = this; + return new AstExpressionBuilder() { + @Override + public UnresolvedExpression visitInSubqueryPredicate( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser + .InSubqueryPredicateContext + ctx) { + UnresolvedExpression field = visit(ctx.predicate()); + UnresolvedPlan subquery = planBuilder.visit(ctx.selectStatement()); + UnresolvedExpression inSub = + new org.opensearch.sql.ast.expression.subquery.InSubquery( + java.util.List.of(field), subquery); + return ctx.NOT() != null + ? org.opensearch.sql.ast.dsl.AstDSL.not(inSub) + : inSub; + } + + @Override + public UnresolvedExpression visitExistsExpressionAtom( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser + .ExistsExpressionAtomContext + ctx) { + UnresolvedPlan subquery = planBuilder.visit(ctx.selectStatement()); + return new org.opensearch.sql.ast.expression.subquery.ExistsSubquery( + subquery); + } + }; + } + + @Override + public UnresolvedPlan visitJoinClause( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinClauseContext + ctx) { + UnresolvedPlan rightRelation = visit(ctx.relation()); + Join.JoinType joinType = resolveJoinType(ctx.joinType()); + java.util.Optional joinCondition = + ctx.expression() != null + ? java.util.Optional.of(visitAstExpression(ctx.expression())) + : java.util.Optional.empty(); + return new Join( + rightRelation, + java.util.Optional.empty(), + java.util.Optional.empty(), + joinType, + joinCondition, + new Join.JoinHint(), + java.util.Optional.empty(), + Argument.ArgumentMap.empty()); + } + + private Join.JoinType resolveJoinType( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinTypeContext + ctx) { + if (ctx == null) return Join.JoinType.INNER; + if (ctx.LEFT() != null) return Join.JoinType.LEFT; + if (ctx.RIGHT() != null) return Join.JoinType.RIGHT; + if (ctx.CROSS() != null) return Join.JoinType.CROSS; + return Join.JoinType.INNER; + } + + @Override + public UnresolvedPlan visitUnionSelect( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.UnionSelectContext + ctx) { + java.util.List plans = + ctx.querySpecification().stream() + .map(this::visit) + .collect(java.util.stream.Collectors.toList()); + UnresolvedPlan first = plans.remove(0); + org.opensearch.sql.ast.tree.Union union = + new org.opensearch.sql.ast.tree.Union(plans, 0); + union.attach(first); + return union; + } + + @Override + public UnresolvedPlan visitExceptSelect( + org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.ExceptSelectContext + ctx) { + throw new org.opensearch.sql.common.antlr.SyntaxCheckException( + "EXCEPT is not yet supported. Falling back to legacy engine."); + } + }, AstStatementBuilder.StatementBuilderContext.builder() .isExplain(isExplainRequest) .fetchSize(request.getFetchSize()) From 71fe20f000ae6495f9cb83f435b9abfe96bfd376 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 13 May 2026 16:12:42 -0700 Subject: [PATCH 06/11] feat(calcite): add window ORDER BY, RANK/DENSE_RANK, distinct, and alias encoding - Pass window function sortList as orderKeys to PlanUtils.makeOver - Add RANK and DENSE_RANK cases to PlanUtils.makeOver switch - Add makeOver overload with distinct parameter for COUNT(DISTINCT) OVER - Bypass aggFunctionRegistry validation for pure window functions - Encode expression name + alias in Calcite field name for SQL queries - Decode alias in OpenSearchExecutionEngine response schema builder - Fix NPE in ANY-type lookup when alias is present --- .../sql/calcite/CalciteRelNodeVisitor.java | 9 ++- .../sql/calcite/CalciteRexNodeVisitor.java | 56 +++++++++++++++++-- .../sql/calcite/utils/PlanUtils.java | 31 +++++++++- .../executor/OpenSearchExecutionEngine.java | 23 ++++++-- 4 files changed, 108 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index fc9e6254c96..48a93085e4b 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -543,7 +543,14 @@ private List expandProjectFields( .forEach(field -> expandedFields.add(context.relBuilder.field(field))); } case Alias alias -> { - expandedFields.add(rexVisitor.analyze(alias, context)); + String aliasName = + Strings.isNullOrEmpty(alias.getAlias()) ? alias.getName() : alias.getAlias(); + if (alias.getDelegated() instanceof AggregateFunction + && currentFields.contains(aliasName)) { + expandedFields.add(context.relBuilder.field(aliasName)); + } else { + expandedFields.add(rexVisitor.analyze(alias, context)); + } } default -> throw new IllegalStateException( diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java index 78047b70b51..b89b831cc57 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java @@ -18,6 +18,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; import javax.annotation.Nullable; @@ -84,6 +85,7 @@ import org.opensearch.sql.exception.CalciteUnsupportedException; import org.opensearch.sql.exception.ExpressionEvaluationException; import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.executor.QueryType; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.PPLFuncImpTable; @@ -91,6 +93,10 @@ public class CalciteRexNodeVisitor extends AbstractNodeVisitor { private final CalciteRelNodeVisitor planVisitor; + private static final Set PURE_WINDOW_FUNCTIONS = + Set.of( + BuiltinFunctionName.ROW_NUMBER, BuiltinFunctionName.RANK, BuiltinFunctionName.DENSE_RANK); + public RexNode analyze(UnresolvedExpression unresolved, CalcitePlanContext context) { return unresolved.accept(this, context); } @@ -339,8 +345,14 @@ public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context public RexNode visitAlias(Alias node, CalcitePlanContext context) { RexNode expr = analyze(node.getDelegated(), context); // Only OpenSearch SQL uses node.getAlias, OpenSearch PPL uses node.getName. - return context.relBuilder.alias( - expr, Strings.isEmpty(node.getAlias()) ? node.getName() : node.getAlias()); + String aliasName = Strings.isEmpty(node.getAlias()) ? node.getName() : node.getAlias(); + // For SQL queries, encode expression name and alias in field name so the response + // builder can reconstruct both (V2 compatibility: name=expr, alias=AS-alias). + if (context.queryType == QueryType.SQL && !Strings.isEmpty(node.getAlias())) { + String exprName = node.getName() != null ? node.getName() : aliasName; + aliasName = exprName + "\u0000" + node.getAlias(); + } + return context.relBuilder.alias(expr, aliasName); } @Override @@ -566,8 +578,10 @@ public RexNode visitFunction(Function node, CalcitePlanContext context) { public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext context) { String funcName; List arguments; + final boolean isDistinct; if (node.getFunction() instanceof AggregateFunction aggFunc) { funcName = aggFunc.getFuncName(); + isDistinct = Boolean.TRUE.equals(aggFunc.getDistinct()); List argExprs = new java.util.ArrayList<>(); if (aggFunc.getField() != null) { argExprs.add(aggFunc.getField()); @@ -577,6 +591,7 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte } else { Function windowFunction = (Function) node.getFunction(); funcName = windowFunction.getFuncName(); + isDistinct = false; arguments = windowFunction.getFuncArgs().stream().map(arg -> analyze(arg, context)).toList(); } List partitions = @@ -584,6 +599,25 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte .map(arg -> analyze(arg, context)) .map(this::extractRexNodeFromAlias) .toList(); + List orderKeys = + node.getSortList().stream() + .map( + pair -> { + RexNode sortField = analyze(pair.getRight(), context); + if (pair.getLeft().getSortOrder() + == org.opensearch.sql.ast.tree.Sort.SortOrder.DESC) { + sortField = context.relBuilder.desc(sortField); + } + if (pair.getLeft().getNullOrder() + == org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST) { + sortField = context.relBuilder.nullsLast(sortField); + } else if (pair.getLeft().getNullOrder() + == org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST) { + sortField = context.relBuilder.nullsFirst(sortField); + } + return sortField; + }) + .toList(); return BuiltinFunctionName.ofWindowFunction(funcName) .map( functionName -> { @@ -592,6 +626,18 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte (arguments.isEmpty() || arguments.size() == 1) ? Collections.emptyList() : arguments.subList(1, arguments.size()); + // Pure window functions (ROW_NUMBER, RANK, DENSE_RANK) are not registered + // in aggFunctionRegistry, so skip validation for them. + if (PURE_WINDOW_FUNCTIONS.contains(functionName)) { + return PlanUtils.makeOver( + context, + functionName, + field, + args, + partitions, + orderKeys, + node.getWindowFrame()); + } List nodes = PPLFuncImpTable.INSTANCE.validateAggFunctionSignature( functionName, field, args, context.rexBuilder); @@ -599,18 +645,20 @@ public RexNode visitWindowFunction(WindowFunction node, CalcitePlanContext conte ? PlanUtils.makeOver( context, functionName, + isDistinct, nodes.getFirst(), nodes.size() <= 1 ? Collections.emptyList() : nodes.subList(1, nodes.size()), partitions, - List.of(), + orderKeys, node.getWindowFrame()) : PlanUtils.makeOver( context, functionName, + isDistinct, field, args, partitions, - List.of(), + orderKeys, node.getWindowFrame()); }) .orElseThrow( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 4d2dae4bd60..d828f508862 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -171,6 +171,19 @@ static RexNode makeOver( List partitions, List orderKeys, @Nullable WindowFrame windowFrame) { + return makeOver( + context, functionName, false, field, argList, partitions, orderKeys, windowFrame); + } + + static RexNode makeOver( + CalcitePlanContext context, + BuiltinFunctionName functionName, + boolean distinct, + RexNode field, + List argList, + List partitions, + List orderKeys, + @Nullable WindowFrame windowFrame) { if (windowFrame == null) { windowFrame = WindowFrame.rowsUnbounded(); } @@ -216,6 +229,22 @@ static RexNode makeOver( true, lowerBound, upperBound); + case RANK: + return withOver( + context.relBuilder.aggregateCall(SqlStdOperatorTable.RANK), + partitions, + orderKeys, + true, + lowerBound, + upperBound); + case DENSE_RANK: + return withOver( + context.relBuilder.aggregateCall(SqlStdOperatorTable.DENSE_RANK), + partitions, + orderKeys, + true, + lowerBound, + upperBound); case NTH_VALUE: return withOver( context.relBuilder.aggregateCall(SqlStdOperatorTable.NTH_VALUE, field, argList.get(0)), @@ -226,7 +255,7 @@ static RexNode makeOver( upperBound); default: return withOver( - makeAggCall(context, functionName, false, field, argList), + makeAggCall(context, functionName, distinct, field, argList), partitions, orderKeys, rows, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java index 2a29b52e837..96cc14e0f69 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/executor/OpenSearchExecutionEngine.java @@ -292,11 +292,14 @@ private QueryResponse buildResultSet( Map row = new LinkedHashMap(); // Loop through each column for (int i = 1; i <= columnCount; i++) { - String columnName = metaData.getColumnName(i); - Object value = resultSet.getObject(columnName); + String rawColumnName = metaData.getColumnName(i); + // Use alias as row key if encoded (matches getColumnName in QueryResult) + int sepIdx = rawColumnName.indexOf('\0'); + String rowKey = sepIdx >= 0 ? rawColumnName.substring(sepIdx + 1) : rawColumnName; + Object value = resultSet.getObject(rawColumnName); Object converted = processValue(value, fieldTypes.get(i - 1)); ExprValue exprValue = ExprValueUtils.fromObjectValue(converted); - row.put(columnName, exprValue); + row.put(rowKey, exprValue); } values.add(ExprTupleValue.fromExprValueMap(row)); } @@ -304,6 +307,14 @@ private QueryResponse buildResultSet( List columns = new ArrayList<>(metaData.getColumnCount()); for (int i = 1; i <= columnCount; ++i) { String columnName = metaData.getColumnName(i); + // Decode expression name and alias encoded by CalciteRexNodeVisitor.visitAlias + // Format: "exprName\0alias" for SQL queries with AS-alias + String alias = null; + int sep = columnName.indexOf('\0'); + if (sep >= 0) { + alias = columnName.substring(sep + 1); + columnName = columnName.substring(0, sep); + } RelDataType fieldType = fieldTypes.get(i - 1); // TODO: Correct this after fixing issue github.com/opensearch-project/sql/issues/3751 // The element type of struct and array is currently set to ANY. @@ -311,7 +322,9 @@ private QueryResponse buildResultSet( ExprType exprType; if (fieldType.getSqlTypeName() == SqlTypeName.ANY) { if (!values.isEmpty()) { - exprType = values.getFirst().tupleValue().get(columnName).type(); + // Row map is keyed by alias (if present) or columnName + String lookupKey = alias != null ? alias : columnName; + exprType = values.getFirst().tupleValue().get(lookupKey).type(); } else { // Using UNDEFINED instead of UNKNOWN to avoid throwing exception exprType = ExprCoreType.UNDEFINED; @@ -319,7 +332,7 @@ private QueryResponse buildResultSet( } else { exprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(fieldType); } - columns.add(new Column(columnName, null, exprType)); + columns.add(new Column(columnName, alias, exprType)); } Schema schema = new Schema(columns); QueryResponse response = new QueryResponse(schema, values, Cursor.None); From 732cdafb1ebfc848c35c1fe2d46d3675a892f7b0 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 13 May 2026 16:12:52 -0700 Subject: [PATCH 07/11] feat(calcite): make typeof() QueryType-aware and bypass AVG type validation for SQL - Add ThreadLocal to CalcitePlanContext with cleanup in finally - typeof() now uses getCurrentQueryType() instead of hardcoded PPL - SQL queries bypass strict PPL type validation for aggregation functions - Allows AVG on TIMESTAMP/DATE/TIME in SQL (Calcite handles coercion) --- .../opensearch/sql/calcite/CalcitePlanContext.java | 13 +++++++++++++ .../sql/expression/function/PPLFuncImpTable.java | 12 +++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java index 5f58dea227e..c2783804e07 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java @@ -46,6 +46,10 @@ public class CalcitePlanContext { private static final ThreadLocal legacyPreferredFlag = ThreadLocal.withInitial(() -> true); + /** Thread-local QueryType for use by functions that lack access to CalcitePlanContext. */ + private static final ThreadLocal currentQueryType = + ThreadLocal.withInitial(() -> QueryType.PPL); + @Getter @Setter private HighlightConfig highlightConfig; @Getter @Setter private boolean isResolvingJoinCondition = false; @Getter @Setter private boolean isResolvingSubquery = false; @@ -78,6 +82,7 @@ private CalcitePlanContext(FrameworkConfig config, SysLimit sysLimit, QueryType this.config = config; this.sysLimit = sysLimit; this.queryType = queryType; + currentQueryType.set(queryType); this.connection = CalciteToolsHelper.connect(config, TYPE_FACTORY); this.relBuilder = CalciteToolsHelper.create(config, TYPE_FACTORY, connection); this.rexBuilder = new ExtendedRexBuilder(relBuilder.getRexBuilder()); @@ -158,6 +163,7 @@ public static void run(Runnable action, Settings settings) { action.run(); } finally { legacyPreferredFlag.remove(); + currentQueryType.remove(); } } @@ -168,6 +174,13 @@ public static boolean isLegacyPreferred() { return legacyPreferredFlag.get(); } + /** + * @return the current QueryType from thread-local context. + */ + public static QueryType getCurrentQueryType() { + return currentQueryType.get(); + } + public void putRexLambdaRefMap(Map candidateMap) { this.rexLambdaRefMap.putAll(candidateMap); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index dd0d0f3b52c..673d0f1ad71 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -262,6 +262,8 @@ import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEKOFYEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WEEK_OF_YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.WIDTH_BUCKET; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WILDCARDQUERY; +import static org.opensearch.sql.expression.function.BuiltinFunctionName.WILDCARD_QUERY; import static org.opensearch.sql.expression.function.BuiltinFunctionName.XOR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEAR; import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK; @@ -514,6 +516,11 @@ static List validateFunctionArgs( coercionNodes = CoercionUtils.castArguments(rexBuilder, signature.typeChecker(), fields); } if (coercionNodes == null) { + // For SQL queries, skip strict type validation and let Calcite handle type coercion + // (e.g., AVG on TIMESTAMP is valid in SQL but not in PPL). + if (CalcitePlanContext.getCurrentQueryType() == QueryType.SQL) { + return null; + } String errorMessagePattern = argTypes.size() <= 1 ? "Aggregation function %s expects field type {%s}, but got %s" @@ -924,6 +931,8 @@ void populate() { registerOperator(MULTIMATCH, PPLBuiltinOperators.MULTI_MATCH); registerOperator(MULTIMATCHQUERY, PPLBuiltinOperators.MULTI_MATCH); registerOperator(QUERY, PPLBuiltinOperators.QUERY_STRING); + registerOperator(WILDCARD_QUERY, PPLBuiltinOperators.WILDCARD_QUERY); + registerOperator(WILDCARDQUERY, PPLBuiltinOperators.WILDCARD_QUERY); registerOperator(REX_EXTRACT, PPLBuiltinOperators.REX_EXTRACT); registerOperator(REX_EXTRACT_MULTI, PPLBuiltinOperators.REX_EXTRACT_MULTI); registerOperator(REX_OFFSET, PPLBuiltinOperators.REX_OFFSET); @@ -1266,7 +1275,8 @@ void populate() { TYPEOF, (FunctionImp1) (builder, arg) -> - builder.makeLiteral(getLegacyTypeName(arg.getType(), QueryType.PPL)), + builder.makeLiteral( + getLegacyTypeName(arg.getType(), CalcitePlanContext.getCurrentQueryType())), null); register( NULLIF, From c1359a7bad719afd883c9da68a2be53e1cadf361 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 13 May 2026 16:12:59 -0700 Subject: [PATCH 08/11] feat(functions): register wildcard_query operator and percentile window mapping - Add WILDCARD_QUERY and WILDCARDQUERY to PPLFuncImpTable - Add wildcard_query UDF in PPLBuiltinOperators - Add percentile/percentile_approx to WINDOW_FUNC_MAPPING --- .../opensearch/sql/expression/function/BuiltinFunctionName.java | 2 ++ .../opensearch/sql/expression/function/PPLBuiltinOperators.java | 2 ++ 2 files changed, 4 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index 7c6545c5d66..ddefaa3fec0 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -428,6 +428,8 @@ public enum BuiltinFunctionName { .put("row_number", BuiltinFunctionName.ROW_NUMBER) .put("rank", BuiltinFunctionName.RANK) .put("dense_rank", BuiltinFunctionName.DENSE_RANK) + .put("percentile", BuiltinFunctionName.PERCENTILE_APPROX) + .put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX) .build(); public static Optional of(String str) { diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java index 0a5b0fe0e03..3ec1000002c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java @@ -424,6 +424,8 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable { RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("query_string", false); public static final SqlOperator MULTI_MATCH = RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("multi_match", false); + public static final SqlOperator WILDCARD_QUERY = + RELEVANCE_QUERY_FUNCTION_INSTANCE.toUDF("wildcard_query"); public static final SqlOperator NUMBER_TO_STRING = new NumberToStringFunction().toUDF("NUMBER_TO_STRING"); public static final SqlOperator TONUMBER = new ToNumberFunction().toUDF("TONUMBER"); From ddfc369ca091437706b09dba8fbdadef4f021b87 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 13 May 2026 16:13:03 -0700 Subject: [PATCH 09/11] docs: add complete SQL V2 Calcite gap analysis table --- .../sql-v2-calcite-complete-gap-table.md | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 docs/report/sql-v2-calcite-complete-gap-table.md diff --git a/docs/report/sql-v2-calcite-complete-gap-table.md b/docs/report/sql-v2-calcite-complete-gap-table.md new file mode 100644 index 00000000000..d170bce056a --- /dev/null +++ b/docs/report/sql-v2-calcite-complete-gap-table.md @@ -0,0 +1,68 @@ +# SQL V2 + CalciteRelNodeVisitor: Complete Gap Table + +**Date**: 2026-05-12 +**Pass Rate**: 650 / 964 (67.4%) with S-effort fixes + grammar extensions applied +**Branch**: https://github.com/dai-chen/sql-1/tree/sql-v2-calcite-gap-analysis + +## Complete SQL Gap Table + +| # | Gap | Failures | Example Query | SQL-Only? | Origin | Status | +|---|-----|----------|---------------|-----------|--------|--------| +| **Grammar Gaps (cannot parse)** | | | | | | | +| 1 | SHOW/DESCRIBE with unquoted identifiers | 11 | `SHOW TABLES LIKE opensearch-sql_test_index_account` | ✅ SQL-only | SQL Plugin | Grammar expects STRING_LITERAL | +| 2 | Comma-join / nested FROM (PartiQL) | 5 | `SELECT * FROM nested_type e, e.message m` | ✅ SQL-only | SQL Plugin | Grammar gap | +| 3 | PERCENTILES / STATS / EXTENDED_STATS | 5 | `SELECT PERCENTILES(age, 25.0, 50.0) FROM account` | ✅ SQL-only | SQL Plugin | Legacy functions not in grammar | +| 4 | Index names with dots | 3 | `SELECT * FROM test-logs-2025.01.01` | ✅ SQL-only | SQL Plugin | Use backticks | +| 5 | Legacy method syntax (REGEXP_QUERY, wildcardQuery) | 3 | `SELECT * FROM account WHERE address=REGEXP_QUERY('.*')` | ✅ SQL-only | SQL Plugin | Legacy syntax | +| 6 | Qualified wildcard `m.*` | 2 | `SELECT m.* FROM nested_type e, e.message m` | ✅ SQL-only | SQL Plugin | Grammar gap | +| 7 | Index/type syntax | 1 | `SELECT * FROM account/wrongType` | ✅ SQL-only | SQL Plugin | Legacy syntax | +| 8 | Subquery EOF parse | 1 | `SELECT col FROM (SELECT * FROM table)` with pagination | ✅ SQL-only | SQL Plugin | Edge case | +| 9 | Scalar subquery in SELECT | 0 (no test) | `SELECT (SELECT MAX(x) FROM t2) FROM t1` | ✅ SQL-only | SQL Plugin | Not in grammar; visitor ready | +| 10 | EXCEPT/MINUS | 0 (throws) | `SELECT a FROM t1 EXCEPT SELECT a FROM t2` | ✅ SQL-only | SQL Plugin | Grammar added, visitor throws | +| | | | | | | | +| **Visitor/Execution Gaps (parses but fails)** | | | | | | | +| 11 | Field [X] not found in aggregation Project | 36 | `SELECT MAX(balance), AVG(age) FROM account GROUP BY gender` | ✅ SQL-only | SQL Plugin | SQL wraps agg in Project; PPL doesn't | +| 12 | NESTED function not registered | 33 | `SELECT nested(message.author.name) FROM multi_nested` | ❌ Shared | SQL Plugin + AE | PPL also uses nested() | +| 13 | vectorSearch() table function | 30 | `SELECT * FROM vectorSearch(table='t', field='f', vector='[1]', option='k=5')` | ✅ SQL-only | AE | AE needs TableFunctionScan support | +| 14 | Pagination schema mismatch | 26 | `SELECT * FROM bank` (with fetchSize=5) | ❌ Shared | SQL Plugin | Calcite path has no pagination | +| 15 | RexNode NPE (nested wildcard) | 20 | `SELECT nested(message.*) FROM nested_type` | ❌ Shared | SQL Plugin | Part of NESTED gap | +| 16 | WILDCARD_QUERY not registered | 18 | `SELECT body FROM test WHERE wildcard_query(KeywordBody, 't*')` | ✅ SQL-only | SQL Plugin + AE | Needs function registration + AE DSL mapping | +| 17 | AVG on temporal types rejected | 13 | `SELECT AVG(datetime1) FROM calcs` | ❌ Shared | SQL Plugin | Type checker too strict | +| 18 | Schema type mismatch (keyword vs text) | 11 | `SELECT firstname FROM accounts WHERE match(firstname, 'Amber')` | ✅ SQL-only | SQL Plugin | Response metadata issue | +| 19 | Explain format assertions | 11 | `EXPLAIN SELECT * FROM bank WHERE age > 30` | ✅ SQL-only | SQL Plugin | Tests expect V2 explain format | +| 20 | JOIN explain alias mismatch | 11 | `EXPLAIN SELECT a.id, b.name FROM order a JOIN bank b ON a.id = b.id` | ✅ SQL-only | SQL Plugin | Alias not propagated in explain | +| 21 | Column name = alias instead of expression | 10 | `SELECT NULLIF(lastname, 'unknown') AS name FROM accounts` | ✅ SQL-only | SQL Plugin | Calcite uses alias as column label | +| 22 | Window: ROW_NUMBER still failing | 3 | `SELECT ROW_NUMBER() OVER(ORDER BY age) FROM bank` | ❌ Shared | SQL Plugin | Mapping added but not taking effect | +| 23 | Window: PERCENTILE over window | 3 | `SELECT PERCENTILE(balance, 50) OVER() FROM bank` | ❌ Shared | SQL Plugin | Not in WINDOW_FUNC_MAPPING | +| 24 | Pagination cursor unsupported | 5 | `SELECT * FROM bank LIMIT 5` (with fetchSize, cursor iteration) | ❌ Shared | SQL Plugin | Calcite has no cursor support | +| 25 | Field resolution misc | 3 | `SELECT birthdate FROM account` / concurrent queries | ✅ SQL-only | SQL Plugin | Same root cause as #11 | +| 26 | Error message format mismatch | 2 | Tests expect "IndexNotFoundException" but get "SyntaxCheckException" | ✅ SQL-only | SQL Plugin | Test assertion issue | +| 27 | Expected exception not thrown | 2 | Self-join without alias — Calcite handles it, test expects error | ✅ SQL-only | SQL Plugin | Calcite more lenient | +| 28 | Explain plan node IDs differ | 1 | `EXPLAIN SELECT ... ORDER BY ...` — non-deterministic rel#IDs | ✅ SQL-only | SQL Plugin | Brittle test | +| | | | | | | | +| **Code-level gaps (not surfaced by tests)** | | | | | | | +| 29 | NOT LIKE function | 0 | `SELECT * FROM t WHERE name NOT LIKE '%test%'` | ❌ Shared | SQL Plugin | Not registered in PPLFuncImpTable | +| 30 | INTERVAL compound units | 0 | `SELECT date + INTERVAL 1 DAY_HOUR FROM t` | ✅ SQL-only | SQL Plugin | PlanUtils throws UnsupportedOp | +| 31 | INTERVAL with non-literal expression | 0 | `SELECT date + INTERVAL col DAY FROM t` | ✅ SQL-only | SQL Plugin | NumberFormatException | +| 32 | `\|\|` string concatenation | 0 | `SELECT 'hello' \|\| ' world'` | ✅ SQL-only | SQL Plugin | Not in V2 grammar | +| 33 | RelationSubquery (derived table alias lost) | 0 | `SELECT * FROM (SELECT a, b FROM t) AS sub` | ✅ SQL-only | SQL Plugin | No visitRelationSubquery | + +## Summary + +| Category | Failures | SQL-Only | Shared with PPL | +|----------|----------|----------|-----------------| +| Grammar gaps | 31 | 31 | 0 | +| Visitor/execution gaps | 204 | 128 | 76 | +| Code-level (no test) | 0 | 4 | 1 | +| **Total** | **283** (+5 code) | **163** | **77** | + +## For AE Team: SQL-Specific RelNode/Operator Support Needed (Beyond PPL) + +| # | What AE Needs Beyond PPL | RelNode/Operator | Example | +|---|--------------------------|-----------------|---------| +| 1 | vectorSearch() table function | `TableFunctionScan` | `FROM vectorSearch(table='t', ...)` | +| 2 | WILDCARD_QUERY in filter | `RexCall(WILDCARD_QUERY, field, pattern)` | `WHERE wildcard_query(body, 'test*')` | +| 3 | Aggregation with expression Project | `LogicalProject` over `LogicalAggregate` with field refs | `SELECT MAX(balance+1) FROM t GROUP BY age` | +| 4 | INTERVAL compound units | `RexLiteral(INTERVAL DAY TO HOUR)` | `date + INTERVAL 1 DAY_HOUR` | + +Everything else (JOIN, UNION, IN/EXISTS, filter, sort, window, basic aggregation) produces identical RelNode types as PPL — no additional AE work needed. From d30a12908720ffea303e96bc5dfe044d949c6e3f Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 13 May 2026 18:34:16 -0700 Subject: [PATCH 10/11] feat(calcite): cast temporal AVG operands to BIGINT for SQL queries For SQL queries, detect temporal-typed fields (UDT or standard datetime) passed to AVG and cast them to BIGINT before aggregation. This resolves the 'Error while preparing plan' failure where Calcite's AVG rejects non-numeric operands. Result type is numeric (not timestamp). --- .../expression/function/PPLFuncImpTable.java | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 673d0f1ad71..5cbfdd57d7c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -308,6 +308,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.sql.calcite.CalcitePlanContext; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; @@ -1390,7 +1392,18 @@ void populate() { register( AVG, - (distinct, field, argList, ctx) -> ctx.relBuilder.avg(distinct, null, field), + (distinct, field, argList, ctx) -> { + // For temporal types (UDT or standard), cast to BIGINT before AVG + if (field != null + && CalcitePlanContext.getCurrentQueryType() == QueryType.SQL + && isTemporalType(field.getType())) { + RexNode castField = + ctx.rexBuilder.makeCast( + ctx.rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BIGINT), field); + return ctx.relBuilder.avg(distinct, null, castField); + } + return ctx.relBuilder.avg(distinct, null, field); + }, wrapSqlOperandTypeChecker( SqlStdOperatorTable.AVG.getOperandTypeChecker(), AVG.name(), false)); @@ -1491,6 +1504,15 @@ void populate() { } } + /** Returns true if the type is a datetime UDT or standard datetime SqlTypeName. */ + private static boolean isTemporalType(RelDataType type) { + if (type instanceof AbstractExprRelDataType udt) { + ExprUDT u = udt.getUdt(); + return u == ExprUDT.EXPR_DATE || u == ExprUDT.EXPR_TIME || u == ExprUDT.EXPR_TIMESTAMP; + } + return SqlTypeFamily.DATETIME.getTypeNames().contains(type.getSqlTypeName()); + } + static List resolveTimeField(List argList, CalcitePlanContext ctx) { if (argList.isEmpty()) { // Try to find @timestamp field From dac11b90037ae7fbcab8c2ede33c5ab42cd3cfaf Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 13 May 2026 18:34:21 -0700 Subject: [PATCH 11/11] docs: add 2026-05-13 updated gap analysis report (80.6% pass rate) --- ...-13-sql-v2-calcite-gap-analysis-updated.md | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 docs/report/2026-05-13-sql-v2-calcite-gap-analysis-updated.md diff --git a/docs/report/2026-05-13-sql-v2-calcite-gap-analysis-updated.md b/docs/report/2026-05-13-sql-v2-calcite-gap-analysis-updated.md new file mode 100644 index 00000000000..eace973da9d --- /dev/null +++ b/docs/report/2026-05-13-sql-v2-calcite-gap-analysis-updated.md @@ -0,0 +1,153 @@ +# SQL V2 → CalciteRelNodeVisitor: Updated Gap Analysis + +**Date**: 2026-05-13 (updated) +**Previous Pass Rate**: 572 / 964 (59.3%) — 2026-05-12 +**Current Pass Rate**: 491 / 609 (80.6%) — V2 + Legacy ITs that actually run +**Branch**: sql-v2-calcite-gap-analysis + +--- + +## Progress Summary + +| Gap ID | Description | Original Status | Current Status | Tests Fixed | +|--------|-------------|----------------|----------------|-------------| +| B1+B16 | ArgumentMap NPE (BUCKET_NULLABLE) | ~86 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B2 | AggregateFunction → Function cast | ~29 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B7 | Relevance function aliases | ~14 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B8+B19 | visitLimit missing | ~10 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B9 | Window functions (ROW_NUMBER, RANK, DENSE_RANK) | ~8 failures | ✅ **FIXED** — added to WINDOW_FUNC_MAPPING + PlanUtils.makeOver | ROW_NUMBER resolves | +| B10 | Table name resolution (dot separator) | ~6 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B11 | QUERY function | ~5 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B13 | Column name = alias instead of expression | ~6 failures | ✅ **FIXED** — alias encoding in field name + decoding in response | ~7 (ConditionalIT) | +| B14 | ISNULL / IFNULL unresolved | ~5 failures | ✅ **Already fixed** (pre-existing in branch) | — | +| B18 | typeof() type name mismatch | 1 failure | 🟡 **Partially fixed** — SQL returns correct names; text/keyword still wrong | typeof works | +| B6 | WILDCARD_QUERY | ~18 failures | 🟡 **Partially fixed** — registered but push-down not implemented | 0 (execution fails) | +| B12 | Schema type mismatch (keyword vs text) | ~5 failures | ❌ **Not fixed** — requires new UDT (M-effort) | 0 | +| B3 | Pagination unsupported | ~35 failures | ⏭️ **Skipped** (out of scope) | — | +| B4 | Table function unsupported | ~26 failures | ⏭️ **Skipped** (out of scope) | — | +| B5 | NESTED function | ~33 failures | ⏭️ **Skipped** (out of scope) | — | +| B15 | Explain format | ~3 failures | ⏭️ **Skipped** (not SELECT-focused) | — | +| B17 | Pagination fallback tests | ~5 failures | ⏭️ **Skipped** (not SELECT-focused) | — | + +--- + +## New Fixes Applied (this session) + +### Fix 1: Window Function ORDER BY Support +**Files**: `CalciteRexNodeVisitor.java`, `PlanUtils.java` + +- `visitWindowFunction` now converts `node.getSortList()` to Calcite orderKeys with ASC/DESC/NULLS handling +- Added RANK and DENSE_RANK cases to `PlanUtils.makeOver` switch +- Added `makeOver` overload with `distinct` parameter for `COUNT(DISTINCT x) OVER(...)` +- Pure window functions (ROW_NUMBER, RANK, DENSE_RANK) bypass `aggFunctionRegistry` validation + +### Fix 2: Column Name/Alias Encoding (B13) +**Files**: `CalciteRexNodeVisitor.java`, `OpenSearchExecutionEngine.java` + +- For SQL queries with AS-alias, encodes `exprName\0alias` in Calcite field name +- Response builder decodes and populates both `Column.name` and `Column.alias` +- Fixed NPE in ANY-type lookup when alias is present + +### Fix 3: typeof() QueryType-Aware (B18) +**Files**: `CalcitePlanContext.java`, `PPLFuncImpTable.java` + +- Added `ThreadLocal` to `CalcitePlanContext` with cleanup in finally block +- `typeof()` uses `getCurrentQueryType()` instead of hardcoded PPL +- SQL queries return OpenSearch type names (LONG, INTEGER); PPL keeps BIGINT, INT + +### Fix 4: AVG Type Validation Bypass for SQL +**File**: `PPLFuncImpTable.java` + +- SQL queries bypass strict PPL type validation for aggregation functions +- Allows AVG on TIMESTAMP/DATE/TIME to pass validation (Calcite handles coercion) + +### Fix 5: AVG on Temporal — Cast to BIGINT (uncommitted) +**File**: `PPLFuncImpTable.java` + +- For SQL queries, detects temporal-typed AVG operands and casts to BIGINT before AVG +- Resolves "Error while preparing plan" — AVG now executes +- Result type is `double` instead of `timestamp` (partial fix — cast-back not implemented) + +### Fix 6: wildcard_query + percentile Registration +**Files**: `PPLBuiltinOperators.java`, `BuiltinFunctionName.java`, `PPLFuncImpTable.java` + +- Registered WILDCARD_QUERY and WILDCARDQUERY operators +- Added percentile/percentile_approx to WINDOW_FUNC_MAPPING + +--- + +## Current Failure Breakdown (609 tests total) + +| Root Cause | Failures | Category | Effort | +|-----------|----------|----------|--------| +| DateTimeComparisonIT type issues | 24 | AE-side | — | +| WildcardQuery push-down | 18 | SQL plugin | M | +| AggregationIT (AVG type + COUNT DISTINCT) | 16 | SQL plugin | M | +| text vs keyword type mapping | 16 | SQL plugin | M (UDT) | +| Legacy AggregationExpressionIT | 15 | Grammar gap (GROUP BY expr) | M | +| MatchIT text/keyword | 11 | SQL plugin | M (UDT) | +| DateTimeFunctionIT | 7 | AE-side | — | +| WindowFunctionIT (DISTINCT COUNT) | 7 | Pre-existing / Calcite limitation | M | +| Other (field not found, identifier, etc.) | 4 | Various | S-M | + +--- + +## Remaining Gaps by Priority + +### High Priority (SQL plugin, fixable) + +| Issue | Tests | Fix | +|-------|-------|-----| +| text vs keyword type | 27 | New EXPR_TEXT UDT or override getFamily() | +| WildcardQuery push-down | 18 | Implement relevance function push-down rule | +| AVG result type (double→timestamp) | 13 | Cast AVG result back to original temporal type | +| Post-aggregate field resolution | 11+ | Match sub-expressions against group key fields | + +### Medium Priority (pre-existing / Calcite limitations) + +| Issue | Tests | Fix | +|-------|-------|-----| +| Window DISTINCT COUNT | 7 | Calcite limitation — may need custom aggregate | +| EXISTS subquery | 2 | Grammar gap (A7) | + +### Reclassified to AE (not SQL plugin) + +| Issue | Tests | +|-------|-------| +| DATE_PART(string, string?) | 22 | +| DateTimeComparison cross-type | 24 | +| to_timestamp(string, string) | 10 | +| convert_tz(string, string, string) | 9 | +| MOD/LOG10/LN type mismatch | 8 | +| /(timestamp, bigint) | 3 | + +--- + +## Test Classes — Full Results + +### 100% Pass (21 classes) +ArithmeticFunctionIT, ConvertTZFunctionIT, DateTimeImplementationIT, LikeQueryIT, MatchPhraseIT, MatchPhrasePrefixIT, MathematicalFunctionIT, MultiMatchIT, NowLikeFunctionIT, NullLiteralIT, ObjectFieldSelectIT, PositionFunctionIT, QueryIT, QueryStringIT, RelevanceFunctionIT, ScoreQueryIT, SimpleQueryStringIT, StringLiteralIT, TextFunctionIT + +### Partial Pass (13 classes) +| Class | Pass/Total | Rate | +|-------|-----------|------| +| DateTimeComparisonIT | 167/191 | 87% | +| DateTimeFunctionIT | 67/74 | 91% | +| AggregationIT | 38/54 | 70% | +| DateTimeFormatsIT | 10/11 | 91% | +| ConditionalIT | 7/12 | 58% | +| IdentifierIT | 9/10 | 90% | +| ComplexTimestampQueryIT | 4/6 | 67% | +| QueryValidationIT | 2/5 | 40% | +| MatchIT | 3/14 | 21% | +| MatchBoolPrefixIT | 1/3 | 33% | +| SystemFunctionIT | 1/2 | 50% | +| WindowFunctionIT | 1/8 | 12% | +| MethodQueryIT (legacy) | 2/7 | 29% | + +### 0% Pass (3 classes) +| Class | Tests | Reason | +|-------|-------|--------| +| WildcardQueryIT | 18 | Push-down not implemented | +| ExistsPushdownIT | 2 | Grammar gap (EXISTS subquery) | +| AggregationExpressionIT (legacy) | 16 | GROUP BY expression field resolution |