diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d0051568c42..b7cc24c1686 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -26,7 +26,9 @@ import com.google.common.collect.ImmutableSet; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; @@ -40,7 +42,6 @@ import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Let; import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.expression.Map; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -50,6 +51,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -72,6 +74,7 @@ import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprMissingValue; import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -93,6 +96,7 @@ import org.opensearch.sql.planner.logical.LogicalDedupe; import org.opensearch.sql.planner.logical.LogicalEval; import org.opensearch.sql.planner.logical.LogicalFetchCursor; +import org.opensearch.sql.planner.logical.LogicalFieldSummary; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalLimit; import org.opensearch.sql.planner.logical.LogicalML; @@ -277,7 +281,7 @@ public LogicalPlan visitRename(Rename node, AnalysisContext context) { LogicalPlan child = node.getChild().get(0).accept(this, context); ImmutableMap.Builder renameMapBuilder = new ImmutableMap.Builder<>(); - for (Map renameMap : node.getRenameList()) { + for (org.opensearch.sql.ast.expression.Map renameMap : node.getRenameList()) { Expression origin = expressionAnalyzer.analyze(renameMap.getOrigin(), context); // We should define the new target field in the context instead of analyze it. if (renameMap.getTarget() instanceof Field) { @@ -336,6 +340,70 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) { return new LogicalAggregation(child, aggregators, groupBys); } + @Override + public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context) { + LogicalPlan child = node.getChild().getFirst().accept(this, context); + + TypeEnvironment env = context.peek(); + Map fieldsMap = env.lookupAllFields(Namespace.FIELD_NAME); + + if (node.getIncludeFields() != null) { + List includeFields = + node.getIncludeFields().stream() + .map(expr -> ((Field) expr).getField().toString()) + .toList(); + + Map filteredFields = new HashMap<>(); + for (String field : includeFields) { + if (fieldsMap.containsKey(field)) { + filteredFields.put(field, fieldsMap.get(field)); + } + } + fieldsMap = filteredFields; + } + + ImmutableList.Builder aggregatorBuilder = new ImmutableList.Builder<>(); + Map> aggregatorToFieldNameMap = new HashMap<>(); + + for (Map.Entry entry : fieldsMap.entrySet()) { + ExprType fieldType = entry.getValue(); + String fieldName = entry.getKey(); + ReferenceExpression fieldExpression = DSL.ref(fieldName, fieldType); + + aggregatorBuilder.add(new NamedAggregator("Count" + fieldName, DSL.count(fieldExpression))); + aggregatorToFieldNameMap.put("Count" + fieldName, entry); + aggregatorBuilder.add( + new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression))); + aggregatorToFieldNameMap.put("Distinct" + fieldName, entry); + + if (ExprCoreType.numberTypes().contains(fieldType)) { + aggregatorBuilder.add(new NamedAggregator("Max" + fieldName, DSL.max(fieldExpression))); + aggregatorToFieldNameMap.put("Max" + fieldName, entry); + aggregatorBuilder.add(new NamedAggregator("Min" + fieldName, DSL.min(fieldExpression))); + aggregatorToFieldNameMap.put("Min" + fieldName, entry); + aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression))); + aggregatorToFieldNameMap.put("Avg" + fieldName, entry); + } + } + + ImmutableList aggregators = aggregatorBuilder.build(); + ImmutableList groupBys = new ImmutableList.Builder().build(); + + // new context + context.push(); + TypeEnvironment newEnv = context.peek(); + + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Field"), ExprCoreType.STRING); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Count"), ExprCoreType.INTEGER); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Distinct"), ExprCoreType.INTEGER); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Avg"), ExprCoreType.DOUBLE); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Max"), ExprCoreType.DOUBLE); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Min"), ExprCoreType.DOUBLE); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Type"), ExprCoreType.STRING); + + return new LogicalFieldSummary(child, aggregators, groupBys, aggregatorToFieldNameMap); + } + /** Build {@link LogicalRareTopN}. */ @Override public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index f27260dd5f0..9b2290027d8 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -17,6 +17,7 @@ import org.opensearch.sql.ast.expression.Compare; import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.FieldList; import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.HighlightFunction; import org.opensearch.sql.ast.expression.In; @@ -45,6 +46,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -326,4 +328,12 @@ public T visitCloseCursor(CloseCursor closeCursor, C context) { public T visitFillNull(FillNull fillNull, C context) { return visitChildren(fillNull, context); } + + public T visitFieldSummary(FieldSummary fieldSummary, C context) { + return visitChildren(fieldSummary, context); + } + + public T visitFieldList(FieldList fieldList, C context) { + return visitChildren(fieldList, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index d9956609ecb..44b783a5b6c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -49,6 +49,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -506,4 +507,9 @@ public static FillNull fillNull( return new FillNull( FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build())); } + + public static FieldSummary fieldSummary( + UnresolvedPlan input, List includeFields) { + return new FieldSummary(includeFields).attach(input); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java b/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java new file mode 100644 index 00000000000..335b38ca331 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java @@ -0,0 +1,33 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +/** Expression node that includes a list of fields nodes. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@AllArgsConstructor +public class FieldList extends UnresolvedExpression { + private final List fieldList; + + @Override + public List getChild() { + return ImmutableList.copyOf(fieldList); + } + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFieldList(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java b/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java new file mode 100644 index 00000000000..da47b2b4bbd --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java @@ -0,0 +1,48 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import java.util.List; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.AttributeList; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +public class FieldSummary extends UnresolvedPlan { + private UnresolvedPlan child; + private List includeFields; + + public FieldSummary(List collect) { + collect.forEach( + exp -> { + if (exp instanceof AttributeList) { + this.includeFields = ((AttributeList) exp).getAttrList(); + } + }); + } + + @Override + public List getChild() { + return child == null ? List.of() : List.of(child); + } + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFieldSummary(this, context); + } + + @Override + public FieldSummary attach(UnresolvedPlan child) { + this.child = child; + return this; + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java new file mode 100644 index 00000000000..41cb1524e0c --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java @@ -0,0 +1,28 @@ +package org.opensearch.sql.planner.logical; + +import java.util.List; +import java.util.Map; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.aggregation.NamedAggregator; + +/** Logical Field Summary. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = true) +public class LogicalFieldSummary extends LogicalAggregation { + + Map> aggregationToFieldNameMap; + + public LogicalFieldSummary( + LogicalPlan child, + List aggregatorList, + List groupByList, + Map> aggregationToFieldNameMap) { + super(child, aggregatorList, groupByList); + this.aggregationToFieldNameMap = aggregationToFieldNameMap; + } +} diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 3f4752aa2e3..742c6aa6c70 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -85,6 +85,7 @@ import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; @@ -1871,4 +1872,10 @@ public void visit_close_cursor() { () -> assertEquals("pewpew", ((LogicalFetchCursor) analyzed.getChild().get(0)).getCursor())); } + + @Test + public void visit_fieldsummary() { + LogicalPlan actual = analyze(new FieldSummary(List.of())); + assertEquals() + } } diff --git a/docs/user/ppl/cmd/fields.rst b/docs/user/ppl/cmd/fields.rst index 32c3a665d75..f42e9bd74fe 100644 --- a/docs/user/ppl/cmd/fields.rst +++ b/docs/user/ppl/cmd/fields.rst @@ -11,7 +11,7 @@ fields Description ============ -| Using ``field`` command to keep or remove fields from the search result. +| Use ``field`` command to keep or remove fields from the search result. Syntax diff --git a/docs/user/ppl/cmd/fieldsummary.rst b/docs/user/ppl/cmd/fieldsummary.rst new file mode 100644 index 00000000000..47c610d4cfd --- /dev/null +++ b/docs/user/ppl/cmd/fieldsummary.rst @@ -0,0 +1,56 @@ +============= +fieldsummary +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Use ``fieldsummary`` command to provide summary statistics for all fields in the current result set. + +This command will: + - Calculate basic statistics for each field (count, distinct count and min, max, avg for numeric fields) + - Determine the data type of each field + +Syntax +============ +fieldsummary [includefields="[,]"] + +* includefields: optional. specify which fields to include in the summary. + +Example 1: Perform fieldsummary without additional fields +============================================== + +PPL query:: + + os> source=nyc_taxi | fieldsummary; + fetched rows / total rows = 4/4 + +--------------+-------+----------+--------------------+---------+--------+--------+ + | Field | Count | Distinct | Avg | Max | Min | Type | + |--------------+-------+----------|--------------------+---------+--------+--------| + | anomaly_type | 973 | 1 | | | | string | + | category | 973 | 2 | | | | string | + | value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float | + | timestamp | 973 | 973 | | | | date | + +--------------+-------+----------+--------------------+---------+--------+--------+ + +Example 2: Perform fieldsummary with includefields +============================================== + +PPL query:: + + os> source=accounts | fieldsummary includefields="category,value" ; + fetched rows / total rows = 2/2 + +--------------+-------+----------+--------------------+---------+--------+--------+ + | Field | Count | Distinct | Avg | Max | Min | Type | + |--------------+-------+----------|--------------------+---------+--------+--------| + | category | 973 | 2 | | | | string | + | value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float | + +--------------+-------+----------+--------------------+---------+--------+--------+ + + diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index e4026f70ae1..cbf74b8735e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -289,6 +289,26 @@ public void pushTypeMapping(Map typeMapping) { exprValueFactory.extendTypeMapping(typeMapping); } + public void pushFieldSummaryTypeMapping() { + Map typeMapping = + Map.of( + "Field", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Text), + "Count", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), + "Distinct", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), + "Avg", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Min", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Max", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Type", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Text)); + exprValueFactory.extendTypeMapping(typeMapping); + } + private boolean isSortByDocOnly() { List> sorts = sourceBuilder.sorts(); if (sorts != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java new file mode 100644 index 00000000000..98294ef2ccd --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.sql.data.type.ExprType; + +/** No Bucket Aggregation Parser which include only metric parsers. */ +public class FieldSummaryAggregationParser implements OpenSearchAggregationResponseParser { + + private final MetricParserHelper metricsParser; + private Map> aggregationToFieldNameMap; + + public FieldSummaryAggregationParser( + List metricParserList, + Map> aggregationToFieldNameMap) { + metricParserList.addAll( + Arrays.asList( + new SingleValueParser("Field"), + new SingleValueParser("Count"), + new SingleValueParser("Distinct"), + new SingleValueParser("Avg"), + new SingleValueParser("Max"), + new SingleValueParser("Min"), + new SingleValueParser("Type"))); + metricsParser = new MetricParserHelper(metricParserList); + this.aggregationToFieldNameMap = aggregationToFieldNameMap; + } + + @Override + public List> parse(Aggregations aggregations) { + List> summaryTable = new ArrayList<>(); + + Map, List> aggregationsByField = new HashMap<>(); + + for (Aggregation aggregation : aggregations.asList()) { + String aggregationName = aggregation.getName(); + Map.Entry aggregationField = aggregationToFieldNameMap.get(aggregationName); + + aggregationsByField.putIfAbsent(aggregationField, new ArrayList()); + aggregationsByField.get(aggregationField).add(aggregation); + } + + for (Map.Entry, List> entry : + aggregationsByField.entrySet()) { + Map row = new HashMap<>(); + row.put("Field", entry.getKey().getKey()); + row.putAll(transformKeys(metricsParser.parse(new Aggregations(entry.getValue())))); + row.put("Type", entry.getKey().getValue().toString()); + summaryTable.add(row); + } + + return summaryTable; + } + + private static Map transformKeys(Map map) { + Map newMap = new HashMap<>(); + + for (Map.Entry entry : map.entrySet()) { + String originalKey = entry.getKey(); + String newKey = extractAggregationType(originalKey); + newMap.put(newKey, entry.getValue()); + } + + return newMap; + } + + private static String extractAggregationType(String key) { + String[] aggregationTypes = {"Count", "Avg", "Min", "Max", "Distinct"}; + + for (String type : aggregationTypes) { + if (key.startsWith(type)) { + return type; + } + } + + return "Unknown"; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java index 02ac21a39dc..c91b767be55 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java @@ -6,12 +6,14 @@ package org.opensearch.sql.opensearch.storage.scan; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; @@ -21,6 +23,7 @@ import org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder; import org.opensearch.sql.opensearch.storage.serialization.DefaultExpressionSerializer; import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFieldSummary; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalSort; @@ -37,9 +40,18 @@ class OpenSearchIndexScanAggregationBuilder implements PushDownQueryBuilder { /** Grouping items pushed down. */ private final List groupByList; + /** Is aggregation performing a fieldsumary aggregation */ + private Map> aggregationToFieldNameMap; + /** Sorting items pushed down. */ private List> sortList; + OpenSearchIndexScanAggregationBuilder( + OpenSearchRequestBuilder requestBuilder, LogicalFieldSummary aggregation) { + this(requestBuilder, (LogicalAggregation) aggregation); + aggregationToFieldNameMap = aggregation.getAggregationToFieldNameMap(); + } + OpenSearchIndexScanAggregationBuilder( OpenSearchRequestBuilder requestBuilder, LogicalAggregation aggregation) { this.requestBuilder = requestBuilder; @@ -52,9 +64,13 @@ public OpenSearchRequestBuilder build() { AggregationQueryBuilder builder = new AggregationQueryBuilder(new DefaultExpressionSerializer()); Pair, OpenSearchAggregationResponseParser> aggregationBuilder = - builder.buildAggregationBuilder(aggregatorList, groupByList, sortList); + builder.buildAggregationBuilder( + aggregatorList, groupByList, sortList, aggregationToFieldNameMap); requestBuilder.pushDownAggregation(aggregationBuilder); requestBuilder.pushTypeMapping(builder.buildTypeMapping(aggregatorList, groupByList)); + if (aggregationToFieldNameMap != null) { + requestBuilder.pushFieldSummaryTypeMapping(); + } return requestBuilder; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index 8a2f3e98f42..9e4dba9436a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -12,6 +12,7 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFieldSummary; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; @@ -71,7 +72,14 @@ public boolean pushDownAggregation(LogicalAggregation aggregation) { // Switch to builder for aggregate query which has different push down logic // for later filter, sort and limit operator. - delegate = new OpenSearchIndexScanAggregationBuilder(delegate.build(), aggregation); + if (aggregation instanceof LogicalFieldSummary) { + delegate = + new OpenSearchIndexScanAggregationBuilder( + delegate.build(), (LogicalFieldSummary) aggregation); + } else { + delegate = new OpenSearchIndexScanAggregationBuilder(delegate.build(), aggregation); + } + return true; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java index a218151b2e3..9d4ceef4456 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java @@ -23,6 +23,7 @@ import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.ExpressionNodeVisitor; import org.opensearch.sql.expression.NamedExpression; @@ -30,6 +31,7 @@ import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.FieldSummaryAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -65,11 +67,25 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { List namedAggregatorList, List groupByList, List> sortList) { + return buildAggregationBuilder(namedAggregatorList, groupByList, sortList, null); + } + + /** Build AggregationBuilder. */ + public Pair, OpenSearchAggregationResponseParser> + buildAggregationBuilder( + List namedAggregatorList, + List groupByList, + List> sortList, + Map> aggregationToFieldNameMap) { final Pair> metrics = metricBuilder.build(namedAggregatorList); - if (groupByList.isEmpty()) { + if (aggregationToFieldNameMap != null) { + return Pair.of( + ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), + new FieldSummaryAggregationParser(metrics.getRight(), aggregationToFieldNameMap)); + } else if (groupByList.isEmpty()) { // no bucket return Pair.of( ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index c484f34a2a6..c61a8a31a61 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -38,6 +38,10 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; TRENDLINE: 'TRENDLINE'; +// FIELDSUMMARY keywords +FIELDSUMMARY: 'FIELDSUMMARY'; +INCLUDEFIELDS: 'INCLUDEFIELDS'; + // COMMAND ASSIST KEYWORDS AS: 'AS'; BY: 'BY'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index acae54b7d9d..e40cd6ab2c1 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -51,6 +51,7 @@ commands | mlCommand | fillnullCommand | trendlineCommand + | fieldsummaryCommand ; searchCommand @@ -195,6 +196,14 @@ mlArg : (argName = ident EQUAL argValue = literalValue) ; +fieldsummaryCommand + : FIELDSUMMARY (fieldsummaryParameter)* + ; + +fieldsummaryParameter + : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields + ; + // clauses fromClause : SOURCE EQUAL tableSourceClause diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index c3c31ee2e1b..dd27f44e6c6 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -437,6 +438,15 @@ public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommand .orElse(new Trendline(Optional.empty(), trendlineComputations)); } + @Override + public UnresolvedPlan visitFieldsummaryCommand( + OpenSearchPPLParser.FieldsummaryCommandContext ctx) { + return new FieldSummary( + ctx.fieldsummaryParameter().stream() + .map(arg -> expressionBuilder.visit(arg)) + .collect(Collectors.toList())); + } + /** Get original text in query. */ private String getTextInQuery(ParserRuleContext ctx) { Token start = ctx.getStart(); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 5a7522683a1..c78568925ee 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -174,6 +174,16 @@ public UnresolvedExpression visitStatsFunctionCall(StatsFunctionCallContext ctx) return new AggregateFunction(ctx.statsFunctionName().getText(), visit(ctx.valueExpression())); } + @Override + public UnresolvedExpression visitFieldsummaryIncludeFields( + OpenSearchPPLParser.FieldsummaryIncludeFieldsContext ctx) { + List list = + ctx.fieldList().fieldExpression().stream() + .map(this::visitFieldExpression) + .collect(Collectors.toList()); + return new AttributeList(list); + } + @Override public UnresolvedExpression visitCountAllFunctionCall(CountAllFunctionCallContext ctx) { return new AggregateFunction("count", AllFields.of()); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index c6f4ed20447..b0d79ee7dfb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -49,18 +49,21 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.List; import java.util.Optional; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.AttributeList; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; @@ -848,6 +851,16 @@ public void testShowDataSourcesCommand() { assertEqual("show datasources", relation(DATASOURCES_TABLE_NAME)); } + @Test + public void testFieldSummaryCommand() { + assertEqual("source=t | fieldsummary", new FieldSummary(List.of()).attach(relation("t"))); + + assertEqual( + "source=t | fieldsummary includefields = field_name", + new FieldSummary(List.of(new AttributeList(List.of(field("field_name"))))) + .attach(relation("t"))); + } + protected void assertEqual(String query, Node expectedPlan) { Node actualPlan = plan(query); assertEquals(expectedPlan, actualPlan);