From 7692eb1efc30c6fbba7a785686053a903de09b8e Mon Sep 17 00:00:00 2001 From: Kenrick Yap Date: Thu, 30 Jan 2025 14:12:41 -0500 Subject: [PATCH 1/3] added grammar Signed-off-by: Kenrick Yap --- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 5 +++++ ppl/src/main/antlr/OpenSearchPPLParser.g4 | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index c484f34a2a6..7d6c95d5503 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -38,6 +38,11 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; TRENDLINE: 'TRENDLINE'; +// FIELDSUMMARY keywords +FIELDSUMMARY: 'FIELDSUMMARY'; +INCLUDEFIELDS: 'INCLUDEFIELDS'; +NULLS: 'NULLS'; + // COMMAND ASSIST KEYWORDS AS: 'AS'; BY: 'BY'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index acae54b7d9d..c30d9ca803e 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -51,6 +51,7 @@ commands | mlCommand | fillnullCommand | trendlineCommand + | fieldSummaryCommand ; searchCommand @@ -134,6 +135,15 @@ fillnullCommand | fillNullWithFieldVariousValues) ; +fieldsummaryCommand + : FIELDSUMMARY (fieldsummaryParameter)* + ; + +fieldsummaryParameter + : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields + | NULLS EQUAL booleanLiteral # fieldsummaryNulls + ; + fillNullWithTheSameValue : WITH nullReplacement = valueExpression IN nullableFieldList = fieldList ; From 3163f7c4587288980f50e7769d119233aa56096f Mon Sep 17 00:00:00 2001 From: Kenrick Yap Date: Mon, 10 Feb 2025 10:51:15 -0800 Subject: [PATCH 2/3] fieldsummary implementation Signed-off-by: Kenrick Yap --- .../org/opensearch/sql/analysis/Analyzer.java | 58 +++++++++++- .../sql/ast/AbstractNodeVisitor.java | 10 ++ .../org/opensearch/sql/ast/dsl/AstDSL.java | 8 ++ .../sql/ast/expression/FieldList.java | 34 +++++++ .../opensearch/sql/ast/tree/FieldSummary.java | 49 ++++++++++ .../planner/logical/LogicalFieldSummary.java | 27 ++++++ .../request/OpenSearchRequestBuilder.java | 14 +++ .../agg/FieldSummaryAggregationParser.java | 94 +++++++++++++++++++ ...OpenSearchIndexScanAggregationBuilder.java | 17 +++- .../scan/OpenSearchIndexScanBuilder.java | 8 +- .../aggregation/AggregationQueryBuilder.java | 23 ++++- ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 - ppl/src/main/antlr/OpenSearchPPLParser.g4 | 19 ++-- .../opensearch/sql/ppl/parser/AstBuilder.java | 6 ++ .../sql/ppl/parser/AstExpressionBuilder.java | 8 ++ 15 files changed, 357 insertions(+), 19 deletions(-) create mode 100644 core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java create mode 100644 core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d0051568c42..53488ab763f 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -26,12 +26,15 @@ import com.google.common.collect.ImmutableSet; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.math3.analysis.function.Exp; import org.opensearch.sql.DataSourceSchemaName; import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.analysis.symbol.Symbol; @@ -40,7 +43,6 @@ import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Let; import org.opensearch.sql.ast.expression.Literal; -import org.opensearch.sql.ast.expression.Map; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.QualifiedName; import org.opensearch.sql.ast.expression.UnresolvedExpression; @@ -50,6 +52,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -72,6 +75,7 @@ import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprMissingValue; import org.opensearch.sql.data.type.ExprCoreType; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.datasource.DataSourceService; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -80,7 +84,9 @@ import org.opensearch.sql.expression.LiteralExpression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; +import org.opensearch.sql.expression.aggregation.AggregationState; import org.opensearch.sql.expression.aggregation.Aggregator; +import org.opensearch.sql.expression.aggregation.AvgAggregator; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.BuiltinFunctionRepository; @@ -93,6 +99,7 @@ import org.opensearch.sql.planner.logical.LogicalDedupe; import org.opensearch.sql.planner.logical.LogicalEval; import org.opensearch.sql.planner.logical.LogicalFetchCursor; +import org.opensearch.sql.planner.logical.LogicalFieldSummary; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalLimit; import org.opensearch.sql.planner.logical.LogicalML; @@ -277,7 +284,7 @@ public LogicalPlan visitRename(Rename node, AnalysisContext context) { LogicalPlan child = node.getChild().get(0).accept(this, context); ImmutableMap.Builder renameMapBuilder = new ImmutableMap.Builder<>(); - for (Map renameMap : node.getRenameList()) { + for (org.opensearch.sql.ast.expression.Map renameMap : node.getRenameList()) { Expression origin = expressionAnalyzer.analyze(renameMap.getOrigin(), context); // We should define the new target field in the context instead of analyze it. if (renameMap.getTarget() instanceof Field) { @@ -336,6 +343,53 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) { return new LogicalAggregation(child, aggregators, groupBys); } + @Override + public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context) { + LogicalPlan child = node.getChild().getFirst().accept(this, context); + + TypeEnvironment env = context.peek(); + Map fieldsMap = env.lookupAllFields(Namespace.FIELD_NAME); + + ImmutableList.Builder aggregatorBuilder = new ImmutableList.Builder<>(); + Map aggregatorToFieldNameMap = new HashMap(); + + for (Map.Entry entry : fieldsMap.entrySet()) { + ExprType fieldType = entry.getValue(); + String fieldName = entry.getKey(); + ReferenceExpression fieldExpression = DSL.ref(fieldName, fieldType); + + aggregatorBuilder.add(new NamedAggregator("Count" + fieldName, DSL.count(fieldExpression))); + aggregatorToFieldNameMap.put("Count" + fieldName, fieldName); + aggregatorBuilder.add(new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression))); + aggregatorToFieldNameMap.put("Distinct" + fieldName, fieldName); + + if (ExprCoreType.numberTypes().contains(fieldType)) { + aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression))); + aggregatorToFieldNameMap.put("Avg" + fieldName, fieldName); + aggregatorBuilder.add(new NamedAggregator("Max" + fieldName, DSL.max(fieldExpression))); + aggregatorToFieldNameMap.put("Max" + fieldName, fieldName); + aggregatorBuilder.add(new NamedAggregator("Min" + fieldName, DSL.min(fieldExpression))); + aggregatorToFieldNameMap.put("Min" + fieldName, fieldName); + } + } + + ImmutableList aggregators = aggregatorBuilder.build(); + ImmutableList groupBys = new ImmutableList.Builder().build(); + + // new context + context.push(); + TypeEnvironment newEnv = context.peek(); + + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Field"), ExprCoreType.STRING); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Count"), ExprCoreType.INTEGER); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Distinct"), ExprCoreType.INTEGER); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Avg"), ExprCoreType.DOUBLE); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Max"), ExprCoreType.DOUBLE); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Min"), ExprCoreType.DOUBLE); + + return new LogicalFieldSummary(child, aggregators, groupBys, aggregatorToFieldNameMap); + } + /** Build {@link LogicalRareTopN}. */ @Override public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index f27260dd5f0..9b2290027d8 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -17,6 +17,7 @@ import org.opensearch.sql.ast.expression.Compare; import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.FieldList; import org.opensearch.sql.ast.expression.Function; import org.opensearch.sql.ast.expression.HighlightFunction; import org.opensearch.sql.ast.expression.In; @@ -45,6 +46,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -326,4 +328,12 @@ public T visitCloseCursor(CloseCursor closeCursor, C context) { public T visitFillNull(FillNull fillNull, C context) { return visitChildren(fillNull, context); } + + public T visitFieldSummary(FieldSummary fieldSummary, C context) { + return visitChildren(fieldSummary, context); + } + + public T visitFieldList(FieldList fieldList, C context) { + return visitChildren(fieldList, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index d9956609ecb..39917337bcb 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -49,6 +49,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -506,4 +507,11 @@ public static FillNull fillNull( return new FillNull( FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build())); } + + public static FieldSummary fieldSummary( + UnresolvedPlan input, + List includeFields + ) { + return new FieldSummary(includeFields).attach(input); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java b/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java new file mode 100644 index 00000000000..218aa7a9f0c --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java @@ -0,0 +1,34 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.expression; + +import com.google.common.collect.ImmutableList; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; + +import java.util.List; + +/** Expression node that includes a list of fields nodes. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +@AllArgsConstructor +public class FieldList extends UnresolvedExpression { + private final List fieldList; + + @Override + public List getChild() { + return ImmutableList.copyOf(fieldList); + } + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFieldList(this, context); + } +} \ No newline at end of file diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java b/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java new file mode 100644 index 00000000000..aa2570bf7ed --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java @@ -0,0 +1,49 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Argument; +import org.opensearch.sql.ast.expression.AttributeList; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +import java.util.List; + +@Getter +@ToString +@EqualsAndHashCode(callSuper = false) +public class FieldSummary extends UnresolvedPlan { + private List includeFields; + private UnresolvedPlan child; + + public FieldSummary(List collect) { + collect.forEach(exp -> { + if (exp instanceof AttributeList) { + this.includeFields = ((AttributeList)exp).getAttrList(); + } + }); + } + + @Override + public List getChild() { + return child == null ? List.of() : List.of(child); + } + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFieldSummary(this, context); + } + + @Override + public FieldSummary attach(UnresolvedPlan child) { + this.child = child; + return this; + } +} diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java new file mode 100644 index 00000000000..4ff18fc7f67 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java @@ -0,0 +1,27 @@ +package org.opensearch.sql.planner.logical; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import org.opensearch.sql.expression.NamedExpression; +import org.opensearch.sql.expression.aggregation.NamedAggregator; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** Logical Field Summary. */ +@Getter +@ToString +@EqualsAndHashCode(callSuper = true) +public class LogicalFieldSummary extends LogicalAggregation { + + Map aggregationToFieldNameMap; + + public LogicalFieldSummary( + LogicalPlan child, List aggregatorList, List groupByList, Map aggregationToFieldNameMap) { + super(child, aggregatorList, groupByList); + this.aggregationToFieldNameMap = aggregationToFieldNameMap; + } +} \ No newline at end of file diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index e4026f70ae1..f6147770dcd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -15,6 +15,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -289,6 +290,19 @@ public void pushTypeMapping(Map typeMapping) { exprValueFactory.extendTypeMapping(typeMapping); } + public void pushFieldSummaryTypeMapping() { + Map typeMapping = Map.of( + "Field", OpenSearchDataType.of(OpenSearchDataType.MappingType.Text), + "Count", OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), + "Distinct", OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), + "Avg", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Min", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Max", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Sum", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double) + ); + exprValueFactory.extendTypeMapping(typeMapping); + } + private boolean isSortByDocOnly() { List> sorts = sourceBuilder.sorts(); if (sorts != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java new file mode 100644 index 00000000000..0a72416aae0 --- /dev/null +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java @@ -0,0 +1,94 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.opensearch.sql.opensearch.response.agg; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.opensearch.search.aggregations.Aggregation; +import org.opensearch.search.aggregations.Aggregations; + +/** No Bucket Aggregation Parser which include only metric parsers. */ +public class FieldSummaryAggregationParser implements OpenSearchAggregationResponseParser { + + private final MetricParserHelper metricsParser; + private Map aggregationToFieldNameMap; + + + public FieldSummaryAggregationParser(List metricParserList, Map aggregationToFieldNameMap) { + metricParserList.addAll(Arrays.asList( + new SingleValueParser("Field"), + new SingleValueParser("Count"), + new SingleValueParser("Distinct"), + new SingleValueParser("Avg"), + new SingleValueParser("Max"), + new SingleValueParser("Min"), + new SingleValueParser("Sum") + )); + metricsParser = new MetricParserHelper(metricParserList); + this.aggregationToFieldNameMap = aggregationToFieldNameMap; + } + + @Override + public List> parse(Aggregations aggregations) { + List> summaryTable = new ArrayList<>(); + + Map> aggregationsByField = new HashMap<>(); + + for (Aggregation aggregation: aggregations.asList()) { + String aggregationName = aggregation.getName(); + String aggregationField = aggregationToFieldNameMap.get(aggregationName); + + aggregationsByField.putIfAbsent(aggregationField, new ArrayList()); + aggregationsByField.get(aggregationField).add(aggregation); + } + + for (Map.Entry> entry: aggregationsByField.entrySet()) { + Map row = new HashMap<>(); + row.put("Field", entry.getKey()); + row.putAll(transformKeys(metricsParser.parse(new Aggregations(entry.getValue())))); + summaryTable.add(row); + } + + return summaryTable; + } + + private static Map transformKeys(Map map) { + Map newMap = new HashMap<>(); + + for (Map.Entry entry : map.entrySet()) { + String originalKey = entry.getKey(); + String newKey = extractAggregationType(originalKey); + newMap.put(newKey, entry.getValue()); + } + + return newMap; + } + + private static String extractAggregationType(String key) { + String[] aggregationTypes = {"Count", "Avg", "Min", "Max", "Distinct", "Sum"}; + + for (String type : aggregationTypes) { + if (key.startsWith(type)) { + return type; + } + } + + return "Unknown"; + } +} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java index 02ac21a39dc..0c2b9862e7a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java @@ -6,6 +6,7 @@ package org.opensearch.sql.opensearch.storage.scan; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; @@ -21,6 +22,7 @@ import org.opensearch.sql.opensearch.storage.script.aggregation.AggregationQueryBuilder; import org.opensearch.sql.opensearch.storage.serialization.DefaultExpressionSerializer; import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFieldSummary; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalSort; @@ -37,9 +39,19 @@ class OpenSearchIndexScanAggregationBuilder implements PushDownQueryBuilder { /** Grouping items pushed down. */ private final List groupByList; + /** Is aggregation performing a fieldsumary aggregation */ + private Map aggregationToFieldNameMap; + /** Sorting items pushed down. */ private List> sortList; + OpenSearchIndexScanAggregationBuilder( + OpenSearchRequestBuilder requestBuilder, LogicalFieldSummary aggregation + ) { + this(requestBuilder, (LogicalAggregation) aggregation); + aggregationToFieldNameMap = aggregation.getAggregationToFieldNameMap(); + } + OpenSearchIndexScanAggregationBuilder( OpenSearchRequestBuilder requestBuilder, LogicalAggregation aggregation) { this.requestBuilder = requestBuilder; @@ -52,9 +64,12 @@ public OpenSearchRequestBuilder build() { AggregationQueryBuilder builder = new AggregationQueryBuilder(new DefaultExpressionSerializer()); Pair, OpenSearchAggregationResponseParser> aggregationBuilder = - builder.buildAggregationBuilder(aggregatorList, groupByList, sortList); + builder.buildAggregationBuilder(aggregatorList, groupByList, sortList, aggregationToFieldNameMap); requestBuilder.pushDownAggregation(aggregationBuilder); requestBuilder.pushTypeMapping(builder.buildTypeMapping(aggregatorList, groupByList)); + if (aggregationToFieldNameMap != null) { + requestBuilder.pushFieldSummaryTypeMapping(); + } return requestBuilder; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index 8a2f3e98f42..b098963b9c9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -12,6 +12,7 @@ import org.opensearch.sql.expression.ReferenceExpression; import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder; import org.opensearch.sql.planner.logical.LogicalAggregation; +import org.opensearch.sql.planner.logical.LogicalFieldSummary; import org.opensearch.sql.planner.logical.LogicalFilter; import org.opensearch.sql.planner.logical.LogicalHighlight; import org.opensearch.sql.planner.logical.LogicalLimit; @@ -71,7 +72,12 @@ public boolean pushDownAggregation(LogicalAggregation aggregation) { // Switch to builder for aggregate query which has different push down logic // for later filter, sort and limit operator. - delegate = new OpenSearchIndexScanAggregationBuilder(delegate.build(), aggregation); + if (aggregation instanceof LogicalFieldSummary){ + delegate = new OpenSearchIndexScanAggregationBuilder(delegate.build(), (LogicalFieldSummary) aggregation); + } else { + delegate = new OpenSearchIndexScanAggregationBuilder(delegate.build(), aggregation); + } + return true; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java index a218151b2e3..6a9e7952a89 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java @@ -30,6 +30,7 @@ import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.opensearch.data.type.OpenSearchDataType; import org.opensearch.sql.opensearch.response.agg.CompositeAggregationParser; +import org.opensearch.sql.opensearch.response.agg.FieldSummaryAggregationParser; import org.opensearch.sql.opensearch.response.agg.MetricParser; import org.opensearch.sql.opensearch.response.agg.NoBucketAggregationParser; import org.opensearch.sql.opensearch.response.agg.OpenSearchAggregationResponseParser; @@ -61,16 +62,30 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { /** Build AggregationBuilder. */ public Pair, OpenSearchAggregationResponseParser> - buildAggregationBuilder( + buildAggregationBuilder( List namedAggregatorList, List groupByList, List> sortList) { + return buildAggregationBuilder(namedAggregatorList, groupByList, sortList, null); + } + + /** Build AggregationBuilder. */ + public Pair, OpenSearchAggregationResponseParser> + buildAggregationBuilder( + List namedAggregatorList, + List groupByList, + List> sortList, + Map aggregationToFieldNameMap) { final Pair> metrics = metricBuilder.build(namedAggregatorList); - if (groupByList.isEmpty()) { - // no bucket + if (aggregationToFieldNameMap != null) { + return Pair.of( + ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), + new FieldSummaryAggregationParser(metrics.getRight(), aggregationToFieldNameMap)); + } else if (groupByList.isEmpty()) { + // no bucket return Pair.of( ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), new NoBucketAggregationParser(metrics.getRight())); @@ -92,7 +107,7 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { .collect(Collectors.toList()))) .subAggregations(metrics.getLeft()) .size(AGGREGATION_BUCKET_SIZE)), - new CompositeAggregationParser(metrics.getRight())); + new CompositeAggregationParser(metrics.getRight())); } } diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 7d6c95d5503..c61a8a31a61 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -41,7 +41,6 @@ TRENDLINE: 'TRENDLINE'; // FIELDSUMMARY keywords FIELDSUMMARY: 'FIELDSUMMARY'; INCLUDEFIELDS: 'INCLUDEFIELDS'; -NULLS: 'NULLS'; // COMMAND ASSIST KEYWORDS AS: 'AS'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index c30d9ca803e..e40cd6ab2c1 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -51,7 +51,7 @@ commands | mlCommand | fillnullCommand | trendlineCommand - | fieldSummaryCommand + | fieldsummaryCommand ; searchCommand @@ -135,15 +135,6 @@ fillnullCommand | fillNullWithFieldVariousValues) ; -fieldsummaryCommand - : FIELDSUMMARY (fieldsummaryParameter)* - ; - -fieldsummaryParameter - : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields - | NULLS EQUAL booleanLiteral # fieldsummaryNulls - ; - fillNullWithTheSameValue : WITH nullReplacement = valueExpression IN nullableFieldList = fieldList ; @@ -205,6 +196,14 @@ mlArg : (argName = ident EQUAL argValue = literalValue) ; +fieldsummaryCommand + : FIELDSUMMARY (fieldsummaryParameter)* + ; + +fieldsummaryParameter + : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields + ; + // clauses fromClause : SOURCE EQUAL tableSourceClause diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index c3c31ee2e1b..6001eb2ddea 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; @@ -437,6 +438,11 @@ public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommand .orElse(new Trendline(Optional.empty(), trendlineComputations)); } + @Override + public UnresolvedPlan visitFieldsummaryCommand(OpenSearchPPLParser.FieldsummaryCommandContext ctx) { + return new FieldSummary(ctx.fieldsummaryParameter().stream().map(arg -> expressionBuilder.visit(arg)).collect(Collectors.toList())); + } + /** Get original text in query. */ private String getTextInQuery(ParserRuleContext ctx) { Token start = ctx.getStart(); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 5a7522683a1..f71a47bcab2 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -174,6 +174,14 @@ public UnresolvedExpression visitStatsFunctionCall(StatsFunctionCallContext ctx) return new AggregateFunction(ctx.statsFunctionName().getText(), visit(ctx.valueExpression())); } + @Override + public UnresolvedExpression visitFieldsummaryIncludeFields(OpenSearchPPLParser.FieldsummaryIncludeFieldsContext ctx) { + List list = ctx.fieldList().fieldExpression().stream() + .map(this::visitFieldExpression) + .collect(Collectors.toList()); + return new AttributeList(list); + } + @Override public UnresolvedExpression visitCountAllFunctionCall(CountAllFunctionCallContext ctx) { return new AggregateFunction("count", AllFields.of()); From 3a4e7c4e9919de253dbad8dfdeac29b39ec5a440 Mon Sep 17 00:00:00 2001 From: Kenrick Yap Date: Wed, 12 Feb 2025 09:13:31 -0800 Subject: [PATCH 3/3] doc tests + initial unit tests Signed-off-by: Kenrick Yap --- .../org/opensearch/sql/analysis/Analyzer.java | 36 ++++--- .../org/opensearch/sql/ast/dsl/AstDSL.java | 4 +- .../sql/ast/expression/FieldList.java | 23 +++-- .../opensearch/sql/ast/tree/FieldSummary.java | 55 ++++++----- .../planner/logical/LogicalFieldSummary.java | 25 ++--- .../opensearch/sql/analysis/AnalyzerTest.java | 7 ++ docs/user/ppl/cmd/fields.rst | 2 +- docs/user/ppl/cmd/fieldsummary.rst | 56 +++++++++++ .../request/OpenSearchRequestBuilder.java | 26 +++-- .../agg/FieldSummaryAggregationParser.java | 94 ++++++++++--------- ...OpenSearchIndexScanAggregationBuilder.java | 9 +- .../scan/OpenSearchIndexScanBuilder.java | 6 +- .../aggregation/AggregationQueryBuilder.java | 13 +-- .../opensearch/sql/ppl/parser/AstBuilder.java | 8 +- .../sql/ppl/parser/AstExpressionBuilder.java | 6 +- .../sql/ppl/parser/AstBuilderTest.java | 13 +++ 16 files changed, 244 insertions(+), 139 deletions(-) create mode 100644 docs/user/ppl/cmd/fieldsummary.rst diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 53488ab763f..b7cc24c1686 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -34,7 +34,6 @@ import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.apache.commons.math3.analysis.function.Exp; import org.opensearch.sql.DataSourceSchemaName; import org.opensearch.sql.analysis.symbol.Namespace; import org.opensearch.sql.analysis.symbol.Symbol; @@ -84,9 +83,7 @@ import org.opensearch.sql.expression.LiteralExpression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; -import org.opensearch.sql.expression.aggregation.AggregationState; import org.opensearch.sql.expression.aggregation.Aggregator; -import org.opensearch.sql.expression.aggregation.AvgAggregator; import org.opensearch.sql.expression.aggregation.NamedAggregator; import org.opensearch.sql.expression.function.BuiltinFunctionName; import org.opensearch.sql.expression.function.BuiltinFunctionRepository; @@ -350,8 +347,23 @@ public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context) TypeEnvironment env = context.peek(); Map fieldsMap = env.lookupAllFields(Namespace.FIELD_NAME); + if (node.getIncludeFields() != null) { + List includeFields = + node.getIncludeFields().stream() + .map(expr -> ((Field) expr).getField().toString()) + .toList(); + + Map filteredFields = new HashMap<>(); + for (String field : includeFields) { + if (fieldsMap.containsKey(field)) { + filteredFields.put(field, fieldsMap.get(field)); + } + } + fieldsMap = filteredFields; + } + ImmutableList.Builder aggregatorBuilder = new ImmutableList.Builder<>(); - Map aggregatorToFieldNameMap = new HashMap(); + Map> aggregatorToFieldNameMap = new HashMap<>(); for (Map.Entry entry : fieldsMap.entrySet()) { ExprType fieldType = entry.getValue(); @@ -359,17 +371,18 @@ public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context) ReferenceExpression fieldExpression = DSL.ref(fieldName, fieldType); aggregatorBuilder.add(new NamedAggregator("Count" + fieldName, DSL.count(fieldExpression))); - aggregatorToFieldNameMap.put("Count" + fieldName, fieldName); - aggregatorBuilder.add(new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression))); - aggregatorToFieldNameMap.put("Distinct" + fieldName, fieldName); + aggregatorToFieldNameMap.put("Count" + fieldName, entry); + aggregatorBuilder.add( + new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression))); + aggregatorToFieldNameMap.put("Distinct" + fieldName, entry); if (ExprCoreType.numberTypes().contains(fieldType)) { - aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression))); - aggregatorToFieldNameMap.put("Avg" + fieldName, fieldName); aggregatorBuilder.add(new NamedAggregator("Max" + fieldName, DSL.max(fieldExpression))); - aggregatorToFieldNameMap.put("Max" + fieldName, fieldName); + aggregatorToFieldNameMap.put("Max" + fieldName, entry); aggregatorBuilder.add(new NamedAggregator("Min" + fieldName, DSL.min(fieldExpression))); - aggregatorToFieldNameMap.put("Min" + fieldName, fieldName); + aggregatorToFieldNameMap.put("Min" + fieldName, entry); + aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression))); + aggregatorToFieldNameMap.put("Avg" + fieldName, entry); } } @@ -386,6 +399,7 @@ public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context) newEnv.define(new Symbol(Namespace.FIELD_NAME, "Avg"), ExprCoreType.DOUBLE); newEnv.define(new Symbol(Namespace.FIELD_NAME, "Max"), ExprCoreType.DOUBLE); newEnv.define(new Symbol(Namespace.FIELD_NAME, "Min"), ExprCoreType.DOUBLE); + newEnv.define(new Symbol(Namespace.FIELD_NAME, "Type"), ExprCoreType.STRING); return new LogicalFieldSummary(child, aggregators, groupBys, aggregatorToFieldNameMap); } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 39917337bcb..44b783a5b6c 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -509,9 +509,7 @@ public static FillNull fillNull( } public static FieldSummary fieldSummary( - UnresolvedPlan input, - List includeFields - ) { + UnresolvedPlan input, List includeFields) { return new FieldSummary(includeFields).attach(input); } } diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java b/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java index 218aa7a9f0c..335b38ca331 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/FieldList.java @@ -6,29 +6,28 @@ package org.opensearch.sql.ast.expression; import com.google.common.collect.ImmutableList; +import java.util.List; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; -import java.util.List; - /** Expression node that includes a list of fields nodes. */ @Getter @ToString @EqualsAndHashCode(callSuper = false) @AllArgsConstructor public class FieldList extends UnresolvedExpression { - private final List fieldList; + private final List fieldList; - @Override - public List getChild() { - return ImmutableList.copyOf(fieldList); - } + @Override + public List getChild() { + return ImmutableList.copyOf(fieldList); + } - @Override - public R accept(AbstractNodeVisitor nodeVisitor, C context) { - return nodeVisitor.visitFieldList(this, context); - } -} \ No newline at end of file + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFieldList(this, context); + } +} diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java b/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java index aa2570bf7ed..da47b2b4bbd 100644 --- a/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java +++ b/core/src/main/java/org/opensearch/sql/ast/tree/FieldSummary.java @@ -5,45 +5,44 @@ package org.opensearch.sql.ast.tree; +import java.util.List; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; import org.opensearch.sql.ast.AbstractNodeVisitor; import org.opensearch.sql.ast.Node; -import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.AttributeList; import org.opensearch.sql.ast.expression.UnresolvedExpression; -import java.util.List; - @Getter @ToString @EqualsAndHashCode(callSuper = false) public class FieldSummary extends UnresolvedPlan { - private List includeFields; - private UnresolvedPlan child; - - public FieldSummary(List collect) { - collect.forEach(exp -> { - if (exp instanceof AttributeList) { - this.includeFields = ((AttributeList)exp).getAttrList(); - } + private UnresolvedPlan child; + private List includeFields; + + public FieldSummary(List collect) { + collect.forEach( + exp -> { + if (exp instanceof AttributeList) { + this.includeFields = ((AttributeList) exp).getAttrList(); + } }); - } - - @Override - public List getChild() { - return child == null ? List.of() : List.of(child); - } - - @Override - public R accept(AbstractNodeVisitor nodeVisitor, C context) { - return nodeVisitor.visitFieldSummary(this, context); - } - - @Override - public FieldSummary attach(UnresolvedPlan child) { - this.child = child; - return this; - } + } + + @Override + public List getChild() { + return child == null ? List.of() : List.of(child); + } + + @Override + public R accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFieldSummary(this, context); + } + + @Override + public FieldSummary attach(UnresolvedPlan child) { + this.child = child; + return this; + } } diff --git a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java index 4ff18fc7f67..41cb1524e0c 100644 --- a/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java +++ b/core/src/main/java/org/opensearch/sql/planner/logical/LogicalFieldSummary.java @@ -1,27 +1,28 @@ package org.opensearch.sql.planner.logical; +import java.util.List; +import java.util.Map; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.aggregation.NamedAggregator; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - /** Logical Field Summary. */ @Getter @ToString @EqualsAndHashCode(callSuper = true) public class LogicalFieldSummary extends LogicalAggregation { - Map aggregationToFieldNameMap; + Map> aggregationToFieldNameMap; - public LogicalFieldSummary( - LogicalPlan child, List aggregatorList, List groupByList, Map aggregationToFieldNameMap) { - super(child, aggregatorList, groupByList); - this.aggregationToFieldNameMap = aggregationToFieldNameMap; - } -} \ No newline at end of file + public LogicalFieldSummary( + LogicalPlan child, + List aggregatorList, + List groupByList, + Map> aggregationToFieldNameMap) { + super(child, aggregatorList, groupByList); + this.aggregationToFieldNameMap = aggregationToFieldNameMap; + } +} diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 3f4752aa2e3..742c6aa6c70 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -85,6 +85,7 @@ import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; @@ -1871,4 +1872,10 @@ public void visit_close_cursor() { () -> assertEquals("pewpew", ((LogicalFetchCursor) analyzed.getChild().get(0)).getCursor())); } + + @Test + public void visit_fieldsummary() { + LogicalPlan actual = analyze(new FieldSummary(List.of())); + assertEquals() + } } diff --git a/docs/user/ppl/cmd/fields.rst b/docs/user/ppl/cmd/fields.rst index 32c3a665d75..f42e9bd74fe 100644 --- a/docs/user/ppl/cmd/fields.rst +++ b/docs/user/ppl/cmd/fields.rst @@ -11,7 +11,7 @@ fields Description ============ -| Using ``field`` command to keep or remove fields from the search result. +| Use ``field`` command to keep or remove fields from the search result. Syntax diff --git a/docs/user/ppl/cmd/fieldsummary.rst b/docs/user/ppl/cmd/fieldsummary.rst new file mode 100644 index 00000000000..47c610d4cfd --- /dev/null +++ b/docs/user/ppl/cmd/fieldsummary.rst @@ -0,0 +1,56 @@ +============= +fieldsummary +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +| Use ``fieldsummary`` command to provide summary statistics for all fields in the current result set. + +This command will: + - Calculate basic statistics for each field (count, distinct count and min, max, avg for numeric fields) + - Determine the data type of each field + +Syntax +============ +fieldsummary [includefields="[,]"] + +* includefields: optional. specify which fields to include in the summary. + +Example 1: Perform fieldsummary without additional fields +============================================== + +PPL query:: + + os> source=nyc_taxi | fieldsummary; + fetched rows / total rows = 4/4 + +--------------+-------+----------+--------------------+---------+--------+--------+ + | Field | Count | Distinct | Avg | Max | Min | Type | + |--------------+-------+----------|--------------------+---------+--------+--------| + | anomaly_type | 973 | 1 | | | | string | + | category | 973 | 2 | | | | string | + | value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float | + | timestamp | 973 | 973 | | | | date | + +--------------+-------+----------+--------------------+---------+--------+--------+ + +Example 2: Perform fieldsummary with includefields +============================================== + +PPL query:: + + os> source=accounts | fieldsummary includefields="category,value" ; + fetched rows / total rows = 2/2 + +--------------+-------+----------+--------------------+---------+--------+--------+ + | Field | Count | Distinct | Avg | Max | Min | Type | + |--------------+-------+----------|--------------------+---------+--------+--------| + | category | 973 | 2 | | | | string | + | value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float | + +--------------+-------+----------+--------------------+---------+--------+--------+ + + diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java index f6147770dcd..cbf74b8735e 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/OpenSearchRequestBuilder.java @@ -15,7 +15,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -291,15 +290,22 @@ public void pushTypeMapping(Map typeMapping) { } public void pushFieldSummaryTypeMapping() { - Map typeMapping = Map.of( - "Field", OpenSearchDataType.of(OpenSearchDataType.MappingType.Text), - "Count", OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), - "Distinct", OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), - "Avg", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), - "Min", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), - "Max", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), - "Sum", OpenSearchDataType.of(OpenSearchDataType.MappingType.Double) - ); + Map typeMapping = + Map.of( + "Field", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Text), + "Count", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), + "Distinct", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer), + "Avg", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Min", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Max", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Double), + "Type", + OpenSearchDataType.of(OpenSearchDataType.MappingType.Text)); exprValueFactory.extendTypeMapping(typeMapping); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java index 0a72416aae0..98294ef2ccd 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/agg/FieldSummaryAggregationParser.java @@ -15,80 +15,82 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; - import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.sql.data.type.ExprType; /** No Bucket Aggregation Parser which include only metric parsers. */ public class FieldSummaryAggregationParser implements OpenSearchAggregationResponseParser { - private final MetricParserHelper metricsParser; - private Map aggregationToFieldNameMap; - + private final MetricParserHelper metricsParser; + private Map> aggregationToFieldNameMap; - public FieldSummaryAggregationParser(List metricParserList, Map aggregationToFieldNameMap) { - metricParserList.addAll(Arrays.asList( + public FieldSummaryAggregationParser( + List metricParserList, + Map> aggregationToFieldNameMap) { + metricParserList.addAll( + Arrays.asList( new SingleValueParser("Field"), new SingleValueParser("Count"), new SingleValueParser("Distinct"), new SingleValueParser("Avg"), new SingleValueParser("Max"), new SingleValueParser("Min"), - new SingleValueParser("Sum") - )); - metricsParser = new MetricParserHelper(metricParserList); - this.aggregationToFieldNameMap = aggregationToFieldNameMap; - } + new SingleValueParser("Type"))); + metricsParser = new MetricParserHelper(metricParserList); + this.aggregationToFieldNameMap = aggregationToFieldNameMap; + } - @Override - public List> parse(Aggregations aggregations) { - List> summaryTable = new ArrayList<>(); + @Override + public List> parse(Aggregations aggregations) { + List> summaryTable = new ArrayList<>(); - Map> aggregationsByField = new HashMap<>(); + Map, List> aggregationsByField = new HashMap<>(); - for (Aggregation aggregation: aggregations.asList()) { - String aggregationName = aggregation.getName(); - String aggregationField = aggregationToFieldNameMap.get(aggregationName); + for (Aggregation aggregation : aggregations.asList()) { + String aggregationName = aggregation.getName(); + Map.Entry aggregationField = aggregationToFieldNameMap.get(aggregationName); - aggregationsByField.putIfAbsent(aggregationField, new ArrayList()); - aggregationsByField.get(aggregationField).add(aggregation); - } - - for (Map.Entry> entry: aggregationsByField.entrySet()) { - Map row = new HashMap<>(); - row.put("Field", entry.getKey()); - row.putAll(transformKeys(metricsParser.parse(new Aggregations(entry.getValue())))); - summaryTable.add(row); - } + aggregationsByField.putIfAbsent(aggregationField, new ArrayList()); + aggregationsByField.get(aggregationField).add(aggregation); + } - return summaryTable; + for (Map.Entry, List> entry : + aggregationsByField.entrySet()) { + Map row = new HashMap<>(); + row.put("Field", entry.getKey().getKey()); + row.putAll(transformKeys(metricsParser.parse(new Aggregations(entry.getValue())))); + row.put("Type", entry.getKey().getValue().toString()); + summaryTable.add(row); } - private static Map transformKeys(Map map) { - Map newMap = new HashMap<>(); + return summaryTable; + } - for (Map.Entry entry : map.entrySet()) { - String originalKey = entry.getKey(); - String newKey = extractAggregationType(originalKey); - newMap.put(newKey, entry.getValue()); - } + private static Map transformKeys(Map map) { + Map newMap = new HashMap<>(); - return newMap; + for (Map.Entry entry : map.entrySet()) { + String originalKey = entry.getKey(); + String newKey = extractAggregationType(originalKey); + newMap.put(newKey, entry.getValue()); } - private static String extractAggregationType(String key) { - String[] aggregationTypes = {"Count", "Avg", "Min", "Max", "Distinct", "Sum"}; + return newMap; + } - for (String type : aggregationTypes) { - if (key.startsWith(type)) { - return type; - } - } + private static String extractAggregationType(String key) { + String[] aggregationTypes = {"Count", "Avg", "Min", "Max", "Distinct"}; - return "Unknown"; + for (String type : aggregationTypes) { + if (key.startsWith(type)) { + return type; + } } + + return "Unknown"; + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java index 0c2b9862e7a..c91b767be55 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanAggregationBuilder.java @@ -13,6 +13,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.NamedExpression; import org.opensearch.sql.expression.ReferenceExpression; @@ -40,14 +41,13 @@ class OpenSearchIndexScanAggregationBuilder implements PushDownQueryBuilder { private final List groupByList; /** Is aggregation performing a fieldsumary aggregation */ - private Map aggregationToFieldNameMap; + private Map> aggregationToFieldNameMap; /** Sorting items pushed down. */ private List> sortList; OpenSearchIndexScanAggregationBuilder( - OpenSearchRequestBuilder requestBuilder, LogicalFieldSummary aggregation - ) { + OpenSearchRequestBuilder requestBuilder, LogicalFieldSummary aggregation) { this(requestBuilder, (LogicalAggregation) aggregation); aggregationToFieldNameMap = aggregation.getAggregationToFieldNameMap(); } @@ -64,7 +64,8 @@ public OpenSearchRequestBuilder build() { AggregationQueryBuilder builder = new AggregationQueryBuilder(new DefaultExpressionSerializer()); Pair, OpenSearchAggregationResponseParser> aggregationBuilder = - builder.buildAggregationBuilder(aggregatorList, groupByList, sortList, aggregationToFieldNameMap); + builder.buildAggregationBuilder( + aggregatorList, groupByList, sortList, aggregationToFieldNameMap); requestBuilder.pushDownAggregation(aggregationBuilder); requestBuilder.pushTypeMapping(builder.buildTypeMapping(aggregatorList, groupByList)); if (aggregationToFieldNameMap != null) { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java index b098963b9c9..9e4dba9436a 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java @@ -72,8 +72,10 @@ public boolean pushDownAggregation(LogicalAggregation aggregation) { // Switch to builder for aggregate query which has different push down logic // for later filter, sort and limit operator. - if (aggregation instanceof LogicalFieldSummary){ - delegate = new OpenSearchIndexScanAggregationBuilder(delegate.build(), (LogicalFieldSummary) aggregation); + if (aggregation instanceof LogicalFieldSummary) { + delegate = + new OpenSearchIndexScanAggregationBuilder( + delegate.build(), (LogicalFieldSummary) aggregation); } else { delegate = new OpenSearchIndexScanAggregationBuilder(delegate.build(), aggregation); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java index 6a9e7952a89..9d4ceef4456 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/aggregation/AggregationQueryBuilder.java @@ -23,6 +23,7 @@ import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.sort.SortOrder; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.expression.Expression; import org.opensearch.sql.expression.ExpressionNodeVisitor; import org.opensearch.sql.expression.NamedExpression; @@ -62,7 +63,7 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { /** Build AggregationBuilder. */ public Pair, OpenSearchAggregationResponseParser> - buildAggregationBuilder( + buildAggregationBuilder( List namedAggregatorList, List groupByList, List> sortList) { @@ -75,17 +76,17 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { List namedAggregatorList, List groupByList, List> sortList, - Map aggregationToFieldNameMap) { + Map> aggregationToFieldNameMap) { final Pair> metrics = metricBuilder.build(namedAggregatorList); if (aggregationToFieldNameMap != null) { return Pair.of( - ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), - new FieldSummaryAggregationParser(metrics.getRight(), aggregationToFieldNameMap)); + ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), + new FieldSummaryAggregationParser(metrics.getRight(), aggregationToFieldNameMap)); } else if (groupByList.isEmpty()) { - // no bucket + // no bucket return Pair.of( ImmutableList.copyOf(metrics.getLeft().getAggregatorFactories()), new NoBucketAggregationParser(metrics.getRight())); @@ -107,7 +108,7 @@ public AggregationQueryBuilder(ExpressionSerializer serializer) { .collect(Collectors.toList()))) .subAggregations(metrics.getLeft()) .size(AGGREGATION_BUCKET_SIZE)), - new CompositeAggregationParser(metrics.getRight())); + new CompositeAggregationParser(metrics.getRight())); } } diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 6001eb2ddea..dd27f44e6c6 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -439,8 +439,12 @@ public UnresolvedPlan visitTrendlineCommand(OpenSearchPPLParser.TrendlineCommand } @Override - public UnresolvedPlan visitFieldsummaryCommand(OpenSearchPPLParser.FieldsummaryCommandContext ctx) { - return new FieldSummary(ctx.fieldsummaryParameter().stream().map(arg -> expressionBuilder.visit(arg)).collect(Collectors.toList())); + public UnresolvedPlan visitFieldsummaryCommand( + OpenSearchPPLParser.FieldsummaryCommandContext ctx) { + return new FieldSummary( + ctx.fieldsummaryParameter().stream() + .map(arg -> expressionBuilder.visit(arg)) + .collect(Collectors.toList())); } /** Get original text in query. */ diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index f71a47bcab2..c78568925ee 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -175,8 +175,10 @@ public UnresolvedExpression visitStatsFunctionCall(StatsFunctionCallContext ctx) } @Override - public UnresolvedExpression visitFieldsummaryIncludeFields(OpenSearchPPLParser.FieldsummaryIncludeFieldsContext ctx) { - List list = ctx.fieldList().fieldExpression().stream() + public UnresolvedExpression visitFieldsummaryIncludeFields( + OpenSearchPPLParser.FieldsummaryIncludeFieldsContext ctx) { + List list = + ctx.fieldList().fieldExpression().stream() .map(this::visitFieldExpression) .collect(Collectors.toList()); return new AttributeList(list); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index c6f4ed20447..b0d79ee7dfb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -49,18 +49,21 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; +import java.util.List; import java.util.Optional; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.AttributeList; import org.opensearch.sql.ast.expression.DataType; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; +import org.opensearch.sql.ast.tree.FieldSummary; import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; @@ -848,6 +851,16 @@ public void testShowDataSourcesCommand() { assertEqual("show datasources", relation(DATASOURCES_TABLE_NAME)); } + @Test + public void testFieldSummaryCommand() { + assertEqual("source=t | fieldsummary", new FieldSummary(List.of()).attach(relation("t"))); + + assertEqual( + "source=t | fieldsummary includefields = field_name", + new FieldSummary(List.of(new AttributeList(List.of(field("field_name"))))) + .attach(relation("t"))); + } + protected void assertEqual(String query, Node expectedPlan) { Node actualPlan = plan(query); assertEquals(expectedPlan, actualPlan);