-
Notifications
You must be signed in to change notification settings - Fork 181
#3026 PPL - fieldsummary command
#3320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,9 @@ | |
| import com.google.common.collect.ImmutableSet; | ||
| import java.util.ArrayList; | ||
| import java.util.Collections; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
| import java.util.stream.Collectors; | ||
|
|
@@ -40,7 +42,6 @@ | |
| import org.opensearch.sql.ast.expression.Field; | ||
| import org.opensearch.sql.ast.expression.Let; | ||
| import org.opensearch.sql.ast.expression.Literal; | ||
| import org.opensearch.sql.ast.expression.Map; | ||
| import org.opensearch.sql.ast.expression.ParseMethod; | ||
| import org.opensearch.sql.ast.expression.QualifiedName; | ||
| import org.opensearch.sql.ast.expression.UnresolvedExpression; | ||
|
|
@@ -50,6 +51,7 @@ | |
| import org.opensearch.sql.ast.tree.Dedupe; | ||
| import org.opensearch.sql.ast.tree.Eval; | ||
| import org.opensearch.sql.ast.tree.FetchCursor; | ||
| import org.opensearch.sql.ast.tree.FieldSummary; | ||
| import org.opensearch.sql.ast.tree.FillNull; | ||
| import org.opensearch.sql.ast.tree.Filter; | ||
| import org.opensearch.sql.ast.tree.Head; | ||
|
|
@@ -72,6 +74,7 @@ | |
| import org.opensearch.sql.common.antlr.SyntaxCheckException; | ||
| import org.opensearch.sql.data.model.ExprMissingValue; | ||
| import org.opensearch.sql.data.type.ExprCoreType; | ||
| import org.opensearch.sql.data.type.ExprType; | ||
| import org.opensearch.sql.datasource.DataSourceService; | ||
| import org.opensearch.sql.exception.SemanticCheckException; | ||
| import org.opensearch.sql.expression.DSL; | ||
|
|
@@ -93,6 +96,7 @@ | |
| import org.opensearch.sql.planner.logical.LogicalDedupe; | ||
| import org.opensearch.sql.planner.logical.LogicalEval; | ||
| import org.opensearch.sql.planner.logical.LogicalFetchCursor; | ||
| import org.opensearch.sql.planner.logical.LogicalFieldSummary; | ||
| import org.opensearch.sql.planner.logical.LogicalFilter; | ||
| import org.opensearch.sql.planner.logical.LogicalLimit; | ||
| import org.opensearch.sql.planner.logical.LogicalML; | ||
|
|
@@ -277,7 +281,7 @@ public LogicalPlan visitRename(Rename node, AnalysisContext context) { | |
| LogicalPlan child = node.getChild().get(0).accept(this, context); | ||
| ImmutableMap.Builder<ReferenceExpression, ReferenceExpression> renameMapBuilder = | ||
| new ImmutableMap.Builder<>(); | ||
| for (Map renameMap : node.getRenameList()) { | ||
| for (org.opensearch.sql.ast.expression.Map renameMap : node.getRenameList()) { | ||
| Expression origin = expressionAnalyzer.analyze(renameMap.getOrigin(), context); | ||
| // We should define the new target field in the context instead of analyze it. | ||
| if (renameMap.getTarget() instanceof Field) { | ||
|
|
@@ -336,6 +340,70 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) { | |
| return new LogicalAggregation(child, aggregators, groupBys); | ||
| } | ||
|
|
||
| @Override | ||
| public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context) { | ||
| LogicalPlan child = node.getChild().getFirst().accept(this, context); | ||
|
|
||
| TypeEnvironment env = context.peek(); | ||
| Map<String, ExprType> fieldsMap = env.lookupAllFields(Namespace.FIELD_NAME); | ||
|
|
||
| if (node.getIncludeFields() != null) { | ||
| List<String> includeFields = | ||
| node.getIncludeFields().stream() | ||
| .map(expr -> ((Field) expr).getField().toString()) | ||
| .toList(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can do your filter and collect to map in your stream, and then return a map. |
||
|
|
||
| Map<String, ExprType> filteredFields = new HashMap<>(); | ||
| for (String field : includeFields) { | ||
| if (fieldsMap.containsKey(field)) { | ||
| filteredFields.put(field, fieldsMap.get(field)); | ||
| } | ||
| } | ||
| fieldsMap = filteredFields; | ||
| } | ||
|
|
||
| ImmutableList.Builder<NamedAggregator> aggregatorBuilder = new ImmutableList.Builder<>(); | ||
| Map<String, Map.Entry<String, ExprType>> aggregatorToFieldNameMap = new HashMap<>(); | ||
|
|
||
| for (Map.Entry<String, ExprType> entry : fieldsMap.entrySet()) { | ||
| ExprType fieldType = entry.getValue(); | ||
| String fieldName = entry.getKey(); | ||
| ReferenceExpression fieldExpression = DSL.ref(fieldName, fieldType); | ||
|
|
||
| aggregatorBuilder.add(new NamedAggregator("Count" + fieldName, DSL.count(fieldExpression))); | ||
| aggregatorToFieldNameMap.put("Count" + fieldName, entry); | ||
| aggregatorBuilder.add( | ||
| new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression))); | ||
| aggregatorToFieldNameMap.put("Distinct" + fieldName, entry); | ||
|
|
||
| if (ExprCoreType.numberTypes().contains(fieldType)) { | ||
| aggregatorBuilder.add(new NamedAggregator("Max" + fieldName, DSL.max(fieldExpression))); | ||
| aggregatorToFieldNameMap.put("Max" + fieldName, entry); | ||
| aggregatorBuilder.add(new NamedAggregator("Min" + fieldName, DSL.min(fieldExpression))); | ||
| aggregatorToFieldNameMap.put("Min" + fieldName, entry); | ||
| aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression))); | ||
| aggregatorToFieldNameMap.put("Avg" + fieldName, entry); | ||
| } | ||
| } | ||
|
|
||
| ImmutableList<NamedAggregator> aggregators = aggregatorBuilder.build(); | ||
| ImmutableList<NamedExpression> groupBys = new ImmutableList.Builder<NamedExpression>().build(); | ||
|
|
||
| // new context | ||
| context.push(); | ||
| TypeEnvironment newEnv = context.peek(); | ||
|
|
||
| newEnv.define(new Symbol(Namespace.FIELD_NAME, "Field"), ExprCoreType.STRING); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider encapsulating all the types and lists for FieldSummary within a class, like a |
||
| newEnv.define(new Symbol(Namespace.FIELD_NAME, "Count"), ExprCoreType.INTEGER); | ||
| newEnv.define(new Symbol(Namespace.FIELD_NAME, "Distinct"), ExprCoreType.INTEGER); | ||
| newEnv.define(new Symbol(Namespace.FIELD_NAME, "Avg"), ExprCoreType.DOUBLE); | ||
| newEnv.define(new Symbol(Namespace.FIELD_NAME, "Max"), ExprCoreType.DOUBLE); | ||
| newEnv.define(new Symbol(Namespace.FIELD_NAME, "Min"), ExprCoreType.DOUBLE); | ||
| newEnv.define(new Symbol(Namespace.FIELD_NAME, "Type"), ExprCoreType.STRING); | ||
|
|
||
| return new LogicalFieldSummary(child, aggregators, groupBys, aggregatorToFieldNameMap); | ||
| } | ||
|
|
||
| /** Build {@link LogicalRareTopN}. */ | ||
| @Override | ||
| public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.ast.expression; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import java.util.List; | ||
| import lombok.AllArgsConstructor; | ||
| import lombok.EqualsAndHashCode; | ||
| import lombok.Getter; | ||
| import lombok.ToString; | ||
| import org.opensearch.sql.ast.AbstractNodeVisitor; | ||
|
|
||
| /** Expression node that includes a list of fields nodes. */ | ||
| @Getter | ||
| @ToString | ||
| @EqualsAndHashCode(callSuper = false) | ||
| @AllArgsConstructor | ||
| public class FieldList extends UnresolvedExpression { | ||
| private final List<Field> fieldList; | ||
|
|
||
| @Override | ||
| public List<UnresolvedExpression> getChild() { | ||
| return ImmutableList.copyOf(fieldList); | ||
| } | ||
|
|
||
| @Override | ||
| public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) { | ||
| return nodeVisitor.visitFieldList(this, context); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.ast.tree; | ||
|
|
||
| import java.util.List; | ||
| import lombok.EqualsAndHashCode; | ||
| import lombok.Getter; | ||
| import lombok.ToString; | ||
| import org.opensearch.sql.ast.AbstractNodeVisitor; | ||
| import org.opensearch.sql.ast.Node; | ||
| import org.opensearch.sql.ast.expression.AttributeList; | ||
| import org.opensearch.sql.ast.expression.UnresolvedExpression; | ||
|
|
||
| @Getter | ||
| @ToString | ||
| @EqualsAndHashCode(callSuper = false) | ||
| public class FieldSummary extends UnresolvedPlan { | ||
| private UnresolvedPlan child; | ||
| private List<UnresolvedExpression> includeFields; | ||
|
|
||
| public FieldSummary(List<UnresolvedExpression> collect) { | ||
| collect.forEach( | ||
| exp -> { | ||
| if (exp instanceof AttributeList) { | ||
| this.includeFields = ((AttributeList) exp).getAttrList(); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| @Override | ||
| public List<? extends Node> getChild() { | ||
| return child == null ? List.of() : List.of(child); | ||
| } | ||
|
|
||
| @Override | ||
| public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) { | ||
| return nodeVisitor.visitFieldSummary(this, context); | ||
| } | ||
|
|
||
| @Override | ||
| public FieldSummary attach(UnresolvedPlan child) { | ||
| this.child = child; | ||
| return this; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| package org.opensearch.sql.planner.logical; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Map; | ||
| import lombok.EqualsAndHashCode; | ||
| import lombok.Getter; | ||
| import lombok.ToString; | ||
| import org.opensearch.sql.data.type.ExprType; | ||
| import org.opensearch.sql.expression.NamedExpression; | ||
| import org.opensearch.sql.expression.aggregation.NamedAggregator; | ||
|
|
||
| /** Logical Field Summary. */ | ||
| @Getter | ||
| @ToString | ||
| @EqualsAndHashCode(callSuper = true) | ||
| public class LogicalFieldSummary extends LogicalAggregation { | ||
|
|
||
| Map<String, Map.Entry<String, ExprType>> aggregationToFieldNameMap; | ||
|
|
||
| public LogicalFieldSummary( | ||
| LogicalPlan child, | ||
| List<NamedAggregator> aggregatorList, | ||
| List<NamedExpression> groupByList, | ||
| Map<String, Map.Entry<String, ExprType>> aggregationToFieldNameMap) { | ||
| super(child, aggregatorList, groupByList); | ||
| this.aggregationToFieldNameMap = aggregationToFieldNameMap; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| ============= | ||
| fieldsummary | ||
| ============= | ||
|
|
||
| .. rubric:: Table of contents | ||
|
|
||
| .. contents:: | ||
| :local: | ||
| :depth: 2 | ||
|
|
||
|
|
||
| Description | ||
| ============ | ||
| | Use ``fieldsummary`` command to provide summary statistics for all fields in the current result set. | ||
|
|
||
| This command will: | ||
| - Calculate basic statistics for each field (count, distinct count and min, max, avg for numeric fields) | ||
| - Determine the data type of each field | ||
|
|
||
| Syntax | ||
| ============ | ||
| fieldsummary [includefields="<field_name>[,<field_name>]"] | ||
|
|
||
| * includefields: optional. specify which fields to include in the summary. | ||
|
|
||
| Example 1: Perform fieldsummary without additional fields | ||
| ============================================== | ||
|
|
||
| PPL query:: | ||
|
|
||
| os> source=nyc_taxi | fieldsummary; | ||
| fetched rows / total rows = 4/4 | ||
| +--------------+-------+----------+--------------------+---------+--------+--------+ | ||
| | Field | Count | Distinct | Avg | Max | Min | Type | | ||
| |--------------+-------+----------|--------------------+---------+--------+--------| | ||
| | anomaly_type | 973 | 1 | | | | string | | ||
| | category | 973 | 2 | | | | string | | ||
| | value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float | | ||
| | timestamp | 973 | 973 | | | | date | | ||
| +--------------+-------+----------+--------------------+---------+--------+--------+ | ||
|
|
||
| Example 2: Perform fieldsummary with includefields | ||
| ============================================== | ||
|
|
||
| PPL query:: | ||
|
|
||
| os> source=accounts | fieldsummary includefields="category,value" ; | ||
| fetched rows / total rows = 2/2 | ||
| +--------------+-------+----------+--------------------+---------+--------+--------+ | ||
| | Field | Count | Distinct | Avg | Max | Min | Type | | ||
| |--------------+-------+----------|--------------------+---------+--------+--------| | ||
| | category | 973 | 2 | | | | string | | ||
| | value | 973 | 953 | 14679.110996916752 | 29985.0 | 1769.0 | float | | ||
| +--------------+-------+----------+--------------------+---------+--------+--------+ | ||
|
|
||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
consider: for each key in fieldsMap, check to see if it's in includedFields, and if not, remove it from the map.