72 changes: 70 additions & 2 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
@@ -26,7 +29,9 @@
import com.google.common.collect.ImmutableSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -40,7 +42,6 @@
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Let;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.Map;
import org.opensearch.sql.ast.expression.ParseMethod;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
@@ -50,6 +51,7 @@
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.FetchCursor;
import org.opensearch.sql.ast.tree.FieldSummary;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
@@ -72,6 +74,7 @@
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.data.model.ExprMissingValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.datasource.DataSourceService;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
@@ -93,6 +96,7 @@
import org.opensearch.sql.planner.logical.LogicalDedupe;
import org.opensearch.sql.planner.logical.LogicalEval;
import org.opensearch.sql.planner.logical.LogicalFetchCursor;
import org.opensearch.sql.planner.logical.LogicalFieldSummary;
import org.opensearch.sql.planner.logical.LogicalFilter;
import org.opensearch.sql.planner.logical.LogicalLimit;
import org.opensearch.sql.planner.logical.LogicalML;
@@ -277,7 +281,7 @@ public LogicalPlan visitRename(Rename node, AnalysisContext context) {
LogicalPlan child = node.getChild().get(0).accept(this, context);
ImmutableMap.Builder<ReferenceExpression, ReferenceExpression> renameMapBuilder =
new ImmutableMap.Builder<>();
for (Map renameMap : node.getRenameList()) {
for (org.opensearch.sql.ast.expression.Map renameMap : node.getRenameList()) {
Expression origin = expressionAnalyzer.analyze(renameMap.getOrigin(), context);
// We should define the new target field in the context instead of analyze it.
if (renameMap.getTarget() instanceof Field) {
@@ -336,6 +340,70 @@ public LogicalPlan visitAggregation(Aggregation node, AnalysisContext context) {
return new LogicalAggregation(child, aggregators, groupBys);
}

@Override
public LogicalPlan visitFieldSummary(FieldSummary node, AnalysisContext context) {
LogicalPlan child = node.getChild().getFirst().accept(this, context);

TypeEnvironment env = context.peek();
Map<String, ExprType> fieldsMap = env.lookupAllFields(Namespace.FIELD_NAME);

if (node.getIncludeFields() != null) {
List<String> includeFields =
Collaborator: consider: for each key in fieldsMap, check to see if it's in includedFields, and if not, remove it from the map.

node.getIncludeFields().stream()
.map(expr -> ((Field) expr).getField().toString())
.toList();
Collaborator: you can do your filter and collect to map in your stream, and then return a map (see the sketch after this block).


Map<String, ExprType> filteredFields = new HashMap<>();
for (String field : includeFields) {
if (fieldsMap.containsKey(field)) {
filteredFields.put(field, fieldsMap.get(field));
}
}
fieldsMap = filteredFields;
}
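
A possible shape for the stream suggestion above (a sketch only, not part of this diff; it reuses the method's fieldsMap and node, and assumes an extra java.util.Set import):

    if (node.getIncludeFields() != null) {
      // Collect the requested field names once, then filter and re-collect the map in one stream.
      Set<String> includeFields =
          node.getIncludeFields().stream()
              .map(expr -> ((Field) expr).getField().toString())
              .collect(Collectors.toSet());
      fieldsMap =
          fieldsMap.entrySet().stream()
              .filter(entry -> includeFields.contains(entry.getKey()))
              .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    }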

ImmutableList.Builder<NamedAggregator> aggregatorBuilder = new ImmutableList.Builder<>();
Map<String, Map.Entry<String, ExprType>> aggregatorToFieldNameMap = new HashMap<>();

for (Map.Entry<String, ExprType> entry : fieldsMap.entrySet()) {
ExprType fieldType = entry.getValue();
String fieldName = entry.getKey();
ReferenceExpression fieldExpression = DSL.ref(fieldName, fieldType);

aggregatorBuilder.add(new NamedAggregator("Count" + fieldName, DSL.count(fieldExpression)));
aggregatorToFieldNameMap.put("Count" + fieldName, entry);
aggregatorBuilder.add(
new NamedAggregator("Distinct" + fieldName, DSL.distinctCount(fieldExpression)));
aggregatorToFieldNameMap.put("Distinct" + fieldName, entry);

if (ExprCoreType.numberTypes().contains(fieldType)) {
aggregatorBuilder.add(new NamedAggregator("Max" + fieldName, DSL.max(fieldExpression)));
aggregatorToFieldNameMap.put("Max" + fieldName, entry);
aggregatorBuilder.add(new NamedAggregator("Min" + fieldName, DSL.min(fieldExpression)));
aggregatorToFieldNameMap.put("Min" + fieldName, entry);
aggregatorBuilder.add(new NamedAggregator("Avg" + fieldName, DSL.avg(fieldExpression)));
aggregatorToFieldNameMap.put("Avg" + fieldName, entry);
}
}

ImmutableList<NamedAggregator> aggregators = aggregatorBuilder.build();
ImmutableList<NamedExpression> groupBys = new ImmutableList.Builder<NamedExpression>().build();

// new context
context.push();
TypeEnvironment newEnv = context.peek();

newEnv.define(new Symbol(Namespace.FIELD_NAME, "Field"), ExprCoreType.STRING);
Collaborator: consider encapsulating all the types and lists for FieldSummary within a class, like a FieldSummaryAggregationExpression. Then you can define enums for each column, and builders for the analyzer and parser (see the sketch after this method).

newEnv.define(new Symbol(Namespace.FIELD_NAME, "Count"), ExprCoreType.INTEGER);
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Distinct"), ExprCoreType.INTEGER);
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Avg"), ExprCoreType.DOUBLE);
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Max"), ExprCoreType.DOUBLE);
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Min"), ExprCoreType.DOUBLE);
newEnv.define(new Symbol(Namespace.FIELD_NAME, "Type"), ExprCoreType.STRING);

return new LogicalFieldSummary(child, aggregators, groupBys, aggregatorToFieldNameMap);
}
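
A rough sketch of the encapsulation suggested in the review comment above (a sketch only, not part of this diff; FieldSummaryColumn and its members are hypothetical names, and the only dependency is org.opensearch.sql.data.type.ExprCoreType):

    /** Owns the field-summary output column names and their result types in one place. */
    public enum FieldSummaryColumn {
      FIELD("Field", ExprCoreType.STRING),
      COUNT("Count", ExprCoreType.INTEGER),
      DISTINCT("Distinct", ExprCoreType.INTEGER),
      AVG("Avg", ExprCoreType.DOUBLE),
      MAX("Max", ExprCoreType.DOUBLE),
      MIN("Min", ExprCoreType.DOUBLE),
      TYPE("Type", ExprCoreType.STRING);

      private final String columnName;
      private final ExprCoreType type;

      FieldSummaryColumn(String columnName, ExprCoreType type) {
        this.columnName = columnName;
        this.type = type;
      }

      public String columnName() {
        return columnName;
      }

      public ExprCoreType type() {
        return type;
      }
    }

The symbol definitions above (and, with a small extra mapping, the OpenSearch type mapping later in this PR) could then iterate FieldSummaryColumn.values() instead of repeating the column names and types in several places.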

/** Build {@link LogicalRareTopN}. */
@Override
public LogicalPlan visitRareTopN(RareTopN node, AnalysisContext context) {
@@ -17,6 +17,7 @@
import org.opensearch.sql.ast.expression.Compare;
import org.opensearch.sql.ast.expression.EqualTo;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.FieldList;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.HighlightFunction;
import org.opensearch.sql.ast.expression.In;
@@ -45,6 +46,7 @@
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.FetchCursor;
import org.opensearch.sql.ast.tree.FieldSummary;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
@@ -326,4 +328,12 @@ public T visitCloseCursor(CloseCursor closeCursor, C context) {
public T visitFillNull(FillNull fillNull, C context) {
return visitChildren(fillNull, context);
}

public T visitFieldSummary(FieldSummary fieldSummary, C context) {
return visitChildren(fieldSummary, context);
}

public T visitFieldList(FieldList fieldList, C context) {
return visitChildren(fieldList, context);
}
}
6 changes: 6 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
@@ -49,6 +49,7 @@
import org.opensearch.sql.ast.tree.Aggregation;
import org.opensearch.sql.ast.tree.Dedupe;
import org.opensearch.sql.ast.tree.Eval;
import org.opensearch.sql.ast.tree.FieldSummary;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Filter;
import org.opensearch.sql.ast.tree.Head;
@@ -506,4 +507,9 @@ public static FillNull fillNull(
return new FillNull(
FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build()));
}

public static FieldSummary fieldSummary(
UnresolvedPlan input, List<UnresolvedExpression> includeFields) {
return new FieldSummary(includeFields).attach(input);
}
}
@@ -0,0 +1,33 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.expression;

import com.google.common.collect.ImmutableList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;

/** Expression node that includes a list of fields nodes. */
@Getter
@ToString
@EqualsAndHashCode(callSuper = false)
@AllArgsConstructor
public class FieldList extends UnresolvedExpression {
private final List<Field> fieldList;

@Override
public List<UnresolvedExpression> getChild() {
return ImmutableList.copyOf(fieldList);
}

@Override
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitFieldList(this, context);
}
}
@@ -0,0 +1,48 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ast.tree;

import java.util.List;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.Node;
import org.opensearch.sql.ast.expression.AttributeList;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

@Getter
@ToString
@EqualsAndHashCode(callSuper = false)
public class FieldSummary extends UnresolvedPlan {
private UnresolvedPlan child;
private List<UnresolvedExpression> includeFields;

public FieldSummary(List<UnresolvedExpression> collect) {
collect.forEach(
exp -> {
if (exp instanceof AttributeList) {
this.includeFields = ((AttributeList) exp).getAttrList();
}
});
}

@Override
public List<? extends Node> getChild() {
return child == null ? List.of() : List.of(child);
}

@Override
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitFieldSummary(this, context);
}

@Override
public FieldSummary attach(UnresolvedPlan child) {
this.child = child;
return this;
}
}
@@ -0,0 +1,28 @@
package org.opensearch.sql.planner.logical;

import java.util.List;
import java.util.Map;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.expression.NamedExpression;
import org.opensearch.sql.expression.aggregation.NamedAggregator;

/** Logical Field Summary. */
@Getter
@ToString
@EqualsAndHashCode(callSuper = true)
public class LogicalFieldSummary extends LogicalAggregation {

Map<String, Map.Entry<String, ExprType>> aggregationToFieldNameMap;

public LogicalFieldSummary(
LogicalPlan child,
List<NamedAggregator> aggregatorList,
List<NamedExpression> groupByList,
Map<String, Map.Entry<String, ExprType>> aggregationToFieldNameMap) {
super(child, aggregatorList, groupByList);
this.aggregationToFieldNameMap = aggregationToFieldNameMap;
}
}
@@ -85,6 +85,7 @@
import org.opensearch.sql.ast.tree.AD;
import org.opensearch.sql.ast.tree.CloseCursor;
import org.opensearch.sql.ast.tree.FetchCursor;
import org.opensearch.sql.ast.tree.FieldSummary;
import org.opensearch.sql.ast.tree.FillNull;
import org.opensearch.sql.ast.tree.Kmeans;
import org.opensearch.sql.ast.tree.ML;
@@ -1871,4 +1872,10 @@ public void visit_close_cursor() {
() ->
assertEquals("pewpew", ((LogicalFetchCursor) analyzed.getChild().get(0)).getCursor()));
}

  @Test
  public void visit_fieldsummary() {
    // Minimal assertion sketch; assumes AstDSL, assertTrue, and LogicalFieldSummary are imported in this test class.
    LogicalPlan actual = analyze(new FieldSummary(List.of()).attach(AstDSL.relation("schema")));
    assertTrue(actual instanceof LogicalFieldSummary);
  }
}
2 changes: 1 addition & 1 deletion docs/user/ppl/cmd/fields.rst
@@ -11,7 +11,7 @@ fields

Description
============
| Using ``field`` command to keep or remove fields from the search result.
| Use the ``fields`` command to keep or remove fields from the search result.


Syntax
56 changes: 56 additions & 0 deletions docs/user/ppl/cmd/fieldsummary.rst
@@ -0,0 +1,56 @@
=============
fieldsummary
=============

.. rubric:: Table of contents

.. contents::
:local:
:depth: 2


Description
============
| Use the ``fieldsummary`` command to generate summary statistics for all fields in the current result set.

This command will:

- Calculate basic statistics for each field: count and distinct count for every field, plus min, max, and avg for numeric fields
- Determine the data type of each field

Syntax
============
fieldsummary [includefields="<field_name>[,<field_name>]"]

* includefields: optional. A comma-separated list of field names to include in the summary. If omitted, all fields in the result set are summarized.

Example 1: Perform fieldsummary on all fields
===============================================

PPL query::

os> source=nyc_taxi | fieldsummary;
fetched rows / total rows = 4/4
+--------------+-------+----------+--------------------+---------+--------+--------+
| Field        | Count | Distinct | Avg                | Max     | Min    | Type   |
|--------------+-------+----------+--------------------+---------+--------+--------|
| anomaly_type | 973   | 1        |                    |         |        | string |
| category     | 973   | 2        |                    |         |        | string |
| value        | 973   | 953      | 14679.110996916752 | 29985.0 | 1769.0 | float  |
| timestamp    | 973   | 973      |                    |         |        | date   |
+--------------+-------+----------+--------------------+---------+--------+--------+

Example 2: Perform fieldsummary with includefields
====================================================

PPL query::

os> source=nyc_taxi | fieldsummary includefields="category,value";
fetched rows / total rows = 2/2
+--------------+-------+----------+--------------------+---------+--------+--------+
| Field        | Count | Distinct | Avg                | Max     | Min    | Type   |
|--------------+-------+----------+--------------------+---------+--------+--------|
| category     | 973   | 2        |                    |         |        | string |
| value        | 973   | 953      | 14679.110996916752 | 29985.0 | 1769.0 | float  |
+--------------+-------+----------+--------------------+---------+--------+--------+


@@ -289,6 +289,26 @@ public void pushTypeMapping(Map<String, OpenSearchDataType> typeMapping) {
exprValueFactory.extendTypeMapping(typeMapping);
}

public void pushFieldSummaryTypeMapping() {
Map<String, OpenSearchDataType> typeMapping =
Map.of(
"Field",
OpenSearchDataType.of(OpenSearchDataType.MappingType.Text),
"Count",
OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer),
"Distinct",
OpenSearchDataType.of(OpenSearchDataType.MappingType.Integer),
"Avg",
OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
"Min",
OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
"Max",
OpenSearchDataType.of(OpenSearchDataType.MappingType.Double),
"Type",
OpenSearchDataType.of(OpenSearchDataType.MappingType.Text));
exprValueFactory.extendTypeMapping(typeMapping);
}

private boolean isSortByDocOnly() {
List<SortBuilder<?>> sorts = sourceBuilder.sorts();
if (sorts != null) {