diff --git a/docs/3_building_queries.md b/docs/3_building_queries.md index d8207b0..da33de6 100644 --- a/docs/3_building_queries.md +++ b/docs/3_building_queries.md @@ -22,6 +22,8 @@ A Query Builder can be initialized by calling one of these methods on a TypedTab - where - select - join +- groupby +- having - cache e.g. `Person.where(...)` -> `QueryBuilder[Person]` @@ -155,6 +157,35 @@ Person.join('articles', method='inner') # will only yield persons that have rel For more details about relationships and joins, see [4. Relationships](./4_relationships.md). +### groupby & having + +Group query results by one or more fields, typically used with aggregate functions like `count()`, `sum()`, `avg()`, etc. +Use `having` to filter the grouped results based on aggregate conditions. + +```python +# Basic grouping: count articles per author +Article.select(Article.author, Article.id.count().with_alias("article_count")) + .groupby(Article.author) + .collect() + +# Group by multiple fields +Sale.select(Sale.product, Sale.region, Sale.amount.sum().with_alias("total")) + .groupby(Sale.product, Sale.region) + .collect() + +# Filter groups with having: only authors with more than 5 articles +Article.select(Article.author, Article.id.count().with_alias("article_count")) + .groupby(Article.author) + .having(Article.id.count() > 5) + .collect() + +# Can be chained in any order +School.groupby(School.id) + .having(Team.id.count() > 0) + .select(School.id, Team.id.count()) + .collect() +``` + ### cache ```python diff --git a/src/typedal/caching.py b/src/typedal/caching.py index 05175ca..ba18ec9 100644 --- a/src/typedal/caching.py +++ b/src/typedal/caching.py @@ -12,6 +12,7 @@ from pydal.objects import Field, Rows, Set from .fields import TypedField +from .helpers import throw from .rows import TypedRows from .tables import TypedTable from .types import CacheStatus, Query @@ -177,8 +178,12 @@ def clear_cache() -> None: Immediately commits """ + db: TypeDAL = _TypedalCache._db or throw( + RuntimeError("@define or db.define is not called on typedal caching classes yet!") + ) + _TypedalCache.truncate("RESTART IDENTITY CASCADE") - _TypedalCache._db.commit() + db.commit() def clear_expired() -> int: diff --git a/src/typedal/cli.py b/src/typedal/cli.py index ae6c7a0..d18204a 100644 --- a/src/typedal/cli.py +++ b/src/typedal/cli.py @@ -392,7 +392,8 @@ def fake_migrations( previously_migrated = ( db( - db.ewh_implemented_features.name.belongs(to_fake) & (db.ewh_implemented_features.installed == True) # noqa E712 + db.ewh_implemented_features.name.belongs(to_fake) + & (db.ewh_implemented_features.installed == True) # noqa E712 ) .select(db.ewh_implemented_features.name) .column("name") diff --git a/src/typedal/core.py b/src/typedal/core.py index 417948e..b753525 100644 --- a/src/typedal/core.py +++ b/src/typedal/core.py @@ -4,6 +4,7 @@ from __future__ import annotations +import datetime as dt import sys import typing as t import warnings diff --git a/src/typedal/query_builder.py b/src/typedal/query_builder.py index 1f1d760..296ed86 100644 --- a/src/typedal/query_builder.py +++ b/src/typedal/query_builder.py @@ -70,8 +70,7 @@ def __init__( """ self.model = model table = self._ensure_table_defined() - - default_query = table.id > 0 + default_query: Query = table.id > 0 self.query = add_query or default_query self.select_args = select_args or [] self.select_kwargs = select_kwargs or {} @@ -111,7 +110,7 @@ def __bool__(self) -> bool: Querybuilder is truthy if it has t.Any conditions. """ table = self._ensure_table_defined() - default_query = table.id > 0 + default_query: Query = table.id > 0 return any( [ self.query != default_query, @@ -183,6 +182,31 @@ def orderby(self, *fields: OrderBy) -> "QueryBuilder[T_MetaInstance]": """ return self.select(orderby=fields) + def groupby(self, *fields: t.Any) -> "QueryBuilder[T_MetaInstance]": + """ + Group the query results by specified fields. + + Args: + fields: Field(s) to group by (e.g., Table.column) + + Returns: + QueryBuilder: A new QueryBuilder instance with grouping applied. + """ + groupby_value = fields[0] if len(fields) == 1 else fields + return self.select(groupby=groupby_value) + + def having(self, condition: t.Any) -> "QueryBuilder[T_MetaInstance]": + """ + Filter grouped query results based on aggregate conditions. + + Args: + condition: Query condition for filtering groups (e.g., Team.id.count() > 0) + + Returns: + QueryBuilder: A new QueryBuilder instance with having condition applied. + """ + return self.select(having=condition) + def where( self, *queries_or_lambdas: Query | t.Callable[[t.Type[T_MetaInstance]], Query] | dict[str, t.Any], @@ -524,7 +548,7 @@ def execute(self, add_id: bool = False) -> Rows: Raw version of .collect which only executes the SQL, without performing t.Any magic afterwards. """ db = self._get_db() - metadata = self.metadata.copy() + metadata: Metadata = self.metadata.copy() query, select_args, select_kwargs = self._before_query(metadata, add_id=add_id) @@ -552,7 +576,7 @@ def collect( for fn_before in db._before_collect: fn_before(self) - metadata = self.metadata.copy() + metadata: Metadata = self.metadata.copy() if metadata.get("cache", {}).get("enabled") and (result := self._collect_cached(metadata)): return result diff --git a/src/typedal/tables.py b/src/typedal/tables.py index bc9d296..87c30f1 100644 --- a/src/typedal/tables.py +++ b/src/typedal/tables.py @@ -326,6 +326,18 @@ def orderby(self: t.Type[T_MetaInstance], *fields: OrderBy) -> "QueryBuilder[T_M """ return QueryBuilder(self).orderby(*fields) + def groupby(self: t.Type[T_MetaInstance], *fields: t.Any) -> "QueryBuilder[T_MetaInstance]": + """ + See QueryBuilder.groupby! + """ + return QueryBuilder(self).groupby(*fields) + + def having(self: t.Type[T_MetaInstance], condition: t.Any) -> "QueryBuilder[T_MetaInstance]": + """ + See QueryBuilder.having! + """ + return QueryBuilder(self).having(condition) + def cache(self: t.Type[T_MetaInstance], *deps: t.Any, **kwargs: t.Any) -> "QueryBuilder[T_MetaInstance]": """ See QueryBuilder.cache! diff --git a/src/typedal/types.py b/src/typedal/types.py index 3c5fec9..4e2e3dc 100644 --- a/src/typedal/types.py +++ b/src/typedal/types.py @@ -220,6 +220,8 @@ class SelectKwargs(t.TypedDict, total=False): join: t.Optional[list[Expression]] left: t.Optional[list[Expression]] orderby: "OrderBy | t.Iterable[OrderBy] | None" + groupby: "GroupBy | t.Iterable[GroupBy] | None" + having: "Having | None" limitby: t.Optional[tuple[int, int]] distinct: bool | Field | Expression orderby_on_limitby: bool @@ -323,5 +325,7 @@ class FieldSettings(t.TypedDict, total=False): CacheTuple = tuple[CacheModel, int] OrderBy: t.TypeAlias = str | Expression +GroupBy: t.TypeAlias = Field | Expression +Having: t.TypeAlias = Query | Expression T_annotation = t.Type[t.Any] | types.UnionType diff --git a/tests/test_query_builder.py b/tests/test_query_builder.py index f5f981e..5b368d3 100644 --- a/tests/test_query_builder.py +++ b/tests/test_query_builder.py @@ -45,25 +45,6 @@ def test_query_type(): assert isinstance(TestQueryTable.number != 3, Query) -""" -SELECT "test_query_table"."id" - , "test_query_table"."number" - , "relations_8106139955393"."id" - , "relations_8106139955393"."name" - , "relations_8106139955393"."value" - , "relations_8106139955393"."querytable" - FROM "test_query_table" - LEFT JOIN "test_relationship" AS "relations_8106139955393" - ON ("relations_8106139955393"."querytable" = "test_query_table"."id") - WHERE ("test_query_table"."id" IN (SELECT "test_query_table"."id" - FROM "test_query_table" - WHERE ("test_query_table"."id" > 0) - ORDER BY "test_query_table"."id" - LIMIT 3 OFFSET 0)) - ORDER BY "test_query_table"."number" DESC; -""" - - def _setup_data(): TestQueryTable.truncate() first = TestQueryTable.insert(number=0) @@ -562,3 +543,131 @@ def print_duration(_qb: QueryBuilder, rows, _raw): db._after_collect.append(print_duration) TestQueryTable.all() + + +def test_groupby_basic(): + """Test basic groupby with count aggregation.""" + _setup_data() + + result = TestRelationship.select( + TestRelationship.querytable.with_alias("query_table"), + TestRelationship.querytable.count().with_alias("count"), + ).groupby(TestRelationship.querytable).execute() + + assert len(result) == 2 + for row in result: + assert row["count"] == 4 + + +def test_groupby_multiple_fields(): + """Test grouping by multiple fields.""" + _setup_data() + + result = TestRelationship.select( + TestRelationship.querytable, + TestRelationship.value, + TestRelationship.id.count().with_alias("count"), + ).groupby(TestRelationship.querytable, TestRelationship.value).execute() + + # Should group by combination of querytable and value + assert len(result) > 0 + + +def test_groupby_with_having(): + """Test groupby with having to filter groups.""" + _setup_data() + + result = TestRelationship.select( + TestRelationship.querytable.with_alias("query_table"), + TestRelationship.querytable.count().with_alias("count"), + ).groupby(TestRelationship.querytable).having(TestRelationship.querytable.count() > 3).execute() + + # Only groups with count > 3 + assert len(result) == 2 + for row in result: + assert row["count"] > 3 + + +def test_having_filters_aggregates(): + """Test that having properly filters based on aggregate conditions.""" + _setup_data() + + # Get all groups + all_groups = TestRelationship.select( + TestRelationship.querytable, + TestRelationship.querytable.count().with_alias("count"), + ).groupby(TestRelationship.querytable).execute() + + # Filter with having + filtered = TestRelationship.select( + TestRelationship.querytable, + TestRelationship.querytable.count().with_alias("count"), + ).groupby(TestRelationship.querytable).having(TestRelationship.querytable.count() > 10).execute() + + # Should have fewer results (or zero if no groups have count > 10) + assert len(filtered) <= len(all_groups) + + +def test_groupby_to_sql(): + """Verify SQL generation includes GROUP BY.""" + sql = TestRelationship.select( + TestRelationship.querytable, TestRelationship.querytable.count() + ).groupby(TestRelationship.querytable).to_sql() + + assert "GROUP BY" in sql + + +def test_having_to_sql(): + """Verify SQL generation includes HAVING.""" + sql = ( + TestRelationship.select(TestRelationship.querytable, TestRelationship.querytable.count()) + .groupby(TestRelationship.querytable) + .having(TestRelationship.querytable.count() > 0) + .to_sql() + ) + + assert "GROUP BY" in sql + assert "HAVING" in sql + + +def test_groupby_chaining(): + """Test that multiple groupby calls work (last one should win).""" + _setup_data() + + # First groupby by querytable + builder1 = TestRelationship.select( + TestRelationship.querytable, TestRelationship.querytable.count().with_alias("count") + ).groupby(TestRelationship.querytable) + + # Then groupby by value (should override) + builder2 = builder1.groupby(TestRelationship.value) + + sql = builder2.to_sql() + # Should only have the second groupby + assert "GROUP BY" in sql + + +def test_groupby_having_on_table_class(): + """Test calling .groupby() and .having() directly on table class in different orders.""" + _setup_data() + + builder1 = ( + TestRelationship.groupby(TestRelationship.querytable) + .having(TestRelationship.querytable.count() > 0) + .select(TestRelationship.querytable, TestRelationship.querytable.count()) + ) + + sql1 = builder1.to_sql() + + builder2 = ( + TestRelationship.having(TestRelationship.querytable.count() > 0) + .groupby(TestRelationship.querytable) + .select(TestRelationship.querytable, TestRelationship.querytable.count()) + ) + sql2 = builder2.to_sql() + + assert sql1 == sql2 + assert "GROUP BY" in sql1 + assert "HAVING" in sql1 + + assert builder1.execute() == builder2.execute()