diff --git a/README.md b/README.md index f3175be..21e9773 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ rdf4j-python bridges the gap between Python and the robust [Eclipse RDF4J](https - **Async-First Design**: Native support for async/await with synchronous fallback - **Repository Management**: Create, access, and manage RDF4J repositories programmatically - **SPARQL Support**: Execute SELECT, ASK, CONSTRUCT, and UPDATE queries effortlessly +- **SPARQL Query Builder**: Fluent, programmatic query construction with method chaining - **Transaction Support**: Atomic operations with commit/rollback and isolation levels - **Flexible Data Handling**: Add, retrieve, and manipulate RDF triples and quads - **File Upload**: Upload RDF files (Turtle, N-Triples, N-Quads, RDF/XML, JSON-LD, TriG, N3) directly to repositories @@ -88,6 +89,62 @@ if __name__ == "__main__": asyncio.run(main()) ``` +### SPARQL Query Builder + +Build queries programmatically with method chaining instead of writing raw SPARQL strings: + +```python +from rdf4j_python import select, ask, construct, describe, GraphPattern, Namespace + +ex = Namespace("ex", "http://example.org/") +foaf = Namespace("foaf", "http://xmlns.com/foaf/0.1/") + +# SELECT with typed terms — IRIs serialize automatically +query = ( + select("?person", "?name") + .where("?person", "a", ex.Person) + .where("?person", foaf.name, "?name") + .optional("?person", foaf.email, "?email") + .filter("?name != 'Bob'") + .order_by("?name") + .limit(10) + .build() +) + +# Or use string-based prefixed names +query = ( + select("?name") + .prefix("foaf", "http://xmlns.com/foaf/0.1/") + .where("?person", "a", "foaf:Person") + .where("?person", "foaf:name", "?name") + .build() +) + +# GROUP BY with aggregation +query = ( + select("?city", "(COUNT(?person) AS ?count)") + .where("?person", ex.city, "?city") + .group_by("?city") + .having("COUNT(?person) > 1") + .order_by("DESC(?count)") + .build() +) + +# ASK, CONSTRUCT, and DESCRIBE 
+ask_query = ask().where("?s", ex.name, "?name").build() + +construct_query = ( + construct(("?s", ex.fullName, "?name")) + .where("?s", ex.firstName, "?fname") + .bind("CONCAT(?fname, ' ', ?lname)", "?name") + .build() +) + +describe_query = describe(ex.alice).build() +``` + +The query builder supports FILTER, OPTIONAL, UNION, BIND, VALUES, sub-queries, DISTINCT, ORDER BY, GROUP BY, HAVING, LIMIT, and OFFSET. Both raw strings and typed objects (`IRI`, `Variable`, `Literal`, `BlankNode`, and IRIs produced by `Namespace` attributes such as `ex.Person`) work as terms. + ### Working with Multiple Graphs ```python @@ -155,15 +212,18 @@ async def advanced_example(): ] await repo.add_statements(statements) - # Complex SPARQL query - query = """ - PREFIX foaf: <http://xmlns.com/foaf/0.1/> - SELECT ?name ?email WHERE { - ?person foaf:name ?name . - OPTIONAL { ?person foaf:email ?email } - } - ORDER BY ?name - """ + # Query with the fluent query builder + from rdf4j_python import select + from rdf4j_python.model._namespace import Namespace + + foaf = Namespace("foaf", "http://xmlns.com/foaf/0.1/") + query = ( + select("?name", "?email") + .where("?person", foaf.name, "?name") + .optional("?person", foaf.email, "?email") + .order_by("?name") + .build() + ) results = await repo.query(query) ``` @@ -256,6 +316,7 @@ For more detailed examples, see the [examples](examples/) directory. 
rdf4j_python/ ├── _driver/ # Core async driver implementation ├── model/ # Data models and configurations +├── query/ # SPARQL query builder ├── exception/ # Custom exceptions └── utils/ # Utility functions diff --git a/examples/README.md b/examples/README.md index 28b7553..1519ca4 100644 --- a/examples/README.md +++ b/examples/README.md @@ -74,7 +74,7 @@ python examples/delete_repository.py ### 🔍 Data Querying Examples #### `query_and_print.py` -Comprehensive example of SPARQL querying and result formatting: +Comprehensive example of SPARQL querying and result formatting using the fluent query builder: - SELECT queries with various clauses (FILTER, OPTIONAL, JOIN) - CONSTRUCT queries for data transformation - ASK queries for boolean checks @@ -82,6 +82,7 @@ Comprehensive example of SPARQL querying and result formatting: - Multiple result formatting options **Key Features:** +- Fluent SPARQL query builder (no raw query strings) - Automatic test data setup - Formatted table output - JSON-like result formatting @@ -92,8 +93,8 @@ Comprehensive example of SPARQL querying and result formatting: python examples/query_and_print.py ``` -#### `query.py` (Original) -Simple query example showing basic SPARQL execution. +#### `query.py` +Simple query example showing basic SPARQL execution using the query builder. **Usage:** ```bash @@ -112,6 +113,7 @@ End-to-end example demonstrating the full repository lifecycle: **Key Features:** - Multi-repository workflow +- Fluent SPARQL query builder with Namespace objects - Real-world data scenarios (customers, products, analytics) - Named graph usage - Comprehensive error handling and cleanup @@ -121,7 +123,7 @@ End-to-end example demonstrating the full repository lifecycle: python examples/complete_workflow.py ``` -#### `repo.py` (Original) +#### `repo.py` Basic repository creation and data insertion example. 
**Usage:** diff --git a/examples/complete_workflow.py b/examples/complete_workflow.py index d38d83d..171e730 100644 --- a/examples/complete_workflow.py +++ b/examples/complete_workflow.py @@ -13,7 +13,8 @@ import asyncio -from rdf4j_python import AsyncRdf4j +from rdf4j_python import AsyncRdf4j, select +from rdf4j_python.model._namespace import Namespace from rdf4j_python.model.repository_config import ( MemoryStoreConfig, RepositoryConfig, @@ -21,6 +22,9 @@ ) from rdf4j_python.model.term import IRI, Literal, Quad +# Define namespaces for query building +ecom = Namespace("ecom", "http://example.com/") + async def workflow_step_1_create_repositories(): """Step 1: Create multiple repositories with different configurations.""" @@ -199,14 +203,14 @@ async def workflow_step_3_query_data(): # Query 1: Customer information print("👥 Customer Information:") customer_repo = await db.get_repository("customer-data") - customer_query = """ - SELECT ?customer ?name ?email ?age WHERE { - ?customer ?name . - OPTIONAL { ?customer ?email } - OPTIONAL { ?customer ?age } - } - ORDER BY ?name - """ + customer_query = ( + select("?customer", "?name", "?email", "?age") + .where("?customer", ecom.name, "?name") + .optional("?customer", ecom.email, "?email") + .optional("?customer", ecom.age, "?age") + .order_by("?name") + .build() + ) customer_results = await customer_repo.query(customer_query) for result in customer_results: name = result["name"].value if result["name"] else "N/A" @@ -217,14 +221,14 @@ async def workflow_step_3_query_data(): # Query 2: Product catalog print("\n🛍️ Product Catalog:") product_repo = await db.get_repository("product-catalog") - product_query = """ - SELECT ?product ?name ?price ?category WHERE { - ?product ?name . 
- OPTIONAL { ?product ?price } - OPTIONAL { ?product ?category } - } - ORDER BY ?price - """ + product_query = ( + select("?product", "?name", "?price", "?category") + .where("?product", ecom.name, "?name") + .optional("?product", ecom.price, "?price") + .optional("?product", ecom.category, "?category") + .order_by("?price") + .build() + ) product_results = await product_repo.query(product_query) for result in product_results: name = result["name"].value if result["name"] else "N/A" @@ -235,14 +239,14 @@ async def workflow_step_3_query_data(): # Query 3: Purchase analytics print("\n📊 Purchase Analytics:") analytics_repo = await db.get_repository("analytics-data") - analytics_query = """ - SELECT ?purchase ?customer ?product ?date WHERE { - ?purchase ?customer . - ?purchase ?product . - OPTIONAL { ?purchase ?date } - } - ORDER BY ?date - """ + analytics_query = ( + select("?purchase", "?customer", "?product", "?date") + .where("?purchase", ecom.customer, "?customer") + .where("?purchase", ecom.product, "?product") + .optional("?purchase", ecom.date, "?date") + .order_by("?date") + .build() + ) analytics_results = await analytics_repo.query(analytics_query) for result in analytics_results: customer = result["customer"].value if result["customer"] else "N/A" diff --git a/examples/query.py b/examples/query.py index 64f9ec8..b29af5a 100644 --- a/examples/query.py +++ b/examples/query.py @@ -2,7 +2,7 @@ from pyoxigraph import QuerySolutions -from rdf4j_python import AsyncRdf4j +from rdf4j_python import AsyncRdf4j, select from rdf4j_python.model.term import IRI, Literal, Quad @@ -25,7 +25,11 @@ async def main(): ), ] ) - result = await repo.query("SELECT * WHERE { ?s ?p ?o }") + + # Build the query using the fluent query builder + query = select("?s", "?p", "?o").where("?s", "?p", "?o").build() + + result = await repo.query(query) assert isinstance(result, QuerySolutions) for solution in result: print(solution) diff --git a/examples/query_and_print.py 
b/examples/query_and_print.py index 6735889..cf2551b 100644 --- a/examples/query_and_print.py +++ b/examples/query_and_print.py @@ -3,6 +3,8 @@ This example demonstrates how to execute various types of SPARQL queries against RDF4J repositories and format the results in different ways. + +Queries are built using the fluent SPARQL query builder. """ import asyncio @@ -10,7 +12,8 @@ from pyoxigraph import QuerySolutions, QueryTriples -from rdf4j_python import AsyncRdf4j +from rdf4j_python import AsyncRdf4j, ask, construct, select +from rdf4j_python.model._namespace import Namespace from rdf4j_python.model.repository_config import ( MemoryStoreConfig, RepositoryConfig, @@ -18,6 +21,9 @@ ) from rdf4j_python.model.term import IRI, Literal, Quad +# Define namespaces for query building +ex = Namespace("ex", "http://example.org/") + async def setup_test_data(repo): """Add some test data to the repository for querying.""" @@ -189,43 +195,43 @@ async def execute_select_queries(repo): print("=" * 50) # Query 1: Simple SELECT - get all people and their names - query1 = """ - SELECT ?person ?name WHERE { - ?person ?name . - } - """ + query1 = ( + select("?person", "?name") + .where("?person", ex.name, "?name") + .build() + ) result1 = await repo.query(query1) print_select_results(result1, "All People and Their Names") # Query 2: SELECT with FILTER - people older than 30 - query2 = """ - SELECT ?person ?name ?age WHERE { - ?person ?name . - ?person ?age . - FILTER(?age > 30) - } - """ + query2 = ( + select("?person", "?name", "?age") + .where("?person", ex.name, "?name") + .where("?person", ex.age, "?age") + .filter("?age > 30") + .build() + ) result2 = await repo.query(query2) print_select_results(result2, "People Older Than 30") # Query 3: SELECT with OPTIONAL - people and their email (if available) - query3 = """ - SELECT ?person ?name ?email WHERE { - ?person ?name . 
- OPTIONAL { ?person ?email } - } - """ + query3 = ( + select("?person", "?name", "?email") + .where("?person", ex.name, "?name") + .optional("?person", ex.email, "?email") + .build() + ) result3 = await repo.query(query3) print_select_results(result3, "People and Their Email Addresses") # Query 4: SELECT with JOIN - people and their employers - query4 = """ - SELECT ?person ?name ?company WHERE { - ?person ?name . - ?person ?org . - ?org ?company . - } - """ + query4 = ( + select("?person", "?name", "?company") + .where("?person", ex.name, "?name") + .where("?person", ex.worksFor, "?org") + .where("?org", ex.name, "?company") + .build() + ) result4 = await repo.query(query4) print_select_results(result4, "People and Their Employers") @@ -239,29 +245,25 @@ async def execute_construct_queries(repo): print("=" * 50) # Query 1: CONSTRUCT - create simplified person data - query1 = """ - CONSTRUCT { - ?person ?name ; - ?age . - } - WHERE { - ?person ?name . - ?person ?age . - } - """ + query1 = ( + construct( + ("?person", ex.hasName, "?name"), + ("?person", ex.hasAge, "?age"), + ) + .where("?person", ex.name, "?name") + .where("?person", ex.age, "?age") + .build() + ) result1 = await repo.query(query1) print_construct_results(result1, "Simplified Person Data") # Query 2: CONSTRUCT - create employment relationships - query2 = """ - CONSTRUCT { - ?person ?company . - } - WHERE { - ?person ?org . - ?org ?company . - } - """ + query2 = ( + construct(("?person", ex.employedBy, "?company")) + .where("?person", ex.worksFor, "?org") + .where("?org", ex.name, "?company") + .build() + ) result2 = await repo.query(query2) print_construct_results(result2, "Employment Relationships") @@ -272,30 +274,30 @@ async def execute_ask_queries(repo): print("=" * 50) # Query 1: ASK - check if Alice exists - query1 = """ - ASK { - ?person "Alice" . - } - """ + query1 = ( + ask() + .where("?person", ex.name, '"Alice"') + .build() + ) result1 = await repo.query(query1) print(f"Does Alice exist? 
{'✅ Yes' if result1 else '❌ No'}") # Query 2: ASK - check if anyone is older than 40 - query2 = """ - ASK { - ?person ?age . - FILTER(?age > 40) - } - """ + query2 = ( + ask() + .where("?person", ex.age, "?age") + .filter("?age > 40") + .build() + ) result2 = await repo.query(query2) print(f"Is anyone older than 40? {'✅ Yes' if result2 else '❌ No'}") # Query 3: ASK - check if there are any email addresses - query3 = """ - ASK { - ?person ?email . - } - """ + query3 = ( + ask() + .where("?person", ex.email, "?email") + .build() + ) result3 = await repo.query(query3) print(f"Are there any email addresses? {'✅ Yes' if result3 else '❌ No'}") @@ -306,29 +308,29 @@ async def execute_aggregate_queries(repo): print("=" * 50) # Query 1: COUNT - total number of people - query1 = """ - SELECT (COUNT(?person) AS ?totalPeople) WHERE { - ?person ?name . - } - """ + query1 = ( + select("(COUNT(?person) AS ?totalPeople)") + .where("?person", ex.name, "?name") + .build() + ) result1 = await repo.query(query1) print_select_results(result1, "Total Number of People") # Query 2: AVG - average age - query2 = """ - SELECT (AVG(?age) AS ?averageAge) WHERE { - ?person ?age . - } - """ + query2 = ( + select("(AVG(?age) AS ?averageAge)") + .where("?person", ex.age, "?age") + .build() + ) result2 = await repo.query(query2) print_select_results(result2, "Average Age") # Query 3: MIN/MAX - youngest and oldest person - query3 = """ - SELECT (MIN(?age) AS ?minAge) (MAX(?age) AS ?maxAge) WHERE { - ?person ?age . 
- } - """ + query3 = ( + select("(MIN(?age) AS ?minAge)", "(MAX(?age) AS ?maxAge)") + .where("?person", ex.age, "?age") + .build() + ) result3 = await repo.query(query3) print_select_results(result3, "Age Range (Min/Max)") diff --git a/rdf4j_python/__init__.py b/rdf4j_python/__init__.py index 2a65acb..f43a7b9 100644 --- a/rdf4j_python/__init__.py +++ b/rdf4j_python/__init__.py @@ -40,6 +40,7 @@ Triple, Variable, ) +from .query import GraphPattern, ask, construct, describe, select __all__ = [ # Main classes @@ -78,4 +79,10 @@ "Object", "Context", "QuadResultSet", + # Query builder + "select", + "ask", + "construct", + "describe", + "GraphPattern", ] diff --git a/rdf4j_python/query/__init__.py b/rdf4j_python/query/__init__.py new file mode 100644 index 0000000..a9be7af --- /dev/null +++ b/rdf4j_python/query/__init__.py @@ -0,0 +1,37 @@ +"""SPARQL query builder for RDF4J Python.""" + +from ._builder import AskQuery, ConstructQuery, DescribeQuery, SelectQuery +from ._pattern import GraphPattern + + +def select(*variables: str) -> SelectQuery: + """Create a new ``SELECT`` query builder.""" + return SelectQuery(*variables) + + +def ask() -> AskQuery: + """Create a new ``ASK`` query builder.""" + return AskQuery() + + +def construct(*templates: tuple) -> ConstructQuery: + """Create a new ``CONSTRUCT`` query builder.""" + return ConstructQuery(*templates) + + +def describe(*resources) -> DescribeQuery: + """Create a new ``DESCRIBE`` query builder.""" + return DescribeQuery(*resources) + + +__all__ = [ + "select", + "ask", + "construct", + "describe", + "GraphPattern", + "SelectQuery", + "AskQuery", + "ConstructQuery", + "DescribeQuery", +] diff --git a/rdf4j_python/query/_builder.py b/rdf4j_python/query/_builder.py new file mode 100644 index 0000000..dfb414f --- /dev/null +++ b/rdf4j_python/query/_builder.py @@ -0,0 +1,224 @@ +"""SPARQL query builder classes.""" + +from __future__ import annotations + +import copy +from typing import Any + +from 
rdf4j_python.model._namespace import Namespace + +from ._pattern import GraphPattern +from ._term import Term, serialize_term + + +class _QueryBase: + """Shared state and behaviour for all query builders. + + Provides prefix management, WHERE-clause delegation, ``copy()``, and + ``__str__()`` so concrete builders only need to implement ``build()``. + """ + + def __init__(self) -> None: + self._pattern = GraphPattern() + self._prefixes: dict[str, str] = {} + + # ── prefix handling ────────────────────────────────────────────── + + def prefix(self, name_or_ns: str | Namespace, uri: str | None = None) -> Any: + """Register a prefix. + + - ``prefix("ex", "http://example.org/")`` — string pair + - ``prefix(ns)`` — extract from a ``Namespace`` object + """ + if isinstance(name_or_ns, Namespace): + self._prefixes[name_or_ns.prefix] = name_or_ns.namespace.value + else: + if uri is None: + raise ValueError("uri is required when name_or_ns is a string") + self._prefixes[name_or_ns] = uri + return self + + def _render_prefixes(self) -> str: + return "\n".join(f"PREFIX {k}: <{v}>" for k, v in self._prefixes.items()) + + # ── WHERE-clause delegation ────────────────────────────────────── + + def where(self, s: Term, p: Term, o: Term) -> Any: + self._pattern.where(s, p, o) + return self + + def filter(self, expr: str) -> Any: + self._pattern.filter(expr) + return self + + def optional( + self, + s_or_pattern: Term | GraphPattern, + p: Term | None = None, + o: Term | None = None, + ) -> Any: + self._pattern.optional(s_or_pattern, p, o) + return self + + def union(self, *patterns: GraphPattern) -> Any: + self._pattern.union(*patterns) + return self + + def bind(self, expr: str, var: str) -> Any: + self._pattern.bind(expr, var) + return self + + def values(self, var: str, vals: list[Any]) -> Any: + self._pattern.values(var, vals) + return self + + def sub_query(self, builder: SelectQuery) -> Any: + self._pattern.sub_query(builder) + return self + + # ── common helpers 
─────────────────────────────────────────────── + + def _render_where(self, parts: list[str]) -> None: + """Append ``WHERE { … }`` to *parts* if patterns exist.""" + if len(self._pattern) > 0: + parts.append("WHERE {") + parts.append(self._pattern.to_sparql()) + parts.append("}") + + def _build_parts(self) -> list[str]: + """Return the prefix lines (if any) as a starting list.""" + parts: list[str] = [] + if self._prefixes: + parts.append(self._render_prefixes()) + return parts + + def copy(self): + return copy.deepcopy(self) + + def build(self) -> str: + raise NotImplementedError + + def __str__(self) -> str: + return self.build() + + +# ── concrete builders ──────────────────────────────────────────────── + + +class SelectQuery(_QueryBase): + """Builder for ``SELECT`` queries.""" + + def __init__(self, *variables: str) -> None: + super().__init__() + self._variables = list(variables) + self._distinct = False + self._order_by: list[str] = [] + self._group_by: list[str] = [] + self._having: str | None = None + self._limit: int | None = None + self._offset: int | None = None + + def distinct(self) -> SelectQuery: + self._distinct = True + return self + + def order_by(self, *exprs: str) -> SelectQuery: + self._order_by.extend(exprs) + return self + + def group_by(self, *exprs: str) -> SelectQuery: + self._group_by.extend(exprs) + return self + + def having(self, expr: str) -> SelectQuery: + self._having = expr + return self + + def limit(self, n: int) -> SelectQuery: + self._limit = n + return self + + def offset(self, n: int) -> SelectQuery: + self._offset = n + return self + + def build(self) -> str: + if not self._variables: + raise ValueError("SELECT query requires at least one variable") + if len(self._pattern) == 0: + raise ValueError("SELECT query requires at least one WHERE pattern") + + parts = self._build_parts() + + keyword = "SELECT DISTINCT" if self._distinct else "SELECT" + parts.append(f"{keyword} {' '.join(self._variables)}") + + 
self._render_where(parts) + + if self._group_by: + parts.append(f"GROUP BY {' '.join(self._group_by)}") + if self._having: + parts.append(f"HAVING ({self._having})") + if self._order_by: + parts.append(f"ORDER BY {' '.join(self._order_by)}") + if self._limit is not None: + parts.append(f"LIMIT {self._limit}") + if self._offset is not None: + parts.append(f"OFFSET {self._offset}") + + return "\n".join(parts) + + +class AskQuery(_QueryBase): + """Builder for ``ASK`` queries.""" + + def build(self) -> str: + if len(self._pattern) == 0: + raise ValueError("ASK query requires at least one WHERE pattern") + + parts = self._build_parts() + parts.append("ASK {") + parts.append(self._pattern.to_sparql()) + parts.append("}") + return "\n".join(parts) + + +class ConstructQuery(_QueryBase): + """Builder for ``CONSTRUCT`` queries.""" + + def __init__(self, *templates: tuple[Term, Term, Term]) -> None: + super().__init__() + self._templates = list(templates) + + def build(self) -> str: + if not self._templates: + raise ValueError("CONSTRUCT query requires at least one template triple") + + parts = self._build_parts() + + parts.append("CONSTRUCT {") + for s, p, o in self._templates: + parts.append( + f" {serialize_term(s)} {serialize_term(p)} {serialize_term(o)} ." 
+ ) + parts.append("}") + + self._render_where(parts) + return "\n".join(parts) + + +class DescribeQuery(_QueryBase): + """Builder for ``DESCRIBE`` queries.""" + + def __init__(self, *resources: Term) -> None: + super().__init__() + self._resources = list(resources) + + def build(self) -> str: + if not self._resources: + raise ValueError("DESCRIBE query requires at least one resource") + + parts = self._build_parts() + parts.append(f"DESCRIBE {' '.join(serialize_term(r) for r in self._resources)}") + self._render_where(parts) + return "\n".join(parts) diff --git a/rdf4j_python/query/_pattern.py b/rdf4j_python/query/_pattern.py new file mode 100644 index 0000000..114ee57 --- /dev/null +++ b/rdf4j_python/query/_pattern.py @@ -0,0 +1,102 @@ +"""GraphPattern — composable SPARQL WHERE block.""" + +from __future__ import annotations + +import copy +from typing import TYPE_CHECKING, Any + +from ._term import Term, serialize_term + +if TYPE_CHECKING: + from ._builder import SelectQuery + + +def _ensure_var(var: str) -> str: + """Prefix *var* with ``?`` if missing.""" + return var if var.startswith("?") else f"?{var}" + + +class GraphPattern: + """A composable block of SPARQL graph patterns. + + Every mutating method returns ``self`` for fluent chaining. + """ + + def __init__(self) -> None: + self._elements: list[str] = [] + + def where(self, s: Term, p: Term, o: Term) -> GraphPattern: + """Add a triple pattern.""" + self._elements.append( + f"{serialize_term(s)} {serialize_term(p)} {serialize_term(o)} ." + ) + return self + + def filter(self, expr: str) -> GraphPattern: + """Add a ``FILTER(expr)`` clause.""" + self._elements.append(f"FILTER({expr})") + return self + + def optional( + self, + s_or_pattern: Term | GraphPattern, + p: Term | None = None, + o: Term | None = None, + ) -> GraphPattern: + """Add an ``OPTIONAL { … }`` block. 
+ + - ``optional(s, p, o)`` — single triple shorthand + - ``optional(GraphPattern())`` — complex pattern block + """ + if isinstance(s_or_pattern, GraphPattern): + body = s_or_pattern.to_sparql(indent=4) + self._elements.append(f"OPTIONAL {{\n{body}\n }}") + else: + if p is None or o is None: + raise ValueError( + "optional() requires either a GraphPattern or three term arguments (s, p, o)" + ) + triple = f"{serialize_term(s_or_pattern)} {serialize_term(p)} {serialize_term(o)} ." + self._elements.append(f"OPTIONAL {{ {triple} }}") + return self + + def union(self, *patterns: GraphPattern) -> GraphPattern: + """Add ``{ … } UNION { … }`` blocks.""" + if len(patterns) < 2: + raise ValueError("union() requires at least two GraphPattern arguments") + parts = [] + for pat in patterns: + body = pat.to_sparql(indent=4) + parts.append(f"{{\n{body}\n }}") + self._elements.append(" UNION ".join(parts)) + return self + + def bind(self, expr: str, var: str) -> GraphPattern: + """Add a ``BIND(expr AS ?var)`` clause.""" + self._elements.append(f"BIND({expr} AS {_ensure_var(var)})") + return self + + def values(self, var: str, vals: list[Any]) -> GraphPattern: + """Add a ``VALUES ?var { … }`` clause.""" + serialized = " ".join(serialize_term(val) for val in vals) + self._elements.append(f"VALUES {_ensure_var(var)} {{ {serialized} }}") + return self + + def sub_query(self, builder: SelectQuery) -> GraphPattern: + """Embed a sub-SELECT inside this pattern.""" + inner = builder.build() + indented = "\n".join(f" {line}" for line in inner.splitlines()) + self._elements.append(f"{{\n{indented}\n }}") + return self + + def to_sparql(self, indent: int = 2) -> str: + """Render the pattern body (without the outer braces).""" + prefix = " " * indent + return "\n".join(f"{prefix}{el}" for el in self._elements) + + def copy(self) -> GraphPattern: + """Return a deep copy of this pattern.""" + return copy.deepcopy(self) + + def __len__(self) -> int: + return len(self._elements) diff --git 
a/rdf4j_python/query/_term.py b/rdf4j_python/query/_term.py new file mode 100644 index 0000000..631dd15 --- /dev/null +++ b/rdf4j_python/query/_term.py @@ -0,0 +1,35 @@ +"""Term serialization for SPARQL query building.""" + +from __future__ import annotations + +from typing import Union + +import pyoxigraph as og + +Term = Union[str, og.NamedNode, og.Variable, og.Literal, og.BlankNode] + +_XSD_STRING = "http://www.w3.org/2001/XMLSchema#string" + + +def serialize_term(term: Term) -> str: + """Convert a term to its SPARQL string representation. + + Strings are passed through as-is (variables, prefixed names, full IRIs, etc.). + Typed objects (NamedNode, Variable, Literal, BlankNode) are formatted accordingly. + """ + if isinstance(term, str): + return term + if isinstance(term, og.NamedNode): + return f"<{term.value}>" + if isinstance(term, og.Variable): + return f"?{term.value}" + if isinstance(term, og.Literal): + value = term.value.replace("\\", "\\\\").replace('"', '\\"') + if term.language: + return f'"{value}"@{term.language}' + if term.datatype and term.datatype.value != _XSD_STRING: + return f'"{value}"^^<{term.datatype.value}>' + return f'"{value}"' + if isinstance(term, og.BlankNode): + return f"_:{term.value}" + raise TypeError(f"Unsupported term type: {type(term)}") diff --git a/tests/test_query_builder.py b/tests/test_query_builder.py new file mode 100644 index 0000000..2df553f --- /dev/null +++ b/tests/test_query_builder.py @@ -0,0 +1,488 @@ +"""Tests for the SPARQL query builder.""" + +import pytest +import pyoxigraph as og + +from rdf4j_python.model._namespace import Namespace +from rdf4j_python.model.vocabulary import EXAMPLE as ex +from rdf4j_python.model.vocabulary import RDF +from rdf4j_python.query import ( + GraphPattern, + ask, + construct, + describe, + select, +) + + +# ── serialize_term ─────────────────────────────────────────────────── + + +class TestSerializeTerm: + def test_iri(self): + from rdf4j_python.query._term import serialize_term 
+ + assert serialize_term(og.NamedNode("http://example.org/Person")) == "<http://example.org/Person>" + + def test_variable(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Variable("name")) == "?name" + + def test_literal_plain(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Literal("hello")) == '"hello"' + + def test_literal_with_language(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Literal("hello", language="en")) == '"hello"@en' + + def test_literal_with_datatype(self): + from rdf4j_python.query._term import serialize_term + + dt = og.NamedNode("http://www.w3.org/2001/XMLSchema#integer") + assert serialize_term(og.Literal("42", datatype=dt)) == '"42"^^<http://www.w3.org/2001/XMLSchema#integer>' + + def test_literal_string_datatype_omitted(self): + from rdf4j_python.query._term import serialize_term + + dt = og.NamedNode("http://www.w3.org/2001/XMLSchema#string") + assert serialize_term(og.Literal("hello", datatype=dt)) == '"hello"' + + def test_blank_node(self): + from rdf4j_python.query._term import serialize_term + + bn = og.BlankNode("b0") + assert serialize_term(bn) == "_:b0" + + def test_string_variable(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term("?x") == "?x" + + def test_string_a(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term("a") == "a" + + def test_string_prefixed(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term("foaf:name") == "foaf:name" + + def test_string_full_iri(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term("<http://example.org/Person>") == "<http://example.org/Person>" + + def test_string_literal(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term('"hello"') == '"hello"' + + def test_unsupported_type(self): + from rdf4j_python.query._term import serialize_term + + with pytest.raises(TypeError): + serialize_term(42) + + def 
test_literal_with_quotes(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Literal('say "hi"')) == '"say \\"hi\\""' + + def test_namespace_produced_iri(self): + from rdf4j_python.query._term import serialize_term + + person = ex.Person + assert serialize_term(person) == "" + + +# ── SelectQuery ────────────────────────────────────────────────────── + + +class TestSelectQuery: + def test_basic_select(self): + q = select("?s", "?p").where("?s", "a", "?p").build() + assert "SELECT ?s ?p" in q + assert "?s a ?p ." in q + + def test_select_with_typed_terms(self): + q = ( + select("?person", "?name") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.name, "?name") + .build() + ) + assert "" in q + assert "" in q + assert "" in q + + def test_select_with_limit(self): + q = select("?s").where("?s", "a", "?o").limit(10).build() + assert "LIMIT 10" in q + + def test_select_with_offset(self): + q = select("?s").where("?s", "a", "?o").offset(5).build() + assert "OFFSET 5" in q + + def test_select_with_order_by(self): + q = select("?name").where("?s", "a", "?name").order_by("?name").build() + assert "ORDER BY ?name" in q + + def test_select_distinct(self): + q = select("?name").distinct().where("?s", "a", "?name").build() + assert "SELECT DISTINCT ?name" in q + + def test_select_with_filter(self): + q = ( + select("?name") + .where("?person", "a", "ex:Person") + .where("?person", "ex:name", "?name") + .filter("?name != 'Bob'") + .build() + ) + assert "FILTER(?name != 'Bob')" in q + + def test_select_with_optional_triple(self): + q = ( + select("?name", "?email") + .where("?person", "a", "ex:Person") + .optional("?person", "ex:email", "?email") + .build() + ) + assert "OPTIONAL { ?person ex:email ?email . 
}" in q + + def test_select_with_optional_pattern(self): + pattern = GraphPattern().where("?person", ex.email, "?email").filter("bound(?email)") + q = ( + select("?name", "?email") + .where("?person", "a", "ex:Person") + .optional(pattern) + .build() + ) + assert "OPTIONAL {" in q + assert "FILTER(bound(?email))" in q + + def test_select_with_group_by_having(self): + q = ( + select("?city", "(COUNT(?person) AS ?count)") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.city, "?city") + .group_by("?city") + .having("COUNT(?person) > 1") + .order_by("DESC(?count)") + .build() + ) + assert "GROUP BY ?city" in q + assert "HAVING (COUNT(?person) > 1)" in q + assert "ORDER BY DESC(?count)" in q + + def test_select_with_union(self): + q = ( + select("?label") + .where("?s", RDF.type, ex.Person) + .union( + GraphPattern().where("?s", ex.name, "?label"), + GraphPattern().where("?s", ex.nickname, "?label"), + ) + .build() + ) + assert "UNION" in q + + def test_select_with_bind(self): + q = ( + select("?fullName") + .where("?s", ex.firstName, "?fname") + .where("?s", ex.lastName, "?lname") + .bind("CONCAT(?fname, ' ', ?lname)", "?fullName") + .build() + ) + assert "BIND(CONCAT(?fname, ' ', ?lname) AS ?fullName)" in q + + def test_select_with_values(self): + q = ( + select("?person", "?name") + .where("?person", ex.name, "?name") + .values("?person", [ex.alice, ex.bob]) + .build() + ) + assert "VALUES ?person { }" in q + + def test_select_with_sub_query(self): + sub = ( + select("?person", "(MAX(?score) AS ?maxScore)") + .where("?person", ex.score, "?score") + .group_by("?person") + ) + q = ( + select("?person", "?maxScore", "?name") + .where("?person", ex.name, "?name") + .sub_query(sub) + .build() + ) + assert "SELECT ?person (MAX(?score) AS ?maxScore)" in q + assert "GROUP BY ?person" in q + + def test_select_with_string_prefix(self): + q = ( + select("?name") + .prefix("ex", "http://example.org/") + .prefix("foaf", "http://xmlns.com/foaf/0.1/") + 
.where("?person", "a", "ex:Person") + .where("?person", "foaf:name", "?name") + .build() + ) + assert "PREFIX ex: " in q + assert "PREFIX foaf: " in q + + def test_select_with_namespace_prefix(self): + ns = Namespace("schema", "http://schema.org/") + q = ( + select("?name") + .prefix(ns) + .where("?person", "schema:name", "?name") + .build() + ) + assert "PREFIX schema: " in q + + def test_str_equals_build(self): + builder = select("?s").where("?s", "a", "?o") + assert str(builder) == builder.build() + + def test_copy_independence(self): + original = select("?s").where("?s", "a", "?o") + cloned = original.copy() + cloned.limit(10) + assert "LIMIT" not in original.build() + assert "LIMIT 10" in cloned.build() + + def test_validation_no_variables(self): + with pytest.raises(ValueError, match="at least one variable"): + select().where("?s", "a", "?o").build() + + def test_validation_no_where(self): + with pytest.raises(ValueError, match="at least one WHERE pattern"): + select("?s").build() + + def test_modifier_order(self): + """GROUP BY, HAVING, ORDER BY, LIMIT, OFFSET appear in correct order.""" + q = ( + select("?x", "(COUNT(?y) AS ?c)") + .where("?x", "a", "?y") + .group_by("?x") + .having("COUNT(?y) > 1") + .order_by("?x") + .limit(10) + .offset(5) + .build() + ) + lines = q.splitlines() + idx = {kw: i for i, line in enumerate(lines) for kw in ("GROUP BY", "HAVING", "ORDER BY", "LIMIT", "OFFSET") if line.startswith(kw)} + assert idx["GROUP BY"] < idx["HAVING"] < idx["ORDER BY"] < idx["LIMIT"] < idx["OFFSET"] + + def test_multiple_order_by(self): + q = select("?a", "?b").where("?a", "a", "?b").order_by("?a", "DESC(?b)").build() + assert "ORDER BY ?a DESC(?b)" in q + + def test_chaining_returns_self(self): + builder = select("?s") + assert builder.where("?s", "a", "?o") is builder + assert builder.filter("true") is builder + assert builder.optional("?s", "?p", "?o") is builder + assert builder.bind("1", "?x") is builder + assert builder.distinct() is builder + 
assert builder.order_by("?s") is builder + assert builder.group_by("?s") is builder + assert builder.having("true") is builder + assert builder.limit(1) is builder + assert builder.offset(0) is builder + + +# ── AskQuery ───────────────────────────────────────────────────────── + + +class TestAskQuery: + def test_basic_ask(self): + q = ask().where("?s", RDF.type, ex.Person).build() + assert q.startswith("ASK {") + assert "" in q + + def test_ask_with_filter(self): + q = ask().where("?s", "a", "?o").filter("?o = ").build() + assert "FILTER(?o = )" in q + + def test_ask_validation(self): + with pytest.raises(ValueError, match="at least one WHERE pattern"): + ask().build() + + def test_ask_str_equals_build(self): + builder = ask().where("?s", "a", "?o") + assert str(builder) == builder.build() + + def test_ask_with_prefix(self): + q = ( + ask() + .prefix("ex", "http://example.org/") + .where("?s", "a", "ex:Person") + .build() + ) + assert "PREFIX ex: " in q + + +# ── ConstructQuery ─────────────────────────────────────────────────── + + +class TestConstructQuery: + def test_basic_construct(self): + q = ( + construct(("?s", ex.fullName, "?name")) + .where("?s", ex.firstName, "?fname") + .bind("CONCAT(?fname, ' ', ?lname)", "?name") + .build() + ) + assert "CONSTRUCT {" in q + assert "" in q + assert "WHERE {" in q + assert "BIND(" in q + + def test_construct_validation(self): + with pytest.raises(ValueError, match="at least one template triple"): + construct().build() + + def test_construct_str_equals_build(self): + builder = construct(("?s", "?p", "?o")).where("?s", "?p", "?o") + assert str(builder) == builder.build() + + def test_construct_without_where(self): + """CONSTRUCT without WHERE patterns should not emit WHERE block.""" + q = construct(("?s", ex.label, '"test"')).build() + assert "CONSTRUCT {" in q + assert "WHERE" not in q + + +# ── DescribeQuery ──────────────────────────────────────────────────── + + +class TestDescribeQuery: + def 
test_describe_resource(self): + q = describe(ex.alice).build() + assert q == "DESCRIBE " + + def test_describe_with_where(self): + q = ( + describe("?person") + .where("?person", RDF.type, ex.Person) + .filter("?person = ") + .build() + ) + assert "DESCRIBE ?person" in q + assert "WHERE {" in q + assert "FILTER(?person = )" in q + + def test_describe_multiple_resources(self): + q = describe(ex.alice, ex.bob).build() + assert "" in q + assert "" in q + + def test_describe_validation(self): + with pytest.raises(ValueError, match="at least one resource"): + describe().build() + + def test_describe_str_equals_build(self): + builder = describe(ex.alice) + assert str(builder) == builder.build() + + +# ── GraphPattern ───────────────────────────────────────────────────── + + +class TestGraphPattern: + def test_copy_independence(self): + p1 = GraphPattern().where("?s", "a", "?o") + p2 = p1.copy() + p2.filter("true") + assert "FILTER" not in p1.to_sparql() + assert "FILTER" in p2.to_sparql() + + def test_union_requires_two_patterns(self): + p = GraphPattern() + with pytest.raises(ValueError, match="at least two"): + p.union(GraphPattern().where("?s", "a", "?o")) + + def test_optional_requires_three_args_or_pattern(self): + p = GraphPattern() + with pytest.raises(ValueError): + p.optional("?s", "?p") + + def test_nested_optional(self): + inner = GraphPattern().where("?s", ex.email, "?email").filter("bound(?email)") + outer = GraphPattern().where("?s", "a", "ex:Person").optional(inner) + sparql = outer.to_sparql() + assert "OPTIONAL {" in sparql + assert "FILTER(bound(?email))" in sparql + + def test_values_with_string_var(self): + p = GraphPattern().values("person", [ex.alice]) + assert "VALUES ?person" in p.to_sparql() + + def test_bind_with_string_var_no_question_mark(self): + p = GraphPattern().bind("1 + 1", "result") + assert "BIND(1 + 1 AS ?result)" in p.to_sparql() + + +# ── Integration / complex compositions ─────────────────────────────── + + +class 
TestComplexCompositions: + def test_full_query_from_plan_example(self): + """Test the main SELECT example from the plan.""" + q = ( + select("?person", "?name") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.name, "?name") + .order_by("?name") + .limit(10) + .build() + ) + assert "SELECT ?person ?name" in q + assert "" in q + assert "" in q + assert "ORDER BY ?name" in q + assert "LIMIT 10" in q + + def test_optional_filter_combo(self): + q = ( + select("?name", "?email") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.name, "?name") + .optional("?person", ex.email, "?email") + .filter("?name != 'Bob'") + .build() + ) + assert "OPTIONAL" in q + assert "FILTER" in q + + def test_variable_objects(self): + """Test using pyoxigraph Variable objects directly.""" + person = og.Variable("person") + name = og.Variable("name") + q = ( + select("?person", "?name") + .where(person, RDF.type, ex.Person) + .where(person, ex.name, name) + .build() + ) + assert "?person" in q + assert "?name" in q + + def test_literal_in_where(self): + lit = og.Literal("Alice", language="en") + q = ( + select("?s") + .where("?s", ex.name, lit) + .build() + ) + assert '"Alice"@en' in q