diff --git a/README.md b/README.md
index f3175be..21e9773 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,7 @@ rdf4j-python bridges the gap between Python and the robust [Eclipse RDF4J](https
- **Async-First Design**: Native support for async/await with synchronous fallback
- **Repository Management**: Create, access, and manage RDF4J repositories programmatically
- **SPARQL Support**: Execute SELECT, ASK, CONSTRUCT, and UPDATE queries effortlessly
+- **SPARQL Query Builder**: Fluent, programmatic query construction with method chaining
- **Transaction Support**: Atomic operations with commit/rollback and isolation levels
- **Flexible Data Handling**: Add, retrieve, and manipulate RDF triples and quads
- **File Upload**: Upload RDF files (Turtle, N-Triples, N-Quads, RDF/XML, JSON-LD, TriG, N3) directly to repositories
@@ -88,6 +89,62 @@ if __name__ == "__main__":
asyncio.run(main())
```
+### SPARQL Query Builder
+
+Build queries programmatically with method chaining instead of writing raw SPARQL strings:
+
+```python
+from rdf4j_python import select, ask, construct, describe, GraphPattern, Namespace
+
+ex = Namespace("ex", "http://example.org/")
+foaf = Namespace("foaf", "http://xmlns.com/foaf/0.1/")
+
+# SELECT with typed terms — IRIs serialize automatically
+query = (
+ select("?person", "?name")
+ .where("?person", "a", ex.Person)
+ .where("?person", foaf.name, "?name")
+ .optional("?person", foaf.email, "?email")
+ .filter("?name != 'Bob'")
+ .order_by("?name")
+ .limit(10)
+ .build()
+)
+
+# Or use string-based prefixed names
+query = (
+ select("?name")
+ .prefix("foaf", "http://xmlns.com/foaf/0.1/")
+ .where("?person", "a", "foaf:Person")
+ .where("?person", "foaf:name", "?name")
+ .build()
+)
+
+# GROUP BY with aggregation
+query = (
+ select("?city", "(COUNT(?person) AS ?count)")
+ .where("?person", ex.city, "?city")
+ .group_by("?city")
+ .having("COUNT(?person) > 1")
+ .order_by("DESC(?count)")
+ .build()
+)
+
+# ASK, CONSTRUCT, and DESCRIBE
+ask_query = ask().where("?s", ex.name, "?name").build()
+
+construct_query = (
+ construct(("?s", ex.fullName, "?name"))
+ .where("?s", ex.firstName, "?fname").where("?s", ex.lastName, "?lname")
+ .bind("CONCAT(?fname, ' ', ?lname)", "?name")
+ .build()
+)
+
+describe_query = describe(ex.alice).build()
+```
+
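+The query builder supports FILTER, OPTIONAL, UNION, BIND, VALUES, sub-queries, DISTINCT, ORDER BY, GROUP BY, HAVING, LIMIT, and OFFSET. Both raw strings and typed objects (`IRI`, `Variable`, `Literal`, `BlankNode`) work as terms; `Namespace` attribute access (e.g. `foaf.name`) is used to supply IRI terms, and a `Namespace` can be passed to `prefix()` to register its prefix.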
+
### Working with Multiple Graphs
```python
@@ -155,15 +212,18 @@ async def advanced_example():
]
await repo.add_statements(statements)
- # Complex SPARQL query
- query = """
- PREFIX foaf: <http://xmlns.com/foaf/0.1/>
- SELECT ?name ?email WHERE {
- ?person foaf:name ?name .
- OPTIONAL { ?person foaf:email ?email }
- }
- ORDER BY ?name
- """
+ # Query with the fluent query builder
+ from rdf4j_python import select
+ from rdf4j_python.model._namespace import Namespace
+
+ foaf = Namespace("foaf", "http://xmlns.com/foaf/0.1/")
+ query = (
+ select("?name", "?email")
+ .where("?person", foaf.name, "?name")
+ .optional("?person", foaf.email, "?email")
+ .order_by("?name")
+ .build()
+ )
results = await repo.query(query)
```
@@ -256,6 +316,7 @@ For more detailed examples, see the [examples](examples/) directory.
rdf4j_python/
├── _driver/ # Core async driver implementation
├── model/ # Data models and configurations
+├── query/ # SPARQL query builder
├── exception/ # Custom exceptions
└── utils/ # Utility functions
diff --git a/examples/README.md b/examples/README.md
index 28b7553..1519ca4 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -74,7 +74,7 @@ python examples/delete_repository.py
### 🔍 Data Querying Examples
#### `query_and_print.py`
-Comprehensive example of SPARQL querying and result formatting:
+Comprehensive example of SPARQL querying and result formatting using the fluent query builder:
- SELECT queries with various clauses (FILTER, OPTIONAL, JOIN)
- CONSTRUCT queries for data transformation
- ASK queries for boolean checks
@@ -82,6 +82,7 @@ Comprehensive example of SPARQL querying and result formatting:
- Multiple result formatting options
**Key Features:**
+- Fluent SPARQL query builder (no raw query strings)
- Automatic test data setup
- Formatted table output
- JSON-like result formatting
@@ -92,8 +93,8 @@ Comprehensive example of SPARQL querying and result formatting:
python examples/query_and_print.py
```
-#### `query.py` (Original)
-Simple query example showing basic SPARQL execution.
+#### `query.py`
+Simple query example showing basic SPARQL execution using the query builder.
**Usage:**
```bash
@@ -112,6 +113,7 @@ End-to-end example demonstrating the full repository lifecycle:
**Key Features:**
- Multi-repository workflow
+- Fluent SPARQL query builder with Namespace objects
- Real-world data scenarios (customers, products, analytics)
- Named graph usage
- Comprehensive error handling and cleanup
@@ -121,7 +123,7 @@ End-to-end example demonstrating the full repository lifecycle:
python examples/complete_workflow.py
```
-#### `repo.py` (Original)
+#### `repo.py`
Basic repository creation and data insertion example.
**Usage:**
diff --git a/examples/complete_workflow.py b/examples/complete_workflow.py
index d38d83d..171e730 100644
--- a/examples/complete_workflow.py
+++ b/examples/complete_workflow.py
@@ -13,7 +13,8 @@
import asyncio
-from rdf4j_python import AsyncRdf4j
+from rdf4j_python import AsyncRdf4j, select
+from rdf4j_python.model._namespace import Namespace
from rdf4j_python.model.repository_config import (
MemoryStoreConfig,
RepositoryConfig,
@@ -21,6 +22,9 @@
)
from rdf4j_python.model.term import IRI, Literal, Quad
+# Define namespaces for query building
+ecom = Namespace("ecom", "http://example.com/")
+
async def workflow_step_1_create_repositories():
"""Step 1: Create multiple repositories with different configurations."""
@@ -199,14 +203,14 @@ async def workflow_step_3_query_data():
# Query 1: Customer information
print("👥 Customer Information:")
customer_repo = await db.get_repository("customer-data")
- customer_query = """
- SELECT ?customer ?name ?email ?age WHERE {
- ?customer <http://example.com/name> ?name .
- OPTIONAL { ?customer <http://example.com/email> ?email }
- OPTIONAL { ?customer <http://example.com/age> ?age }
- }
- ORDER BY ?name
- """
+ customer_query = (
+ select("?customer", "?name", "?email", "?age")
+ .where("?customer", ecom.name, "?name")
+ .optional("?customer", ecom.email, "?email")
+ .optional("?customer", ecom.age, "?age")
+ .order_by("?name")
+ .build()
+ )
customer_results = await customer_repo.query(customer_query)
for result in customer_results:
name = result["name"].value if result["name"] else "N/A"
@@ -217,14 +221,14 @@ async def workflow_step_3_query_data():
# Query 2: Product catalog
print("\n🛍️ Product Catalog:")
product_repo = await db.get_repository("product-catalog")
- product_query = """
- SELECT ?product ?name ?price ?category WHERE {
- ?product <http://example.com/name> ?name .
- OPTIONAL { ?product <http://example.com/price> ?price }
- OPTIONAL { ?product <http://example.com/category> ?category }
- }
- ORDER BY ?price
- """
+ product_query = (
+ select("?product", "?name", "?price", "?category")
+ .where("?product", ecom.name, "?name")
+ .optional("?product", ecom.price, "?price")
+ .optional("?product", ecom.category, "?category")
+ .order_by("?price")
+ .build()
+ )
product_results = await product_repo.query(product_query)
for result in product_results:
name = result["name"].value if result["name"] else "N/A"
@@ -235,14 +239,14 @@ async def workflow_step_3_query_data():
# Query 3: Purchase analytics
print("\n📊 Purchase Analytics:")
analytics_repo = await db.get_repository("analytics-data")
- analytics_query = """
- SELECT ?purchase ?customer ?product ?date WHERE {
- ?purchase <http://example.com/customer> ?customer .
- ?purchase <http://example.com/product> ?product .
- OPTIONAL { ?purchase <http://example.com/date> ?date }
- }
- ORDER BY ?date
- """
+ analytics_query = (
+ select("?purchase", "?customer", "?product", "?date")
+ .where("?purchase", ecom.customer, "?customer")
+ .where("?purchase", ecom.product, "?product")
+ .optional("?purchase", ecom.date, "?date")
+ .order_by("?date")
+ .build()
+ )
analytics_results = await analytics_repo.query(analytics_query)
for result in analytics_results:
customer = result["customer"].value if result["customer"] else "N/A"
diff --git a/examples/query.py b/examples/query.py
index 64f9ec8..b29af5a 100644
--- a/examples/query.py
+++ b/examples/query.py
@@ -2,7 +2,7 @@
from pyoxigraph import QuerySolutions
-from rdf4j_python import AsyncRdf4j
+from rdf4j_python import AsyncRdf4j, select
from rdf4j_python.model.term import IRI, Literal, Quad
@@ -25,7 +25,11 @@ async def main():
),
]
)
- result = await repo.query("SELECT * WHERE { ?s ?p ?o }")
+
+ # Build the query using the fluent query builder
+ query = select("?s", "?p", "?o").where("?s", "?p", "?o").build()
+
+ result = await repo.query(query)
assert isinstance(result, QuerySolutions)
for solution in result:
print(solution)
diff --git a/examples/query_and_print.py b/examples/query_and_print.py
index 6735889..cf2551b 100644
--- a/examples/query_and_print.py
+++ b/examples/query_and_print.py
@@ -3,6 +3,8 @@
This example demonstrates how to execute various types of SPARQL queries
against RDF4J repositories and format the results in different ways.
+
+Queries are built using the fluent SPARQL query builder.
"""
import asyncio
@@ -10,7 +12,8 @@
from pyoxigraph import QuerySolutions, QueryTriples
-from rdf4j_python import AsyncRdf4j
+from rdf4j_python import AsyncRdf4j, ask, construct, select
+from rdf4j_python.model._namespace import Namespace
from rdf4j_python.model.repository_config import (
MemoryStoreConfig,
RepositoryConfig,
@@ -18,6 +21,9 @@
)
from rdf4j_python.model.term import IRI, Literal, Quad
+# Define namespaces for query building
+ex = Namespace("ex", "http://example.org/")
+
async def setup_test_data(repo):
"""Add some test data to the repository for querying."""
@@ -189,43 +195,43 @@ async def execute_select_queries(repo):
print("=" * 50)
# Query 1: Simple SELECT - get all people and their names
- query1 = """
- SELECT ?person ?name WHERE {
- ?person <http://example.org/name> ?name .
- }
- """
+ query1 = (
+ select("?person", "?name")
+ .where("?person", ex.name, "?name")
+ .build()
+ )
result1 = await repo.query(query1)
print_select_results(result1, "All People and Their Names")
# Query 2: SELECT with FILTER - people older than 30
- query2 = """
- SELECT ?person ?name ?age WHERE {
- ?person <http://example.org/name> ?name .
- ?person <http://example.org/age> ?age .
- FILTER(?age > 30)
- }
- """
+ query2 = (
+ select("?person", "?name", "?age")
+ .where("?person", ex.name, "?name")
+ .where("?person", ex.age, "?age")
+ .filter("?age > 30")
+ .build()
+ )
result2 = await repo.query(query2)
print_select_results(result2, "People Older Than 30")
# Query 3: SELECT with OPTIONAL - people and their email (if available)
- query3 = """
- SELECT ?person ?name ?email WHERE {
- ?person <http://example.org/name> ?name .
- OPTIONAL { ?person <http://example.org/email> ?email }
- }
- """
+ query3 = (
+ select("?person", "?name", "?email")
+ .where("?person", ex.name, "?name")
+ .optional("?person", ex.email, "?email")
+ .build()
+ )
result3 = await repo.query(query3)
print_select_results(result3, "People and Their Email Addresses")
# Query 4: SELECT with JOIN - people and their employers
- query4 = """
- SELECT ?person ?name ?company WHERE {
- ?person <http://example.org/name> ?name .
- ?person <http://example.org/worksFor> ?org .
- ?org <http://example.org/name> ?company .
- }
- """
+ query4 = (
+ select("?person", "?name", "?company")
+ .where("?person", ex.name, "?name")
+ .where("?person", ex.worksFor, "?org")
+ .where("?org", ex.name, "?company")
+ .build()
+ )
result4 = await repo.query(query4)
print_select_results(result4, "People and Their Employers")
@@ -239,29 +245,25 @@ async def execute_construct_queries(repo):
print("=" * 50)
# Query 1: CONSTRUCT - create simplified person data
- query1 = """
- CONSTRUCT {
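- ?person <http://example.org/hasName> ?name ;
- <http://example.org/hasAge> ?age .
- }
- WHERE {
- ?person <http://example.org/name> ?name .
- ?person <http://example.org/age> ?age .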
- }
- """
+ query1 = (
+ construct(
+ ("?person", ex.hasName, "?name"),
+ ("?person", ex.hasAge, "?age"),
+ )
+ .where("?person", ex.name, "?name")
+ .where("?person", ex.age, "?age")
+ .build()
+ )
result1 = await repo.query(query1)
print_construct_results(result1, "Simplified Person Data")
# Query 2: CONSTRUCT - create employment relationships
- query2 = """
- CONSTRUCT {
- ?person <http://example.org/employedBy> ?company .
- }
- WHERE {
- ?person <http://example.org/worksFor> ?org .
- ?org <http://example.org/name> ?company .
- }
- """
+ query2 = (
+ construct(("?person", ex.employedBy, "?company"))
+ .where("?person", ex.worksFor, "?org")
+ .where("?org", ex.name, "?company")
+ .build()
+ )
result2 = await repo.query(query2)
print_construct_results(result2, "Employment Relationships")
@@ -272,30 +274,30 @@ async def execute_ask_queries(repo):
print("=" * 50)
# Query 1: ASK - check if Alice exists
- query1 = """
- ASK {
- ?person <http://example.org/name> "Alice" .
- }
- """
+ query1 = (
+ ask()
+ .where("?person", ex.name, '"Alice"')
+ .build()
+ )
result1 = await repo.query(query1)
print(f"Does Alice exist? {'✅ Yes' if result1 else '❌ No'}")
# Query 2: ASK - check if anyone is older than 40
- query2 = """
- ASK {
- ?person <http://example.org/age> ?age .
- FILTER(?age > 40)
- }
- """
+ query2 = (
+ ask()
+ .where("?person", ex.age, "?age")
+ .filter("?age > 40")
+ .build()
+ )
result2 = await repo.query(query2)
print(f"Is anyone older than 40? {'✅ Yes' if result2 else '❌ No'}")
# Query 3: ASK - check if there are any email addresses
- query3 = """
- ASK {
- ?person <http://example.org/email> ?email .
- }
- """
+ query3 = (
+ ask()
+ .where("?person", ex.email, "?email")
+ .build()
+ )
result3 = await repo.query(query3)
print(f"Are there any email addresses? {'✅ Yes' if result3 else '❌ No'}")
@@ -306,29 +308,29 @@ async def execute_aggregate_queries(repo):
print("=" * 50)
# Query 1: COUNT - total number of people
- query1 = """
- SELECT (COUNT(?person) AS ?totalPeople) WHERE {
- ?person <http://example.org/name> ?name .
- }
- """
+ query1 = (
+ select("(COUNT(?person) AS ?totalPeople)")
+ .where("?person", ex.name, "?name")
+ .build()
+ )
result1 = await repo.query(query1)
print_select_results(result1, "Total Number of People")
# Query 2: AVG - average age
- query2 = """
- SELECT (AVG(?age) AS ?averageAge) WHERE {
- ?person <http://example.org/age> ?age .
- }
- """
+ query2 = (
+ select("(AVG(?age) AS ?averageAge)")
+ .where("?person", ex.age, "?age")
+ .build()
+ )
result2 = await repo.query(query2)
print_select_results(result2, "Average Age")
# Query 3: MIN/MAX - youngest and oldest person
- query3 = """
- SELECT (MIN(?age) AS ?minAge) (MAX(?age) AS ?maxAge) WHERE {
- ?person <http://example.org/age> ?age .
- }
- """
+ query3 = (
+ select("(MIN(?age) AS ?minAge)", "(MAX(?age) AS ?maxAge)")
+ .where("?person", ex.age, "?age")
+ .build()
+ )
result3 = await repo.query(query3)
print_select_results(result3, "Age Range (Min/Max)")
diff --git a/rdf4j_python/__init__.py b/rdf4j_python/__init__.py
index 2a65acb..f43a7b9 100644
--- a/rdf4j_python/__init__.py
+++ b/rdf4j_python/__init__.py
@@ -40,6 +40,7 @@
Triple,
Variable,
)
+from .query import GraphPattern, ask, construct, describe, select
__all__ = [
# Main classes
@@ -78,4 +79,10 @@
"Object",
"Context",
"QuadResultSet",
+ # Query builder
+ "select",
+ "ask",
+ "construct",
+ "describe",
+ "GraphPattern",
]
diff --git a/rdf4j_python/query/__init__.py b/rdf4j_python/query/__init__.py
new file mode 100644
index 0000000..a9be7af
--- /dev/null
+++ b/rdf4j_python/query/__init__.py
@@ -0,0 +1,37 @@
+"""SPARQL query builder for RDF4J Python."""
+
+from ._builder import AskQuery, ConstructQuery, DescribeQuery, SelectQuery
+from ._pattern import GraphPattern
+
+
+def select(*variables: str) -> SelectQuery:
+ """Create a new ``SELECT`` query builder."""
+ return SelectQuery(*variables)
+
+
+def ask() -> AskQuery:
+ """Create a new ``ASK`` query builder."""
+ return AskQuery()
+
+
+def construct(*templates: tuple) -> ConstructQuery:
+ """Create a new ``CONSTRUCT`` query builder."""
+ return ConstructQuery(*templates)
+
+
+def describe(*resources) -> DescribeQuery:
+ """Create a new ``DESCRIBE`` query builder."""
+ return DescribeQuery(*resources)
+
+
+__all__ = [
+ "select",
+ "ask",
+ "construct",
+ "describe",
+ "GraphPattern",
+ "SelectQuery",
+ "AskQuery",
+ "ConstructQuery",
+ "DescribeQuery",
+]
diff --git a/rdf4j_python/query/_builder.py b/rdf4j_python/query/_builder.py
new file mode 100644
index 0000000..dfb414f
--- /dev/null
+++ b/rdf4j_python/query/_builder.py
@@ -0,0 +1,224 @@
+"""SPARQL query builder classes."""
+
+from __future__ import annotations
+
+import copy
+from typing import Any
+
+from rdf4j_python.model._namespace import Namespace
+
+from ._pattern import GraphPattern
+from ._term import Term, serialize_term
+
+
+class _QueryBase:
+ """Shared state and behaviour for all query builders.
+
+ Provides prefix management, WHERE-clause delegation, ``copy()``, and
+ ``__str__()`` so concrete builders only need to implement ``build()``.
+ """
+
+ def __init__(self) -> None:
+ self._pattern = GraphPattern()
+ self._prefixes: dict[str, str] = {}
+
+ # ── prefix handling ──────────────────────────────────────────────
+
+ def prefix(self, name_or_ns: str | Namespace, uri: str | None = None) -> Any:
+ """Register a prefix.
+
+ - ``prefix("ex", "http://example.org/")`` — string pair
+ - ``prefix(ns)`` — extract from a ``Namespace`` object
+ """
+ if isinstance(name_or_ns, Namespace):
+ self._prefixes[name_or_ns.prefix] = name_or_ns.namespace.value
+ else:
+ if uri is None:
+ raise ValueError("uri is required when name_or_ns is a string")
+ self._prefixes[name_or_ns] = uri
+ return self
+
+ def _render_prefixes(self) -> str:
+ return "\n".join(f"PREFIX {k}: <{v}>" for k, v in self._prefixes.items())
+
+ # ── WHERE-clause delegation ──────────────────────────────────────
+
+ def where(self, s: Term, p: Term, o: Term) -> Any:
+ self._pattern.where(s, p, o)
+ return self
+
+ def filter(self, expr: str) -> Any:
+ self._pattern.filter(expr)
+ return self
+
+ def optional(
+ self,
+ s_or_pattern: Term | GraphPattern,
+ p: Term | None = None,
+ o: Term | None = None,
+ ) -> Any:
+ self._pattern.optional(s_or_pattern, p, o)
+ return self
+
+ def union(self, *patterns: GraphPattern) -> Any:
+ self._pattern.union(*patterns)
+ return self
+
+ def bind(self, expr: str, var: str) -> Any:
+ self._pattern.bind(expr, var)
+ return self
+
+ def values(self, var: str, vals: list[Any]) -> Any:
+ self._pattern.values(var, vals)
+ return self
+
+ def sub_query(self, builder: SelectQuery) -> Any:
+ self._pattern.sub_query(builder)
+ return self
+
+ # ── common helpers ───────────────────────────────────────────────
+
+ def _render_where(self, parts: list[str]) -> None:
+ """Append ``WHERE { … }`` to *parts* if patterns exist."""
+ if len(self._pattern) > 0:
+ parts.append("WHERE {")
+ parts.append(self._pattern.to_sparql())
+ parts.append("}")
+
+ def _build_parts(self) -> list[str]:
+ """Return the prefix lines (if any) as a starting list."""
+ parts: list[str] = []
+ if self._prefixes:
+ parts.append(self._render_prefixes())
+ return parts
+
+ def copy(self):
+ return copy.deepcopy(self)
+
+ def build(self) -> str:
+ raise NotImplementedError
+
+ def __str__(self) -> str:
+ return self.build()
+
+
+# ── concrete builders ────────────────────────────────────────────────
+
+
+class SelectQuery(_QueryBase):
+ """Builder for ``SELECT`` queries."""
+
+ def __init__(self, *variables: str) -> None:
+ super().__init__()
+ self._variables = list(variables)
+ self._distinct = False
+ self._order_by: list[str] = []
+ self._group_by: list[str] = []
+ self._having: str | None = None
+ self._limit: int | None = None
+ self._offset: int | None = None
+
+ def distinct(self) -> SelectQuery:
+ self._distinct = True
+ return self
+
+ def order_by(self, *exprs: str) -> SelectQuery:
+ self._order_by.extend(exprs)
+ return self
+
+ def group_by(self, *exprs: str) -> SelectQuery:
+ self._group_by.extend(exprs)
+ return self
+
+ def having(self, expr: str) -> SelectQuery:
+ self._having = expr
+ return self
+
+ def limit(self, n: int) -> SelectQuery:
+ self._limit = n
+ return self
+
+ def offset(self, n: int) -> SelectQuery:
+ self._offset = n
+ return self
+
+ def build(self) -> str:
+ if not self._variables:
+ raise ValueError("SELECT query requires at least one variable")
+ if len(self._pattern) == 0:
+ raise ValueError("SELECT query requires at least one WHERE pattern")
+
+ parts = self._build_parts()
+
+ keyword = "SELECT DISTINCT" if self._distinct else "SELECT"
+ parts.append(f"{keyword} {' '.join(self._variables)}")
+
+ self._render_where(parts)
+
+ if self._group_by:
+ parts.append(f"GROUP BY {' '.join(self._group_by)}")
+ if self._having:
+ parts.append(f"HAVING ({self._having})")
+ if self._order_by:
+ parts.append(f"ORDER BY {' '.join(self._order_by)}")
+ if self._limit is not None:
+ parts.append(f"LIMIT {self._limit}")
+ if self._offset is not None:
+ parts.append(f"OFFSET {self._offset}")
+
+ return "\n".join(parts)
+
+
+class AskQuery(_QueryBase):
+ """Builder for ``ASK`` queries."""
+
+ def build(self) -> str:
+ if len(self._pattern) == 0:
+ raise ValueError("ASK query requires at least one WHERE pattern")
+
+ parts = self._build_parts()
+ parts.append("ASK {")
+ parts.append(self._pattern.to_sparql())
+ parts.append("}")
+ return "\n".join(parts)
+
+
+class ConstructQuery(_QueryBase):
+ """Builder for ``CONSTRUCT`` queries."""
+
+ def __init__(self, *templates: tuple[Term, Term, Term]) -> None:
+ super().__init__()
+ self._templates = list(templates)
+
+ def build(self) -> str:
+ if not self._templates:
+ raise ValueError("CONSTRUCT query requires at least one template triple")
+
+ parts = self._build_parts()
+
+ parts.append("CONSTRUCT {")
+ for s, p, o in self._templates:
+ parts.append(
+ f" {serialize_term(s)} {serialize_term(p)} {serialize_term(o)} ."
+ )
+ parts.append("}")
+
+ self._render_where(parts)
+ return "\n".join(parts)
+
+
+class DescribeQuery(_QueryBase):
+ """Builder for ``DESCRIBE`` queries."""
+
+ def __init__(self, *resources: Term) -> None:
+ super().__init__()
+ self._resources = list(resources)
+
+ def build(self) -> str:
+ if not self._resources:
+ raise ValueError("DESCRIBE query requires at least one resource")
+
+ parts = self._build_parts()
+ parts.append(f"DESCRIBE {' '.join(serialize_term(r) for r in self._resources)}")
+ self._render_where(parts)
+ return "\n".join(parts)
diff --git a/rdf4j_python/query/_pattern.py b/rdf4j_python/query/_pattern.py
new file mode 100644
index 0000000..114ee57
--- /dev/null
+++ b/rdf4j_python/query/_pattern.py
@@ -0,0 +1,102 @@
+"""GraphPattern — composable SPARQL WHERE block."""
+
+from __future__ import annotations
+
+import copy
+from typing import TYPE_CHECKING, Any
+
+from ._term import Term, serialize_term
+
+if TYPE_CHECKING:
+ from ._builder import SelectQuery
+
+
+def _ensure_var(var: str) -> str:
+ """Prefix *var* with ``?`` if missing."""
+ return var if var.startswith("?") else f"?{var}"
+
+
+class GraphPattern:
+ """A composable block of SPARQL graph patterns.
+
+ Every mutating method returns ``self`` for fluent chaining.
+ """
+
+ def __init__(self) -> None:
+ self._elements: list[str] = []
+
+ def where(self, s: Term, p: Term, o: Term) -> GraphPattern:
+ """Add a triple pattern."""
+ self._elements.append(
+ f"{serialize_term(s)} {serialize_term(p)} {serialize_term(o)} ."
+ )
+ return self
+
+ def filter(self, expr: str) -> GraphPattern:
+ """Add a ``FILTER(expr)`` clause."""
+ self._elements.append(f"FILTER({expr})")
+ return self
+
+ def optional(
+ self,
+ s_or_pattern: Term | GraphPattern,
+ p: Term | None = None,
+ o: Term | None = None,
+ ) -> GraphPattern:
+ """Add an ``OPTIONAL { … }`` block.
+
+ - ``optional(s, p, o)`` — single triple shorthand
+ - ``optional(GraphPattern())`` — complex pattern block
+ """
+ if isinstance(s_or_pattern, GraphPattern):
+ body = s_or_pattern.to_sparql(indent=4)
+ self._elements.append(f"OPTIONAL {{\n{body}\n }}")
+ else:
+ if p is None or o is None:
+ raise ValueError(
+ "optional() requires either a GraphPattern or three term arguments (s, p, o)"
+ )
+ triple = f"{serialize_term(s_or_pattern)} {serialize_term(p)} {serialize_term(o)} ."
+ self._elements.append(f"OPTIONAL {{ {triple} }}")
+ return self
+
+ def union(self, *patterns: GraphPattern) -> GraphPattern:
+ """Add ``{ … } UNION { … }`` blocks."""
+ if len(patterns) < 2:
+ raise ValueError("union() requires at least two GraphPattern arguments")
+ parts = []
+ for pat in patterns:
+ body = pat.to_sparql(indent=4)
+ parts.append(f"{{\n{body}\n }}")
+ self._elements.append(" UNION ".join(parts))
+ return self
+
+ def bind(self, expr: str, var: str) -> GraphPattern:
+ """Add a ``BIND(expr AS ?var)`` clause."""
+ self._elements.append(f"BIND({expr} AS {_ensure_var(var)})")
+ return self
+
+ def values(self, var: str, vals: list[Any]) -> GraphPattern:
+ """Add a ``VALUES ?var { … }`` clause."""
+ serialized = " ".join(serialize_term(val) for val in vals)
+ self._elements.append(f"VALUES {_ensure_var(var)} {{ {serialized} }}")
+ return self
+
+ def sub_query(self, builder: SelectQuery) -> GraphPattern:
+ """Embed a sub-SELECT inside this pattern."""
+ inner = builder.build()
+ indented = "\n".join(f" {line}" for line in inner.splitlines())
+ self._elements.append(f"{{\n{indented}\n }}")
+ return self
+
+ def to_sparql(self, indent: int = 2) -> str:
+ """Render the pattern body (without the outer braces)."""
+ prefix = " " * indent
+ return "\n".join(f"{prefix}{el}" for el in self._elements)
+
+ def copy(self) -> GraphPattern:
+ """Return a deep copy of this pattern."""
+ return copy.deepcopy(self)
+
+ def __len__(self) -> int:
+ return len(self._elements)
diff --git a/rdf4j_python/query/_term.py b/rdf4j_python/query/_term.py
new file mode 100644
index 0000000..631dd15
--- /dev/null
+++ b/rdf4j_python/query/_term.py
@@ -0,0 +1,35 @@
+"""Term serialization for SPARQL query building."""
+
+from __future__ import annotations
+
+from typing import Union
+
+import pyoxigraph as og
+
+Term = Union[str, og.NamedNode, og.Variable, og.Literal, og.BlankNode]
+
+_XSD_STRING = "http://www.w3.org/2001/XMLSchema#string"
+
+
+def serialize_term(term: Term) -> str:
+ """Convert a term to its SPARQL string representation.
+
+ Strings are passed through as-is (variables, prefixed names, full IRIs, etc.).
+ Typed objects (NamedNode, Variable, Literal, BlankNode) are formatted accordingly.
+ """
+ if isinstance(term, str):
+ return term
+ if isinstance(term, og.NamedNode):
+ return f"<{term.value}>"
+ if isinstance(term, og.Variable):
+ return f"?{term.value}"
+ if isinstance(term, og.Literal):
+ value = term.value.replace("\\", "\\\\").replace('"', '\\"')
+ if term.language:
+ return f'"{value}"@{term.language}'
+ if term.datatype and term.datatype.value != _XSD_STRING:
+ return f'"{value}"^^<{term.datatype.value}>'
+ return f'"{value}"'
+ if isinstance(term, og.BlankNode):
+ return f"_:{term.value}"
+ raise TypeError(f"Unsupported term type: {type(term)}")
diff --git a/tests/test_query_builder.py b/tests/test_query_builder.py
new file mode 100644
index 0000000..2df553f
--- /dev/null
+++ b/tests/test_query_builder.py
@@ -0,0 +1,488 @@
+"""Tests for the SPARQL query builder."""
+
+import pytest
+import pyoxigraph as og
+
+from rdf4j_python.model._namespace import Namespace
+from rdf4j_python.model.vocabulary import EXAMPLE as ex
+from rdf4j_python.model.vocabulary import RDF
+from rdf4j_python.query import (
+ GraphPattern,
+ ask,
+ construct,
+ describe,
+ select,
+)
+
+
+# ── serialize_term ───────────────────────────────────────────────────
+
+
+ class TestSerializeTerm:
+     """serialize_term: pass-through for strings, formatting for typed terms.
+
+     serialize_term is imported inside each test rather than at module level,
+     as it comes from the private ``_term`` module.
+     """
+
+     def test_iri(self):
+         from rdf4j_python.query._term import serialize_term
+
+         # A NamedNode is rendered as a bracketed IRI.
+         iri = og.NamedNode("http://example.org/Person")
+         assert serialize_term(iri) == "<http://example.org/Person>"
+
+     def test_variable(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term(og.Variable("name")) == "?name"
+
+     def test_literal_plain(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term(og.Literal("hello")) == '"hello"'
+
+     def test_literal_with_language(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term(og.Literal("hello", language="en")) == '"hello"@en'
+
+     def test_literal_with_datatype(self):
+         from rdf4j_python.query._term import serialize_term
+
+         dt = og.NamedNode("http://www.w3.org/2001/XMLSchema#integer")
+         expected = '"42"^^<http://www.w3.org/2001/XMLSchema#integer>'
+         assert serialize_term(og.Literal("42", datatype=dt)) == expected
+
+     def test_literal_string_datatype_omitted(self):
+         from rdf4j_python.query._term import serialize_term
+
+         dt = og.NamedNode("http://www.w3.org/2001/XMLSchema#string")
+         assert serialize_term(og.Literal("hello", datatype=dt)) == '"hello"'
+
+     def test_blank_node(self):
+         from rdf4j_python.query._term import serialize_term
+
+         bn = og.BlankNode("b0")
+         assert serialize_term(bn) == "_:b0"
+
+     def test_string_variable(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term("?x") == "?x"
+
+     def test_string_a(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term("a") == "a"
+
+     def test_string_prefixed(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term("foaf:name") == "foaf:name"
+
+     def test_string_full_iri(self):
+         from rdf4j_python.query._term import serialize_term
+
+         # A string that already carries its brackets must come back unchanged.
+         iri = "<http://example.org/Person>"
+         assert serialize_term(iri) == "<http://example.org/Person>"
+
+     def test_string_literal(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term('"hello"') == '"hello"'
+
+     def test_unsupported_type(self):
+         from rdf4j_python.query._term import serialize_term
+
+         with pytest.raises(TypeError):
+             serialize_term(42)
+
+     def test_literal_with_quotes(self):
+         from rdf4j_python.query._term import serialize_term
+
+         assert serialize_term(og.Literal('say "hi"')) == '"say \\"hi\\""'
+
+     def test_namespace_produced_iri(self):
+         from rdf4j_python.query._term import serialize_term
+
+         person = ex.Person
+         # The expected text is built from the term's own IRI so the test does
+         # not hard-code the EXAMPLE namespace.
+         # NOTE(review): assumes Namespace attributes expose `.value` like a
+         # pyoxigraph NamedNode - confirm.
+         assert serialize_term(person) == f"<{person.value}>"
+
+
+# ── SelectQuery ──────────────────────────────────────────────────────
+
+
+ class TestSelectQuery:
+     """SELECT builder: projection, patterns, modifiers, prefixes, validation.
+
+     Where a rendered IRI is expected, the text is built from the term's own
+     ``.value`` so the tests do not hard-code vocabulary namespaces.
+     NOTE(review): assumes RDF/EXAMPLE attributes expose `.value` like a
+     pyoxigraph NamedNode - confirm.
+     """
+
+     def test_basic_select(self):
+         q = select("?s", "?p").where("?s", "a", "?p").build()
+         assert "SELECT ?s ?p" in q
+         assert "?s a ?p ." in q
+
+     def test_select_with_typed_terms(self):
+         q = (
+             select("?person", "?name")
+             .where("?person", RDF.type, ex.Person)
+             .where("?person", ex.name, "?name")
+             .build()
+         )
+         # Each typed term must appear as a bracketed IRI.
+         assert f"<{RDF.type.value}>" in q
+         assert f"<{ex.Person.value}>" in q
+         assert f"<{ex.name.value}>" in q
+
+     def test_select_with_limit(self):
+         q = select("?s").where("?s", "a", "?o").limit(10).build()
+         assert "LIMIT 10" in q
+
+     def test_select_with_offset(self):
+         q = select("?s").where("?s", "a", "?o").offset(5).build()
+         assert "OFFSET 5" in q
+
+     def test_select_with_order_by(self):
+         q = select("?name").where("?s", "a", "?name").order_by("?name").build()
+         assert "ORDER BY ?name" in q
+
+     def test_select_distinct(self):
+         q = select("?name").distinct().where("?s", "a", "?name").build()
+         assert "SELECT DISTINCT ?name" in q
+
+     def test_select_with_filter(self):
+         q = (
+             select("?name")
+             .where("?person", "a", "ex:Person")
+             .where("?person", "ex:name", "?name")
+             .filter("?name != 'Bob'")
+             .build()
+         )
+         assert "FILTER(?name != 'Bob')" in q
+
+     def test_select_with_optional_triple(self):
+         q = (
+             select("?name", "?email")
+             .where("?person", "a", "ex:Person")
+             .optional("?person", "ex:email", "?email")
+             .build()
+         )
+         assert "OPTIONAL { ?person ex:email ?email . }" in q
+
+     def test_select_with_optional_pattern(self):
+         pattern = GraphPattern().where("?person", ex.email, "?email").filter("bound(?email)")
+         q = (
+             select("?name", "?email")
+             .where("?person", "a", "ex:Person")
+             .optional(pattern)
+             .build()
+         )
+         assert "OPTIONAL {" in q
+         assert "FILTER(bound(?email))" in q
+
+     def test_select_with_group_by_having(self):
+         q = (
+             select("?city", "(COUNT(?person) AS ?count)")
+             .where("?person", RDF.type, ex.Person)
+             .where("?person", ex.city, "?city")
+             .group_by("?city")
+             .having("COUNT(?person) > 1")
+             .order_by("DESC(?count)")
+             .build()
+         )
+         assert "GROUP BY ?city" in q
+         assert "HAVING (COUNT(?person) > 1)" in q
+         assert "ORDER BY DESC(?count)" in q
+
+     def test_select_with_union(self):
+         q = (
+             select("?label")
+             .where("?s", RDF.type, ex.Person)
+             .union(
+                 GraphPattern().where("?s", ex.name, "?label"),
+                 GraphPattern().where("?s", ex.nickname, "?label"),
+             )
+             .build()
+         )
+         assert "UNION" in q
+
+     def test_select_with_bind(self):
+         q = (
+             select("?fullName")
+             .where("?s", ex.firstName, "?fname")
+             .where("?s", ex.lastName, "?lname")
+             .bind("CONCAT(?fname, ' ', ?lname)", "?fullName")
+             .build()
+         )
+         assert "BIND(CONCAT(?fname, ' ', ?lname) AS ?fullName)" in q
+
+     def test_select_with_values(self):
+         q = (
+             select("?person", "?name")
+             .where("?person", ex.name, "?name")
+             .values("?person", [ex.alice, ex.bob])
+             .build()
+         )
+         # Both IRIs must be listed, space-separated, inside the braces.
+         expected = f"VALUES ?person {{ <{ex.alice.value}> <{ex.bob.value}> }}"
+         assert expected in q
+
+     def test_select_with_sub_query(self):
+         sub = (
+             select("?person", "(MAX(?score) AS ?maxScore)")
+             .where("?person", ex.score, "?score")
+             .group_by("?person")
+         )
+         q = (
+             select("?person", "?maxScore", "?name")
+             .where("?person", ex.name, "?name")
+             .sub_query(sub)
+             .build()
+         )
+         assert "SELECT ?person (MAX(?score) AS ?maxScore)" in q
+         assert "GROUP BY ?person" in q
+
+     def test_select_with_string_prefix(self):
+         q = (
+             select("?name")
+             .prefix("ex", "http://example.org/")
+             .prefix("foaf", "http://xmlns.com/foaf/0.1/")
+             .where("?person", "a", "ex:Person")
+             .where("?person", "foaf:name", "?name")
+             .build()
+         )
+         # The full declaration is checked, namespace IRI included.
+         assert "PREFIX ex: <http://example.org/>" in q
+         assert "PREFIX foaf: <http://xmlns.com/foaf/0.1/>" in q
+
+     def test_select_with_namespace_prefix(self):
+         ns = Namespace("schema", "http://schema.org/")
+         q = (
+             select("?name")
+             .prefix(ns)
+             .where("?person", "schema:name", "?name")
+             .build()
+         )
+         assert "PREFIX schema: <http://schema.org/>" in q
+
+     def test_str_equals_build(self):
+         builder = select("?s").where("?s", "a", "?o")
+         assert str(builder) == builder.build()
+
+     def test_copy_independence(self):
+         original = select("?s").where("?s", "a", "?o")
+         cloned = original.copy()
+         cloned.limit(10)
+         assert "LIMIT" not in original.build()
+         assert "LIMIT 10" in cloned.build()
+
+     def test_validation_no_variables(self):
+         with pytest.raises(ValueError, match="at least one variable"):
+             select().where("?s", "a", "?o").build()
+
+     def test_validation_no_where(self):
+         with pytest.raises(ValueError, match="at least one WHERE pattern"):
+             select("?s").build()
+
+     def test_modifier_order(self):
+         """GROUP BY, HAVING, ORDER BY, LIMIT, OFFSET appear in correct order."""
+         q = (
+             select("?x", "(COUNT(?y) AS ?c)")
+             .where("?x", "a", "?y")
+             .group_by("?x")
+             .having("COUNT(?y) > 1")
+             .order_by("?x")
+             .limit(10)
+             .offset(5)
+             .build()
+         )
+         lines = q.splitlines()
+         keywords = ("GROUP BY", "HAVING", "ORDER BY", "LIMIT", "OFFSET")
+         # Map each modifier keyword to the index of the line that starts with it.
+         idx = {
+             kw: i
+             for i, line in enumerate(lines)
+             for kw in keywords
+             if line.startswith(kw)
+         }
+         assert idx["GROUP BY"] < idx["HAVING"] < idx["ORDER BY"] < idx["LIMIT"] < idx["OFFSET"]
+
+     def test_multiple_order_by(self):
+         q = select("?a", "?b").where("?a", "a", "?b").order_by("?a", "DESC(?b)").build()
+         assert "ORDER BY ?a DESC(?b)" in q
+
+     def test_chaining_returns_self(self):
+         builder = select("?s")
+         assert builder.where("?s", "a", "?o") is builder
+         assert builder.filter("true") is builder
+         assert builder.optional("?s", "?p", "?o") is builder
+         assert builder.bind("1", "?x") is builder
+         assert builder.distinct() is builder
+         assert builder.order_by("?s") is builder
+         assert builder.group_by("?s") is builder
+         assert builder.having("true") is builder
+         assert builder.limit(1) is builder
+         assert builder.offset(0) is builder
+
+
+# ── AskQuery ─────────────────────────────────────────────────────────
+
+
+ class TestAskQuery:
+     """ASK builder: rendering, filters, prefixes and validation."""
+
+     def test_basic_ask(self):
+         q = ask().where("?s", RDF.type, ex.Person).build()
+         assert q.startswith("ASK {")
+         # Built from the term's own IRI so the EXAMPLE namespace is not
+         # hard-coded.
+         # NOTE(review): assumes ex.Person exposes `.value` like a pyoxigraph
+         # NamedNode - confirm.
+         assert f"<{ex.Person.value}>" in q
+
+     def test_ask_with_filter(self):
+         # The filter expression is a raw string, so any bracketed IRI is a
+         # valid right-hand operand here.
+         q = ask().where("?s", "a", "?o").filter("?o = <http://example.org/Person>").build()
+         assert "FILTER(?o = <http://example.org/Person>)" in q
+
+     def test_ask_validation(self):
+         with pytest.raises(ValueError, match="at least one WHERE pattern"):
+             ask().build()
+
+     def test_ask_str_equals_build(self):
+         builder = ask().where("?s", "a", "?o")
+         assert str(builder) == builder.build()
+
+     def test_ask_with_prefix(self):
+         q = (
+             ask()
+             .prefix("ex", "http://example.org/")
+             .where("?s", "a", "ex:Person")
+             .build()
+         )
+         assert "PREFIX ex: <http://example.org/>" in q
+
+
+# ── ConstructQuery ───────────────────────────────────────────────────
+
+
+ class TestConstructQuery:
+     """CONSTRUCT builder: template rendering, WHERE handling, validation."""
+
+     def test_basic_construct(self):
+         q = (
+             construct(("?s", ex.fullName, "?name"))
+             .where("?s", ex.firstName, "?fname")
+             .bind("CONCAT(?fname, ' ', ?lname)", "?name")
+             .build()
+         )
+         assert "CONSTRUCT {" in q
+         # The template predicate must appear as a bracketed IRI.
+         # NOTE(review): assumes ex.fullName exposes `.value` like a
+         # pyoxigraph NamedNode - confirm.
+         assert f"<{ex.fullName.value}>" in q
+         assert "WHERE {" in q
+         assert "BIND(" in q
+
+     def test_construct_validation(self):
+         with pytest.raises(ValueError, match="at least one template triple"):
+             construct().build()
+
+     def test_construct_str_equals_build(self):
+         builder = construct(("?s", "?p", "?o")).where("?s", "?p", "?o")
+         assert str(builder) == builder.build()
+
+     def test_construct_without_where(self):
+         """CONSTRUCT without WHERE patterns should not emit WHERE block."""
+         q = construct(("?s", ex.label, '"test"')).build()
+         assert "CONSTRUCT {" in q
+         assert "WHERE" not in q
+
+
+# ── DescribeQuery ────────────────────────────────────────────────────
+
+
+ class TestDescribeQuery:
+     """DESCRIBE builder: resources, optional WHERE block, validation.
+
+     Expected IRIs are built from the terms' own ``.value`` so the EXAMPLE
+     namespace is not hard-coded.
+     NOTE(review): assumes EXAMPLE attributes expose `.value` like a
+     pyoxigraph NamedNode - confirm.
+     """
+
+     def test_describe_resource(self):
+         q = describe(ex.alice).build()
+         assert q == f"DESCRIBE <{ex.alice.value}>"
+
+     def test_describe_with_where(self):
+         # The filter expression is a raw string, so the IRI is spelled out.
+         q = (
+             describe("?person")
+             .where("?person", RDF.type, ex.Person)
+             .filter("?person = <http://example.org/alice>")
+             .build()
+         )
+         assert "DESCRIBE ?person" in q
+         assert "WHERE {" in q
+         assert "FILTER(?person = <http://example.org/alice>)" in q
+
+     def test_describe_multiple_resources(self):
+         q = describe(ex.alice, ex.bob).build()
+         assert f"<{ex.alice.value}>" in q
+         assert f"<{ex.bob.value}>" in q
+
+     def test_describe_validation(self):
+         with pytest.raises(ValueError, match="at least one resource"):
+             describe().build()
+
+     def test_describe_str_equals_build(self):
+         builder = describe(ex.alice)
+         assert str(builder) == builder.build()
+
+
+# ── GraphPattern ─────────────────────────────────────────────────────
+
+
+ class TestGraphPattern:
+     """GraphPattern used on its own, outside of a query builder."""
+
+     def test_copy_independence(self):
+         """Changing a copy must leave the source pattern untouched."""
+         source = GraphPattern().where("?s", "a", "?o")
+         duplicate = source.copy()
+         duplicate.filter("true")
+         assert "FILTER" not in source.to_sparql()
+         assert "FILTER" in duplicate.to_sparql()
+
+     def test_union_requires_two_patterns(self):
+         """A union of a single branch is rejected."""
+         pattern = GraphPattern()
+         with pytest.raises(ValueError, match="at least two"):
+             pattern.union(GraphPattern().where("?s", "a", "?o"))
+
+     def test_optional_requires_three_args_or_pattern(self):
+         """optional() with only two positional terms is rejected."""
+         pattern = GraphPattern()
+         with pytest.raises(ValueError):
+             pattern.optional("?s", "?p")
+
+     def test_nested_optional(self):
+         """An OPTIONAL built from a pattern keeps that pattern's filter."""
+         email_part = GraphPattern().where("?s", ex.email, "?email").filter("bound(?email)")
+         person_part = GraphPattern().where("?s", "a", "ex:Person").optional(email_part)
+         rendered = person_part.to_sparql()
+         assert "OPTIONAL {" in rendered
+         assert "FILTER(bound(?email))" in rendered
+
+     def test_values_with_string_var(self):
+         """A variable name given without '?' is rendered with one."""
+         pattern = GraphPattern().values("person", [ex.alice])
+         rendered = pattern.to_sparql()
+         assert "VALUES ?person" in rendered
+
+     def test_bind_with_string_var_no_question_mark(self):
+         """bind() adds the '?' to a bare target variable name."""
+         pattern = GraphPattern().bind("1 + 1", "result")
+         rendered = pattern.to_sparql()
+         assert "BIND(1 + 1 AS ?result)" in rendered
+
+
+# ── Integration / complex compositions ───────────────────────────────
+
+
+ class TestComplexCompositions:
+     """End-to-end compositions that combine several builder features."""
+
+     def test_full_query_from_plan_example(self):
+         """Test the main SELECT example from the plan."""
+         q = (
+             select("?person", "?name")
+             .where("?person", RDF.type, ex.Person)
+             .where("?person", ex.name, "?name")
+             .order_by("?name")
+             .limit(10)
+             .build()
+         )
+         assert "SELECT ?person ?name" in q
+         # Built from the terms' own IRIs so the EXAMPLE namespace is not
+         # hard-coded.
+         # NOTE(review): assumes EXAMPLE attributes expose `.value` like a
+         # pyoxigraph NamedNode - confirm.
+         assert f"<{ex.Person.value}>" in q
+         assert f"<{ex.name.value}>" in q
+         assert "ORDER BY ?name" in q
+         assert "LIMIT 10" in q
+
+     def test_optional_filter_combo(self):
+         q = (
+             select("?name", "?email")
+             .where("?person", RDF.type, ex.Person)
+             .where("?person", ex.name, "?name")
+             .optional("?person", ex.email, "?email")
+             .filter("?name != 'Bob'")
+             .build()
+         )
+         assert "OPTIONAL" in q
+         assert "FILTER" in q
+
+     def test_variable_objects(self):
+         """Test using pyoxigraph Variable objects directly."""
+         person = og.Variable("person")
+         name = og.Variable("name")
+         q = (
+             select("?person", "?name")
+             .where(person, RDF.type, ex.Person)
+             .where(person, ex.name, name)
+             .build()
+         )
+         assert "?person" in q
+         assert "?name" in q
+
+     def test_literal_in_where(self):
+         lit = og.Literal("Alice", language="en")
+         q = (
+             select("?s")
+             .where("?s", ex.name, lit)
+             .build()
+         )
+         assert '"Alice"@en' in q