From 9ad084695f0663d78418111c9850b07df48fc33c Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sat, 28 Feb 2026 12:41:44 -0500 Subject: [PATCH 1/4] feat: add fluent SPARQL query builder Add a programmatic query builder as an alternative to writing raw SPARQL strings. Supports SELECT, ASK, CONSTRUCT, and DESCRIBE queries with fluent method chaining. Accepts both string terms and typed objects (IRI, Variable, Literal, Namespace-produced IRIs). Features: - GraphPattern class for composable WHERE blocks - WHERE, FILTER, OPTIONAL, UNION, BIND, VALUES, sub-query clauses - SELECT modifiers: DISTINCT, ORDER BY, GROUP BY, HAVING, LIMIT, OFFSET - Prefix handling via string pairs or Namespace objects - Light validation in build() for structural correctness - copy() for safe builder reuse --- rdf4j_python/__init__.py | 7 + rdf4j_python/query/__init__.py | 37 +++ rdf4j_python/query/_builder.py | 272 ++++++++++++++++++ rdf4j_python/query/_pattern.py | 110 ++++++++ rdf4j_python/query/_term.py | 45 +++ tests/test_query_builder.py | 488 +++++++++++++++++++++++++++++++++ 6 files changed, 959 insertions(+) create mode 100644 rdf4j_python/query/__init__.py create mode 100644 rdf4j_python/query/_builder.py create mode 100644 rdf4j_python/query/_pattern.py create mode 100644 rdf4j_python/query/_term.py create mode 100644 tests/test_query_builder.py diff --git a/rdf4j_python/__init__.py b/rdf4j_python/__init__.py index 2a65acb..f43a7b9 100644 --- a/rdf4j_python/__init__.py +++ b/rdf4j_python/__init__.py @@ -40,6 +40,7 @@ Triple, Variable, ) +from .query import GraphPattern, ask, construct, describe, select __all__ = [ # Main classes @@ -78,4 +79,10 @@ "Object", "Context", "QuadResultSet", + # Query builder + "select", + "ask", + "construct", + "describe", + "GraphPattern", ] diff --git a/rdf4j_python/query/__init__.py b/rdf4j_python/query/__init__.py new file mode 100644 index 0000000..a9be7af --- /dev/null +++ b/rdf4j_python/query/__init__.py @@ -0,0 +1,37 @@ +"""SPARQL query builder 
for RDF4J Python.""" + +from ._builder import AskQuery, ConstructQuery, DescribeQuery, SelectQuery +from ._pattern import GraphPattern + + +def select(*variables: str) -> SelectQuery: + """Create a new ``SELECT`` query builder.""" + return SelectQuery(*variables) + + +def ask() -> AskQuery: + """Create a new ``ASK`` query builder.""" + return AskQuery() + + +def construct(*templates: tuple) -> ConstructQuery: + """Create a new ``CONSTRUCT`` query builder.""" + return ConstructQuery(*templates) + + +def describe(*resources) -> DescribeQuery: + """Create a new ``DESCRIBE`` query builder.""" + return DescribeQuery(*resources) + + +__all__ = [ + "select", + "ask", + "construct", + "describe", + "GraphPattern", + "SelectQuery", + "AskQuery", + "ConstructQuery", + "DescribeQuery", +] diff --git a/rdf4j_python/query/_builder.py b/rdf4j_python/query/_builder.py new file mode 100644 index 0000000..74b106b --- /dev/null +++ b/rdf4j_python/query/_builder.py @@ -0,0 +1,272 @@ +"""SPARQL query builder classes.""" + +from __future__ import annotations + +import copy +from typing import Any + +from rdf4j_python.model._namespace import Namespace + +from ._pattern import GraphPattern +from ._term import Term, serialize_term + + +# ── mixin for WHERE-clause delegation ──────────────────────────────── + + +class _WhereClauseMixin: + """Methods that delegate to the internal ``_pattern: GraphPattern``.""" + + _pattern: GraphPattern + + def where(self, s: Term, p: Term, o: Term) -> Any: + self._pattern.where(s, p, o) + return self + + def filter(self, expr: str) -> Any: + self._pattern.filter(expr) + return self + + def optional( + self, + s_or_pattern: Term | GraphPattern, + p: Term | None = None, + o: Term | None = None, + ) -> Any: + self._pattern.optional(s_or_pattern, p, o) + return self + + def union(self, *patterns: GraphPattern) -> Any: + self._pattern.union(*patterns) + return self + + def bind(self, expr: str, var: str) -> Any: + self._pattern.bind(expr, var) + return self + + 
def values(self, var: str, vals: list[Any]) -> Any: + self._pattern.values(var, vals) + return self + + def sub_query(self, builder: SelectQuery) -> Any: + self._pattern.sub_query(builder) + return self + + +# ── prefix handling mixin ──────────────────────────────────────────── + + +class _PrefixMixin: + """Shared prefix management.""" + + _prefixes: dict[str, str] + + def prefix(self, name_or_ns: str | Namespace, uri: str | None = None) -> Any: + """Register a prefix. + + - ``prefix("ex", "http://example.org/")`` — string pair + - ``prefix(ns)`` — extract from a ``Namespace`` object + """ + if isinstance(name_or_ns, Namespace): + self._prefixes[name_or_ns.prefix] = name_or_ns.namespace.value + else: + if uri is None: + raise ValueError("uri is required when name_or_ns is a string") + self._prefixes[name_or_ns] = uri + return self + + def _render_prefixes(self) -> str: + lines = [f"PREFIX {k}: <{v}>" for k, v in self._prefixes.items()] + return "\n".join(lines) + + +# ── SelectQuery ────────────────────────────────────────────────────── + + +class SelectQuery(_WhereClauseMixin, _PrefixMixin): + """Builder for ``SELECT`` queries.""" + + def __init__(self, *variables: str) -> None: + self._variables = list(variables) + self._distinct = False + self._pattern = GraphPattern() + self._prefixes: dict[str, str] = {} + self._order_by: list[str] = [] + self._group_by: list[str] = [] + self._having: str | None = None + self._limit: int | None = None + self._offset: int | None = None + + def distinct(self) -> SelectQuery: + self._distinct = True + return self + + def order_by(self, *exprs: str) -> SelectQuery: + self._order_by.extend(exprs) + return self + + def group_by(self, *exprs: str) -> SelectQuery: + self._group_by.extend(exprs) + return self + + def having(self, expr: str) -> SelectQuery: + self._having = expr + return self + + def limit(self, n: int) -> SelectQuery: + self._limit = n + return self + + def offset(self, n: int) -> SelectQuery: + self._offset = n + 
return self + + def copy(self) -> SelectQuery: + return copy.deepcopy(self) + + def build(self) -> str: + if not self._variables: + raise ValueError("SELECT query requires at least one variable") + if len(self._pattern) == 0: + raise ValueError("SELECT query requires at least one WHERE pattern") + + parts: list[str] = [] + + # prefixes + if self._prefixes: + parts.append(self._render_prefixes()) + + # SELECT line + keyword = "SELECT DISTINCT" if self._distinct else "SELECT" + parts.append(f"{keyword} {' '.join(self._variables)}") + + # WHERE + parts.append("WHERE {") + parts.append(self._pattern.to_sparql()) + parts.append("}") + + # modifiers + if self._group_by: + parts.append(f"GROUP BY {' '.join(self._group_by)}") + if self._having: + parts.append(f"HAVING ({self._having})") + if self._order_by: + parts.append(f"ORDER BY {' '.join(self._order_by)}") + if self._limit is not None: + parts.append(f"LIMIT {self._limit}") + if self._offset is not None: + parts.append(f"OFFSET {self._offset}") + + return "\n".join(parts) + + def __str__(self) -> str: + return self.build() + + +# ── AskQuery ───────────────────────────────────────────────────────── + + +class AskQuery(_WhereClauseMixin, _PrefixMixin): + """Builder for ``ASK`` queries.""" + + def __init__(self) -> None: + self._pattern = GraphPattern() + self._prefixes: dict[str, str] = {} + + def copy(self) -> AskQuery: + return copy.deepcopy(self) + + def build(self) -> str: + if len(self._pattern) == 0: + raise ValueError("ASK query requires at least one WHERE pattern") + + parts: list[str] = [] + if self._prefixes: + parts.append(self._render_prefixes()) + parts.append("ASK {") + parts.append(self._pattern.to_sparql()) + parts.append("}") + return "\n".join(parts) + + def __str__(self) -> str: + return self.build() + + +# ── ConstructQuery ─────────────────────────────────────────────────── + + +class ConstructQuery(_WhereClauseMixin, _PrefixMixin): + """Builder for ``CONSTRUCT`` queries.""" + + def __init__(self, 
*templates: tuple[Term, Term, Term]) -> None: + self._templates = list(templates) + self._pattern = GraphPattern() + self._prefixes: dict[str, str] = {} + + def copy(self) -> ConstructQuery: + return copy.deepcopy(self) + + def build(self) -> str: + if not self._templates: + raise ValueError("CONSTRUCT query requires at least one template triple") + + parts: list[str] = [] + if self._prefixes: + parts.append(self._render_prefixes()) + + # CONSTRUCT template + template_lines = [] + for s, p, o in self._templates: + template_lines.append( + f" {serialize_term(s)} {serialize_term(p)} {serialize_term(o)} ." + ) + parts.append("CONSTRUCT {") + parts.extend(template_lines) + parts.append("}") + + # WHERE (optional for CONSTRUCT but we always emit it if patterns exist) + if len(self._pattern) > 0: + parts.append("WHERE {") + parts.append(self._pattern.to_sparql()) + parts.append("}") + + return "\n".join(parts) + + def __str__(self) -> str: + return self.build() + + +# ── DescribeQuery ──────────────────────────────────────────────────── + + +class DescribeQuery(_WhereClauseMixin, _PrefixMixin): + """Builder for ``DESCRIBE`` queries.""" + + def __init__(self, *resources: Term) -> None: + self._resources = list(resources) + self._pattern = GraphPattern() + self._prefixes: dict[str, str] = {} + + def copy(self) -> DescribeQuery: + return copy.deepcopy(self) + + def build(self) -> str: + if not self._resources: + raise ValueError("DESCRIBE query requires at least one resource") + + parts: list[str] = [] + if self._prefixes: + parts.append(self._render_prefixes()) + + resources_str = " ".join(serialize_term(r) for r in self._resources) + parts.append(f"DESCRIBE {resources_str}") + + if len(self._pattern) > 0: + parts.append("WHERE {") + parts.append(self._pattern.to_sparql()) + parts.append("}") + + return "\n".join(parts) + + def __str__(self) -> str: + return self.build() diff --git a/rdf4j_python/query/_pattern.py b/rdf4j_python/query/_pattern.py new file mode 100644 index 
0000000..6b27fdc --- /dev/null +++ b/rdf4j_python/query/_pattern.py @@ -0,0 +1,110 @@ +"""GraphPattern — composable SPARQL WHERE block.""" + +from __future__ import annotations + +import copy +from typing import TYPE_CHECKING, Any + +from ._term import Term, serialize_term + +if TYPE_CHECKING: + from ._builder import SelectQuery + + +class GraphPattern: + """A composable block of SPARQL graph patterns (triples, filters, etc.). + + Used as the body of a WHERE clause and inside OPTIONAL / UNION blocks. + Every mutating method returns ``self`` for fluent chaining. + """ + + def __init__(self) -> None: + self._elements: list[str] = [] + + # ── triple patterns ────────────────────────────────────────────── + + def where(self, s: Term, p: Term, o: Term) -> GraphPattern: + """Add a triple pattern.""" + self._elements.append( + f"{serialize_term(s)} {serialize_term(p)} {serialize_term(o)} ." + ) + return self + + # ── FILTER ─────────────────────────────────────────────────────── + + def filter(self, expr: str) -> GraphPattern: + """Add a ``FILTER(expr)`` clause.""" + self._elements.append(f"FILTER({expr})") + return self + + # ── OPTIONAL ───────────────────────────────────────────────────── + + def optional(self, s_or_pattern: Term | GraphPattern, p: Term | None = None, o: Term | None = None) -> GraphPattern: + """Add an ``OPTIONAL { … }`` block. + + Two calling conventions: + - ``optional(s, p, o)`` — single triple shorthand + - ``optional(GraphPattern())`` — complex pattern block + """ + if isinstance(s_or_pattern, GraphPattern): + body = s_or_pattern.to_sparql(indent=4) + self._elements.append(f"OPTIONAL {{\n{body}\n }}") + else: + if p is None or o is None: + raise ValueError("optional() requires either a GraphPattern or three term arguments (s, p, o)") + triple = f"{serialize_term(s_or_pattern)} {serialize_term(p)} {serialize_term(o)} ." 
+ self._elements.append(f"OPTIONAL {{ {triple} }}") + return self + + # ── UNION ──────────────────────────────────────────────────────── + + def union(self, *patterns: GraphPattern) -> GraphPattern: + """Add ``{ … } UNION { … }`` blocks.""" + if len(patterns) < 2: + raise ValueError("union() requires at least two GraphPattern arguments") + parts = [] + for pat in patterns: + body = pat.to_sparql(indent=4) + parts.append(f"{{\n{body}\n }}") + self._elements.append(" UNION ".join(parts)) + return self + + # ── BIND ───────────────────────────────────────────────────────── + + def bind(self, expr: str, var: str) -> GraphPattern: + """Add a ``BIND(expr AS ?var)`` clause.""" + v = var if var.startswith("?") else f"?{var}" + self._elements.append(f"BIND({expr} AS {v})") + return self + + # ── VALUES ─────────────────────────────────────────────────────── + + def values(self, var: str, vals: list[Any]) -> GraphPattern: + """Add a ``VALUES ?var { … }`` clause.""" + v = var if var.startswith("?") else f"?{var}" + serialized = " ".join(serialize_term(val) for val in vals) + self._elements.append(f"VALUES {v} {{ {serialized} }}") + return self + + # ── sub-query ──────────────────────────────────────────────────── + + def sub_query(self, builder: SelectQuery) -> GraphPattern: + """Embed a sub-SELECT inside this pattern.""" + inner = builder.build() + indented = "\n".join(f" {line}" for line in inner.splitlines()) + self._elements.append(f"{{\n{indented}\n }}") + return self + + # ── rendering ──────────────────────────────────────────────────── + + def to_sparql(self, indent: int = 2) -> str: + """Render the pattern body (without the outer braces).""" + prefix = " " * indent + return "\n".join(f"{prefix}{el}" for el in self._elements) + + def copy(self) -> GraphPattern: + """Return a deep copy of this pattern.""" + return copy.deepcopy(self) + + def __len__(self) -> int: + return len(self._elements) diff --git a/rdf4j_python/query/_term.py b/rdf4j_python/query/_term.py new 
file mode 100644 index 0000000..ca27db5 --- /dev/null +++ b/rdf4j_python/query/_term.py @@ -0,0 +1,45 @@ +"""Term serialization for SPARQL query building.""" + +from __future__ import annotations + +import re +from typing import Union + +import pyoxigraph as og + +Term = Union[str, og.NamedNode, og.Variable, og.Literal, og.BlankNode] + +_PREFIXED_NAME_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*:[a-zA-Z_][a-zA-Z0-9_.-]*$") + + +def serialize_term(term: Term) -> str: + """Convert a term to its SPARQL string representation. + + Handles: + - str starting with ``?`` — pass through as variable + - str ``"a"`` — pass through (rdf:type shorthand) + - str matching ``prefix:local`` — pass through + - str wrapped in ``<>`` — pass through as full IRI + - str wrapped in ``"`` or ``'`` — pass through as literal + - str that is a SPARQL expression (e.g. aggregates) — pass through + - ``IRI`` (NamedNode) — ``<value>`` + - ``Variable`` — ``?value`` + - ``Literal`` — ``"value"`` with optional ``@lang`` or ``^^<datatype>`` + - ``BlankNode`` — ``_:value`` + """ + if isinstance(term, og.NamedNode): + return f"<{term.value}>" + if isinstance(term, og.Variable): + return f"?{term.value}" + if isinstance(term, og.Literal): + value = term.value.replace("\\", "\\\\").replace('"', '\\"') + if term.language: + return f'"{value}"@{term.language}' + if term.datatype and term.datatype.value != "http://www.w3.org/2001/XMLSchema#string": + return f'"{value}"^^<{term.datatype.value}>' + return f'"{value}"' + if isinstance(term, og.BlankNode): + return f"_:{term.value}" + if isinstance(term, str): + return term + raise TypeError(f"Unsupported term type: {type(term)}") diff --git a/tests/test_query_builder.py b/tests/test_query_builder.py new file mode 100644 index 0000000..2df553f --- /dev/null +++ b/tests/test_query_builder.py @@ -0,0 +1,488 @@ +"""Tests for the SPARQL query builder.""" + +import pytest +import pyoxigraph as og + +from rdf4j_python.model._namespace import Namespace +from
rdf4j_python.model.vocabulary import EXAMPLE as ex +from rdf4j_python.model.vocabulary import RDF +from rdf4j_python.query import ( + GraphPattern, + ask, + construct, + describe, + select, +) + + +# ── serialize_term ─────────────────────────────────────────────────── + + +class TestSerializeTerm: + def test_iri(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.NamedNode("http://example.org/Person")) == "" + + def test_variable(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Variable("name")) == "?name" + + def test_literal_plain(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Literal("hello")) == '"hello"' + + def test_literal_with_language(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Literal("hello", language="en")) == '"hello"@en' + + def test_literal_with_datatype(self): + from rdf4j_python.query._term import serialize_term + + dt = og.NamedNode("http://www.w3.org/2001/XMLSchema#integer") + assert serialize_term(og.Literal("42", datatype=dt)) == '"42"^^' + + def test_literal_string_datatype_omitted(self): + from rdf4j_python.query._term import serialize_term + + dt = og.NamedNode("http://www.w3.org/2001/XMLSchema#string") + assert serialize_term(og.Literal("hello", datatype=dt)) == '"hello"' + + def test_blank_node(self): + from rdf4j_python.query._term import serialize_term + + bn = og.BlankNode("b0") + assert serialize_term(bn) == "_:b0" + + def test_string_variable(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term("?x") == "?x" + + def test_string_a(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term("a") == "a" + + def test_string_prefixed(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term("foaf:name") == "foaf:name" + + def test_string_full_iri(self): + from rdf4j_python.query._term 
import serialize_term + + assert serialize_term("") == "" + + def test_string_literal(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term('"hello"') == '"hello"' + + def test_unsupported_type(self): + from rdf4j_python.query._term import serialize_term + + with pytest.raises(TypeError): + serialize_term(42) + + def test_literal_with_quotes(self): + from rdf4j_python.query._term import serialize_term + + assert serialize_term(og.Literal('say "hi"')) == '"say \\"hi\\""' + + def test_namespace_produced_iri(self): + from rdf4j_python.query._term import serialize_term + + person = ex.Person + assert serialize_term(person) == "" + + +# ── SelectQuery ────────────────────────────────────────────────────── + + +class TestSelectQuery: + def test_basic_select(self): + q = select("?s", "?p").where("?s", "a", "?p").build() + assert "SELECT ?s ?p" in q + assert "?s a ?p ." in q + + def test_select_with_typed_terms(self): + q = ( + select("?person", "?name") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.name, "?name") + .build() + ) + assert "" in q + assert "" in q + assert "" in q + + def test_select_with_limit(self): + q = select("?s").where("?s", "a", "?o").limit(10).build() + assert "LIMIT 10" in q + + def test_select_with_offset(self): + q = select("?s").where("?s", "a", "?o").offset(5).build() + assert "OFFSET 5" in q + + def test_select_with_order_by(self): + q = select("?name").where("?s", "a", "?name").order_by("?name").build() + assert "ORDER BY ?name" in q + + def test_select_distinct(self): + q = select("?name").distinct().where("?s", "a", "?name").build() + assert "SELECT DISTINCT ?name" in q + + def test_select_with_filter(self): + q = ( + select("?name") + .where("?person", "a", "ex:Person") + .where("?person", "ex:name", "?name") + .filter("?name != 'Bob'") + .build() + ) + assert "FILTER(?name != 'Bob')" in q + + def test_select_with_optional_triple(self): + q = ( + select("?name", "?email") + 
.where("?person", "a", "ex:Person") + .optional("?person", "ex:email", "?email") + .build() + ) + assert "OPTIONAL { ?person ex:email ?email . }" in q + + def test_select_with_optional_pattern(self): + pattern = GraphPattern().where("?person", ex.email, "?email").filter("bound(?email)") + q = ( + select("?name", "?email") + .where("?person", "a", "ex:Person") + .optional(pattern) + .build() + ) + assert "OPTIONAL {" in q + assert "FILTER(bound(?email))" in q + + def test_select_with_group_by_having(self): + q = ( + select("?city", "(COUNT(?person) AS ?count)") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.city, "?city") + .group_by("?city") + .having("COUNT(?person) > 1") + .order_by("DESC(?count)") + .build() + ) + assert "GROUP BY ?city" in q + assert "HAVING (COUNT(?person) > 1)" in q + assert "ORDER BY DESC(?count)" in q + + def test_select_with_union(self): + q = ( + select("?label") + .where("?s", RDF.type, ex.Person) + .union( + GraphPattern().where("?s", ex.name, "?label"), + GraphPattern().where("?s", ex.nickname, "?label"), + ) + .build() + ) + assert "UNION" in q + + def test_select_with_bind(self): + q = ( + select("?fullName") + .where("?s", ex.firstName, "?fname") + .where("?s", ex.lastName, "?lname") + .bind("CONCAT(?fname, ' ', ?lname)", "?fullName") + .build() + ) + assert "BIND(CONCAT(?fname, ' ', ?lname) AS ?fullName)" in q + + def test_select_with_values(self): + q = ( + select("?person", "?name") + .where("?person", ex.name, "?name") + .values("?person", [ex.alice, ex.bob]) + .build() + ) + assert "VALUES ?person { }" in q + + def test_select_with_sub_query(self): + sub = ( + select("?person", "(MAX(?score) AS ?maxScore)") + .where("?person", ex.score, "?score") + .group_by("?person") + ) + q = ( + select("?person", "?maxScore", "?name") + .where("?person", ex.name, "?name") + .sub_query(sub) + .build() + ) + assert "SELECT ?person (MAX(?score) AS ?maxScore)" in q + assert "GROUP BY ?person" in q + + def 
test_select_with_string_prefix(self): + q = ( + select("?name") + .prefix("ex", "http://example.org/") + .prefix("foaf", "http://xmlns.com/foaf/0.1/") + .where("?person", "a", "ex:Person") + .where("?person", "foaf:name", "?name") + .build() + ) + assert "PREFIX ex: " in q + assert "PREFIX foaf: " in q + + def test_select_with_namespace_prefix(self): + ns = Namespace("schema", "http://schema.org/") + q = ( + select("?name") + .prefix(ns) + .where("?person", "schema:name", "?name") + .build() + ) + assert "PREFIX schema: " in q + + def test_str_equals_build(self): + builder = select("?s").where("?s", "a", "?o") + assert str(builder) == builder.build() + + def test_copy_independence(self): + original = select("?s").where("?s", "a", "?o") + cloned = original.copy() + cloned.limit(10) + assert "LIMIT" not in original.build() + assert "LIMIT 10" in cloned.build() + + def test_validation_no_variables(self): + with pytest.raises(ValueError, match="at least one variable"): + select().where("?s", "a", "?o").build() + + def test_validation_no_where(self): + with pytest.raises(ValueError, match="at least one WHERE pattern"): + select("?s").build() + + def test_modifier_order(self): + """GROUP BY, HAVING, ORDER BY, LIMIT, OFFSET appear in correct order.""" + q = ( + select("?x", "(COUNT(?y) AS ?c)") + .where("?x", "a", "?y") + .group_by("?x") + .having("COUNT(?y) > 1") + .order_by("?x") + .limit(10) + .offset(5) + .build() + ) + lines = q.splitlines() + idx = {kw: i for i, line in enumerate(lines) for kw in ("GROUP BY", "HAVING", "ORDER BY", "LIMIT", "OFFSET") if line.startswith(kw)} + assert idx["GROUP BY"] < idx["HAVING"] < idx["ORDER BY"] < idx["LIMIT"] < idx["OFFSET"] + + def test_multiple_order_by(self): + q = select("?a", "?b").where("?a", "a", "?b").order_by("?a", "DESC(?b)").build() + assert "ORDER BY ?a DESC(?b)" in q + + def test_chaining_returns_self(self): + builder = select("?s") + assert builder.where("?s", "a", "?o") is builder + assert builder.filter("true") is 
builder + assert builder.optional("?s", "?p", "?o") is builder + assert builder.bind("1", "?x") is builder + assert builder.distinct() is builder + assert builder.order_by("?s") is builder + assert builder.group_by("?s") is builder + assert builder.having("true") is builder + assert builder.limit(1) is builder + assert builder.offset(0) is builder + + +# ── AskQuery ───────────────────────────────────────────────────────── + + +class TestAskQuery: + def test_basic_ask(self): + q = ask().where("?s", RDF.type, ex.Person).build() + assert q.startswith("ASK {") + assert "" in q + + def test_ask_with_filter(self): + q = ask().where("?s", "a", "?o").filter("?o = ").build() + assert "FILTER(?o = )" in q + + def test_ask_validation(self): + with pytest.raises(ValueError, match="at least one WHERE pattern"): + ask().build() + + def test_ask_str_equals_build(self): + builder = ask().where("?s", "a", "?o") + assert str(builder) == builder.build() + + def test_ask_with_prefix(self): + q = ( + ask() + .prefix("ex", "http://example.org/") + .where("?s", "a", "ex:Person") + .build() + ) + assert "PREFIX ex: " in q + + +# ── ConstructQuery ─────────────────────────────────────────────────── + + +class TestConstructQuery: + def test_basic_construct(self): + q = ( + construct(("?s", ex.fullName, "?name")) + .where("?s", ex.firstName, "?fname") + .bind("CONCAT(?fname, ' ', ?lname)", "?name") + .build() + ) + assert "CONSTRUCT {" in q + assert "" in q + assert "WHERE {" in q + assert "BIND(" in q + + def test_construct_validation(self): + with pytest.raises(ValueError, match="at least one template triple"): + construct().build() + + def test_construct_str_equals_build(self): + builder = construct(("?s", "?p", "?o")).where("?s", "?p", "?o") + assert str(builder) == builder.build() + + def test_construct_without_where(self): + """CONSTRUCT without WHERE patterns should not emit WHERE block.""" + q = construct(("?s", ex.label, '"test"')).build() + assert "CONSTRUCT {" in q + assert 
"WHERE" not in q + + +# ── DescribeQuery ──────────────────────────────────────────────────── + + +class TestDescribeQuery: + def test_describe_resource(self): + q = describe(ex.alice).build() + assert q == "DESCRIBE " + + def test_describe_with_where(self): + q = ( + describe("?person") + .where("?person", RDF.type, ex.Person) + .filter("?person = ") + .build() + ) + assert "DESCRIBE ?person" in q + assert "WHERE {" in q + assert "FILTER(?person = )" in q + + def test_describe_multiple_resources(self): + q = describe(ex.alice, ex.bob).build() + assert "" in q + assert "" in q + + def test_describe_validation(self): + with pytest.raises(ValueError, match="at least one resource"): + describe().build() + + def test_describe_str_equals_build(self): + builder = describe(ex.alice) + assert str(builder) == builder.build() + + +# ── GraphPattern ───────────────────────────────────────────────────── + + +class TestGraphPattern: + def test_copy_independence(self): + p1 = GraphPattern().where("?s", "a", "?o") + p2 = p1.copy() + p2.filter("true") + assert "FILTER" not in p1.to_sparql() + assert "FILTER" in p2.to_sparql() + + def test_union_requires_two_patterns(self): + p = GraphPattern() + with pytest.raises(ValueError, match="at least two"): + p.union(GraphPattern().where("?s", "a", "?o")) + + def test_optional_requires_three_args_or_pattern(self): + p = GraphPattern() + with pytest.raises(ValueError): + p.optional("?s", "?p") + + def test_nested_optional(self): + inner = GraphPattern().where("?s", ex.email, "?email").filter("bound(?email)") + outer = GraphPattern().where("?s", "a", "ex:Person").optional(inner) + sparql = outer.to_sparql() + assert "OPTIONAL {" in sparql + assert "FILTER(bound(?email))" in sparql + + def test_values_with_string_var(self): + p = GraphPattern().values("person", [ex.alice]) + assert "VALUES ?person" in p.to_sparql() + + def test_bind_with_string_var_no_question_mark(self): + p = GraphPattern().bind("1 + 1", "result") + assert "BIND(1 + 1 AS 
?result)" in p.to_sparql() + + +# ── Integration / complex compositions ─────────────────────────────── + + +class TestComplexCompositions: + def test_full_query_from_plan_example(self): + """Test the main SELECT example from the plan.""" + q = ( + select("?person", "?name") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.name, "?name") + .order_by("?name") + .limit(10) + .build() + ) + assert "SELECT ?person ?name" in q + assert "" in q + assert "" in q + assert "ORDER BY ?name" in q + assert "LIMIT 10" in q + + def test_optional_filter_combo(self): + q = ( + select("?name", "?email") + .where("?person", RDF.type, ex.Person) + .where("?person", ex.name, "?name") + .optional("?person", ex.email, "?email") + .filter("?name != 'Bob'") + .build() + ) + assert "OPTIONAL" in q + assert "FILTER" in q + + def test_variable_objects(self): + """Test using pyoxigraph Variable objects directly.""" + person = og.Variable("person") + name = og.Variable("name") + q = ( + select("?person", "?name") + .where(person, RDF.type, ex.Person) + .where(person, ex.name, name) + .build() + ) + assert "?person" in q + assert "?name" in q + + def test_literal_in_where(self): + lit = og.Literal("Alice", language="en") + q = ( + select("?s") + .where("?s", ex.name, lit) + .build() + ) + assert '"Alice"@en' in q From 2fc9e479069b06f8c94f12e93ec135e7cef8db12 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sat, 28 Feb 2026 12:46:04 -0500 Subject: [PATCH 2/4] docs: update README and examples to use query builder Replace raw SPARQL strings with fluent query builder calls in all example files (query.py, query_and_print.py, complete_workflow.py). Add a Query Builder section to the main README with usage examples. 
--- README.md | 79 +++++++++++++++-- examples/README.md | 10 ++- examples/complete_workflow.py | 54 ++++++------ examples/query.py | 8 +- examples/query_and_print.py | 154 +++++++++++++++++----------------- 5 files changed, 189 insertions(+), 116 deletions(-) diff --git a/README.md b/README.md index f3175be..21e9773 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ rdf4j-python bridges the gap between Python and the robust [Eclipse RDF4J](https - **Async-First Design**: Native support for async/await with synchronous fallback - **Repository Management**: Create, access, and manage RDF4J repositories programmatically - **SPARQL Support**: Execute SELECT, ASK, CONSTRUCT, and UPDATE queries effortlessly +- **SPARQL Query Builder**: Fluent, programmatic query construction with method chaining - **Transaction Support**: Atomic operations with commit/rollback and isolation levels - **Flexible Data Handling**: Add, retrieve, and manipulate RDF triples and quads - **File Upload**: Upload RDF files (Turtle, N-Triples, N-Quads, RDF/XML, JSON-LD, TriG, N3) directly to repositories @@ -88,6 +89,62 @@ if __name__ == "__main__": asyncio.run(main()) ``` +### SPARQL Query Builder + +Build queries programmatically with method chaining instead of writing raw SPARQL strings: + +```python +from rdf4j_python import select, ask, construct, describe, GraphPattern, Namespace + +ex = Namespace("ex", "http://example.org/") +foaf = Namespace("foaf", "http://xmlns.com/foaf/0.1/") + +# SELECT with typed terms — IRIs serialize automatically +query = ( + select("?person", "?name") + .where("?person", "a", ex.Person) + .where("?person", foaf.name, "?name") + .optional("?person", foaf.email, "?email") + .filter("?name != 'Bob'") + .order_by("?name") + .limit(10) + .build() +) + +# Or use string-based prefixed names +query = ( + select("?name") + .prefix("foaf", "http://xmlns.com/foaf/0.1/") + .where("?person", "a", "foaf:Person") + .where("?person", "foaf:name", "?name") + .build() +)
+ +# GROUP BY with aggregation +query = ( + select("?city", "(COUNT(?person) AS ?count)") + .where("?person", ex.city, "?city") + .group_by("?city") + .having("COUNT(?person) > 1") + .order_by("DESC(?count)") + .build() +) + +# ASK, CONSTRUCT, and DESCRIBE +ask_query = ask().where("?s", ex.name, "?name").build() + +construct_query = ( + construct(("?s", ex.fullName, "?name")) + .where("?s", ex.firstName, "?fname") + .bind("CONCAT(?fname, ' ', ?lname)", "?name") + .build() +) + +describe_query = describe(ex.alice).build() +``` + +The query builder supports FILTER, OPTIONAL, UNION, BIND, VALUES, sub-queries, DISTINCT, ORDER BY, GROUP BY, HAVING, LIMIT, and OFFSET. Both raw strings and typed objects (`IRI`, `Variable`, `Literal`, `Namespace`) work as terms. + ### Working with Multiple Graphs ```python @@ -155,15 +212,18 @@ async def advanced_example(): ] await repo.add_statements(statements) - # Complex SPARQL query - query = """ - PREFIX foaf: - SELECT ?name ?email WHERE { - ?person foaf:name ?name . - OPTIONAL { ?person foaf:email ?email } - } - ORDER BY ?name - """ + # Query with the fluent query builder + from rdf4j_python import select + from rdf4j_python.model._namespace import Namespace + + foaf = Namespace("foaf", "http://xmlns.com/foaf/0.1/") + query = ( + select("?name", "?email") + .where("?person", foaf.name, "?name") + .optional("?person", foaf.email, "?email") + .order_by("?name") + .build() + ) results = await repo.query(query) ``` @@ -256,6 +316,7 @@ For more detailed examples, see the [examples](examples/) directory. 
rdf4j_python/ ├── _driver/ # Core async driver implementation ├── model/ # Data models and configurations +├── query/ # SPARQL query builder ├── exception/ # Custom exceptions └── utils/ # Utility functions diff --git a/examples/README.md b/examples/README.md index 28b7553..1519ca4 100644 --- a/examples/README.md +++ b/examples/README.md @@ -74,7 +74,7 @@ python examples/delete_repository.py ### 🔍 Data Querying Examples #### `query_and_print.py` -Comprehensive example of SPARQL querying and result formatting: +Comprehensive example of SPARQL querying and result formatting using the fluent query builder: - SELECT queries with various clauses (FILTER, OPTIONAL, JOIN) - CONSTRUCT queries for data transformation - ASK queries for boolean checks @@ -82,6 +82,7 @@ Comprehensive example of SPARQL querying and result formatting: - Multiple result formatting options **Key Features:** +- Fluent SPARQL query builder (no raw query strings) - Automatic test data setup - Formatted table output - JSON-like result formatting @@ -92,8 +93,8 @@ Comprehensive example of SPARQL querying and result formatting: python examples/query_and_print.py ``` -#### `query.py` (Original) -Simple query example showing basic SPARQL execution. +#### `query.py` +Simple query example showing basic SPARQL execution using the query builder. **Usage:** ```bash @@ -112,6 +113,7 @@ End-to-end example demonstrating the full repository lifecycle: **Key Features:** - Multi-repository workflow +- Fluent SPARQL query builder with Namespace objects - Real-world data scenarios (customers, products, analytics) - Named graph usage - Comprehensive error handling and cleanup @@ -121,7 +123,7 @@ End-to-end example demonstrating the full repository lifecycle: python examples/complete_workflow.py ``` -#### `repo.py` (Original) +#### `repo.py` Basic repository creation and data insertion example. 
**Usage:** diff --git a/examples/complete_workflow.py b/examples/complete_workflow.py index d38d83d..171e730 100644 --- a/examples/complete_workflow.py +++ b/examples/complete_workflow.py @@ -13,7 +13,8 @@ import asyncio -from rdf4j_python import AsyncRdf4j +from rdf4j_python import AsyncRdf4j, select +from rdf4j_python.model._namespace import Namespace from rdf4j_python.model.repository_config import ( MemoryStoreConfig, RepositoryConfig, @@ -21,6 +22,9 @@ ) from rdf4j_python.model.term import IRI, Literal, Quad +# Define namespaces for query building +ecom = Namespace("ecom", "http://example.com/") + async def workflow_step_1_create_repositories(): """Step 1: Create multiple repositories with different configurations.""" @@ -199,14 +203,14 @@ async def workflow_step_3_query_data(): # Query 1: Customer information print("👥 Customer Information:") customer_repo = await db.get_repository("customer-data") - customer_query = """ - SELECT ?customer ?name ?email ?age WHERE { - ?customer ?name . - OPTIONAL { ?customer ?email } - OPTIONAL { ?customer ?age } - } - ORDER BY ?name - """ + customer_query = ( + select("?customer", "?name", "?email", "?age") + .where("?customer", ecom.name, "?name") + .optional("?customer", ecom.email, "?email") + .optional("?customer", ecom.age, "?age") + .order_by("?name") + .build() + ) customer_results = await customer_repo.query(customer_query) for result in customer_results: name = result["name"].value if result["name"] else "N/A" @@ -217,14 +221,14 @@ async def workflow_step_3_query_data(): # Query 2: Product catalog print("\n🛍️ Product Catalog:") product_repo = await db.get_repository("product-catalog") - product_query = """ - SELECT ?product ?name ?price ?category WHERE { - ?product ?name . 
- OPTIONAL { ?product ?price } - OPTIONAL { ?product ?category } - } - ORDER BY ?price - """ + product_query = ( + select("?product", "?name", "?price", "?category") + .where("?product", ecom.name, "?name") + .optional("?product", ecom.price, "?price") + .optional("?product", ecom.category, "?category") + .order_by("?price") + .build() + ) product_results = await product_repo.query(product_query) for result in product_results: name = result["name"].value if result["name"] else "N/A" @@ -235,14 +239,14 @@ async def workflow_step_3_query_data(): # Query 3: Purchase analytics print("\n📊 Purchase Analytics:") analytics_repo = await db.get_repository("analytics-data") - analytics_query = """ - SELECT ?purchase ?customer ?product ?date WHERE { - ?purchase ?customer . - ?purchase ?product . - OPTIONAL { ?purchase ?date } - } - ORDER BY ?date - """ + analytics_query = ( + select("?purchase", "?customer", "?product", "?date") + .where("?purchase", ecom.customer, "?customer") + .where("?purchase", ecom.product, "?product") + .optional("?purchase", ecom.date, "?date") + .order_by("?date") + .build() + ) analytics_results = await analytics_repo.query(analytics_query) for result in analytics_results: customer = result["customer"].value if result["customer"] else "N/A" diff --git a/examples/query.py b/examples/query.py index 64f9ec8..b29af5a 100644 --- a/examples/query.py +++ b/examples/query.py @@ -2,7 +2,7 @@ from pyoxigraph import QuerySolutions -from rdf4j_python import AsyncRdf4j +from rdf4j_python import AsyncRdf4j, select from rdf4j_python.model.term import IRI, Literal, Quad @@ -25,7 +25,11 @@ async def main(): ), ] ) - result = await repo.query("SELECT * WHERE { ?s ?p ?o }") + + # Build the query using the fluent query builder + query = select("?s", "?p", "?o").where("?s", "?p", "?o").build() + + result = await repo.query(query) assert isinstance(result, QuerySolutions) for solution in result: print(solution) diff --git a/examples/query_and_print.py 
b/examples/query_and_print.py index 6735889..d30a41d 100644 --- a/examples/query_and_print.py +++ b/examples/query_and_print.py @@ -3,6 +3,8 @@ This example demonstrates how to execute various types of SPARQL queries against RDF4J repositories and format the results in different ways. + +Queries are built using the fluent SPARQL query builder. """ import asyncio @@ -10,7 +12,8 @@ from pyoxigraph import QuerySolutions, QueryTriples -from rdf4j_python import AsyncRdf4j +from rdf4j_python import AsyncRdf4j, GraphPattern, ask, construct, select +from rdf4j_python.model._namespace import Namespace from rdf4j_python.model.repository_config import ( MemoryStoreConfig, RepositoryConfig, @@ -18,6 +21,9 @@ ) from rdf4j_python.model.term import IRI, Literal, Quad +# Define namespaces for query building +ex = Namespace("ex", "http://example.org/") + async def setup_test_data(repo): """Add some test data to the repository for querying.""" @@ -189,43 +195,43 @@ async def execute_select_queries(repo): print("=" * 50) # Query 1: Simple SELECT - get all people and their names - query1 = """ - SELECT ?person ?name WHERE { - ?person ?name . - } - """ + query1 = ( + select("?person", "?name") + .where("?person", ex.name, "?name") + .build() + ) result1 = await repo.query(query1) print_select_results(result1, "All People and Their Names") # Query 2: SELECT with FILTER - people older than 30 - query2 = """ - SELECT ?person ?name ?age WHERE { - ?person ?name . - ?person ?age . - FILTER(?age > 30) - } - """ + query2 = ( + select("?person", "?name", "?age") + .where("?person", ex.name, "?name") + .where("?person", ex.age, "?age") + .filter("?age > 30") + .build() + ) result2 = await repo.query(query2) print_select_results(result2, "People Older Than 30") # Query 3: SELECT with OPTIONAL - people and their email (if available) - query3 = """ - SELECT ?person ?name ?email WHERE { - ?person ?name . 
- OPTIONAL { ?person ?email } - } - """ + query3 = ( + select("?person", "?name", "?email") + .where("?person", ex.name, "?name") + .optional("?person", ex.email, "?email") + .build() + ) result3 = await repo.query(query3) print_select_results(result3, "People and Their Email Addresses") # Query 4: SELECT with JOIN - people and their employers - query4 = """ - SELECT ?person ?name ?company WHERE { - ?person ?name . - ?person ?org . - ?org ?company . - } - """ + query4 = ( + select("?person", "?name", "?company") + .where("?person", ex.name, "?name") + .where("?person", ex.worksFor, "?org") + .where("?org", ex.name, "?company") + .build() + ) result4 = await repo.query(query4) print_select_results(result4, "People and Their Employers") @@ -239,29 +245,25 @@ async def execute_construct_queries(repo): print("=" * 50) # Query 1: CONSTRUCT - create simplified person data - query1 = """ - CONSTRUCT { - ?person ?name ; - ?age . - } - WHERE { - ?person ?name . - ?person ?age . - } - """ + query1 = ( + construct( + ("?person", ex.hasName, "?name"), + ("?person", ex.hasAge, "?age"), + ) + .where("?person", ex.name, "?name") + .where("?person", ex.age, "?age") + .build() + ) result1 = await repo.query(query1) print_construct_results(result1, "Simplified Person Data") # Query 2: CONSTRUCT - create employment relationships - query2 = """ - CONSTRUCT { - ?person ?company . - } - WHERE { - ?person ?org . - ?org ?company . - } - """ + query2 = ( + construct(("?person", ex.employedBy, "?company")) + .where("?person", ex.worksFor, "?org") + .where("?org", ex.name, "?company") + .build() + ) result2 = await repo.query(query2) print_construct_results(result2, "Employment Relationships") @@ -272,30 +274,30 @@ async def execute_ask_queries(repo): print("=" * 50) # Query 1: ASK - check if Alice exists - query1 = """ - ASK { - ?person "Alice" . - } - """ + query1 = ( + ask() + .where("?person", ex.name, '"Alice"') + .build() + ) result1 = await repo.query(query1) print(f"Does Alice exist? 
{'✅ Yes' if result1 else '❌ No'}") # Query 2: ASK - check if anyone is older than 40 - query2 = """ - ASK { - ?person ?age . - FILTER(?age > 40) - } - """ + query2 = ( + ask() + .where("?person", ex.age, "?age") + .filter("?age > 40") + .build() + ) result2 = await repo.query(query2) print(f"Is anyone older than 40? {'✅ Yes' if result2 else '❌ No'}") # Query 3: ASK - check if there are any email addresses - query3 = """ - ASK { - ?person ?email . - } - """ + query3 = ( + ask() + .where("?person", ex.email, "?email") + .build() + ) result3 = await repo.query(query3) print(f"Are there any email addresses? {'✅ Yes' if result3 else '❌ No'}") @@ -306,29 +308,29 @@ async def execute_aggregate_queries(repo): print("=" * 50) # Query 1: COUNT - total number of people - query1 = """ - SELECT (COUNT(?person) AS ?totalPeople) WHERE { - ?person ?name . - } - """ + query1 = ( + select("(COUNT(?person) AS ?totalPeople)") + .where("?person", ex.name, "?name") + .build() + ) result1 = await repo.query(query1) print_select_results(result1, "Total Number of People") # Query 2: AVG - average age - query2 = """ - SELECT (AVG(?age) AS ?averageAge) WHERE { - ?person ?age . - } - """ + query2 = ( + select("(AVG(?age) AS ?averageAge)") + .where("?person", ex.age, "?age") + .build() + ) result2 = await repo.query(query2) print_select_results(result2, "Average Age") # Query 3: MIN/MAX - youngest and oldest person - query3 = """ - SELECT (MIN(?age) AS ?minAge) (MAX(?age) AS ?maxAge) WHERE { - ?person ?age . 
- } - """ + query3 = ( + select("(MIN(?age) AS ?minAge)", "(MAX(?age) AS ?maxAge)") + .where("?person", ex.age, "?age") + .build() + ) result3 = await repo.query(query3) print_select_results(result3, "Age Range (Min/Max)") From a41c6eaedf00c9af22175d2a6a68b150869963f2 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sat, 28 Feb 2026 12:48:30 -0500 Subject: [PATCH 3/4] refactor: simplify query builder with shared base class Consolidate _WhereClauseMixin and _PrefixMixin into a single _QueryBase that handles shared init, prefix rendering, WHERE delegation, copy(), and __str__(). Remove unused re import and _PREFIXED_NAME_RE. Extract _ensure_var helper in GraphPattern to deduplicate var normalization. --- rdf4j_python/query/_builder.py | 178 ++++++++++++--------------------- rdf4j_python/query/_pattern.py | 42 ++++---- rdf4j_python/query/_term.py | 22 ++-- 3 files changed, 88 insertions(+), 154 deletions(-) diff --git a/rdf4j_python/query/_builder.py b/rdf4j_python/query/_builder.py index 74b106b..dfb414f 100644 --- a/rdf4j_python/query/_builder.py +++ b/rdf4j_python/query/_builder.py @@ -11,13 +11,37 @@ from ._term import Term, serialize_term -# ── mixin for WHERE-clause delegation ──────────────────────────────── +class _QueryBase: + """Shared state and behaviour for all query builders. + Provides prefix management, WHERE-clause delegation, ``copy()``, and + ``__str__()`` so concrete builders only need to implement ``build()``. + """ -class _WhereClauseMixin: - """Methods that delegate to the internal ``_pattern: GraphPattern``.""" + def __init__(self) -> None: + self._pattern = GraphPattern() + self._prefixes: dict[str, str] = {} + + # ── prefix handling ────────────────────────────────────────────── + + def prefix(self, name_or_ns: str | Namespace, uri: str | None = None) -> Any: + """Register a prefix. 
+ + - ``prefix("ex", "http://example.org/")`` — string pair + - ``prefix(ns)`` — extract from a ``Namespace`` object + """ + if isinstance(name_or_ns, Namespace): + self._prefixes[name_or_ns.prefix] = name_or_ns.namespace.value + else: + if uri is None: + raise ValueError("uri is required when name_or_ns is a string") + self._prefixes[name_or_ns] = uri + return self - _pattern: GraphPattern + def _render_prefixes(self) -> str: + return "\n".join(f"PREFIX {k}: <{v}>" for k, v in self._prefixes.items()) + + # ── WHERE-clause delegation ────────────────────────────────────── def where(self, s: Term, p: Term, o: Term) -> Any: self._pattern.where(s, p, o) @@ -52,45 +76,42 @@ def sub_query(self, builder: SelectQuery) -> Any: self._pattern.sub_query(builder) return self + # ── common helpers ─────────────────────────────────────────────── -# ── prefix handling mixin ──────────────────────────────────────────── - - -class _PrefixMixin: - """Shared prefix management.""" + def _render_where(self, parts: list[str]) -> None: + """Append ``WHERE { … }`` to *parts* if patterns exist.""" + if len(self._pattern) > 0: + parts.append("WHERE {") + parts.append(self._pattern.to_sparql()) + parts.append("}") - _prefixes: dict[str, str] + def _build_parts(self) -> list[str]: + """Return the prefix lines (if any) as a starting list.""" + parts: list[str] = [] + if self._prefixes: + parts.append(self._render_prefixes()) + return parts - def prefix(self, name_or_ns: str | Namespace, uri: str | None = None) -> Any: - """Register a prefix. 
+ def copy(self): + return copy.deepcopy(self) - - ``prefix("ex", "http://example.org/")`` — string pair - - ``prefix(ns)`` — extract from a ``Namespace`` object - """ - if isinstance(name_or_ns, Namespace): - self._prefixes[name_or_ns.prefix] = name_or_ns.namespace.value - else: - if uri is None: - raise ValueError("uri is required when name_or_ns is a string") - self._prefixes[name_or_ns] = uri - return self + def build(self) -> str: + raise NotImplementedError - def _render_prefixes(self) -> str: - lines = [f"PREFIX {k}: <{v}>" for k, v in self._prefixes.items()] - return "\n".join(lines) + def __str__(self) -> str: + return self.build() -# ── SelectQuery ────────────────────────────────────────────────────── +# ── concrete builders ──────────────────────────────────────────────── -class SelectQuery(_WhereClauseMixin, _PrefixMixin): +class SelectQuery(_QueryBase): """Builder for ``SELECT`` queries.""" def __init__(self, *variables: str) -> None: + super().__init__() self._variables = list(variables) self._distinct = False - self._pattern = GraphPattern() - self._prefixes: dict[str, str] = {} self._order_by: list[str] = [] self._group_by: list[str] = [] self._having: str | None = None @@ -121,31 +142,19 @@ def offset(self, n: int) -> SelectQuery: self._offset = n return self - def copy(self) -> SelectQuery: - return copy.deepcopy(self) - def build(self) -> str: if not self._variables: raise ValueError("SELECT query requires at least one variable") if len(self._pattern) == 0: raise ValueError("SELECT query requires at least one WHERE pattern") - parts: list[str] = [] + parts = self._build_parts() - # prefixes - if self._prefixes: - parts.append(self._render_prefixes()) - - # SELECT line keyword = "SELECT DISTINCT" if self._distinct else "SELECT" parts.append(f"{keyword} {' '.join(self._variables)}") - # WHERE - parts.append("WHERE {") - parts.append(self._pattern.to_sparql()) - parts.append("}") + self._render_where(parts) - # modifiers if self._group_by: 
parts.append(f"GROUP BY {' '.join(self._group_by)}") if self._having: @@ -159,114 +168,57 @@ def build(self) -> str: return "\n".join(parts) - def __str__(self) -> str: - return self.build() - - -# ── AskQuery ───────────────────────────────────────────────────────── - -class AskQuery(_WhereClauseMixin, _PrefixMixin): +class AskQuery(_QueryBase): """Builder for ``ASK`` queries.""" - def __init__(self) -> None: - self._pattern = GraphPattern() - self._prefixes: dict[str, str] = {} - - def copy(self) -> AskQuery: - return copy.deepcopy(self) - def build(self) -> str: if len(self._pattern) == 0: raise ValueError("ASK query requires at least one WHERE pattern") - parts: list[str] = [] - if self._prefixes: - parts.append(self._render_prefixes()) + parts = self._build_parts() parts.append("ASK {") parts.append(self._pattern.to_sparql()) parts.append("}") return "\n".join(parts) - def __str__(self) -> str: - return self.build() - - -# ── ConstructQuery ─────────────────────────────────────────────────── - -class ConstructQuery(_WhereClauseMixin, _PrefixMixin): +class ConstructQuery(_QueryBase): """Builder for ``CONSTRUCT`` queries.""" def __init__(self, *templates: tuple[Term, Term, Term]) -> None: + super().__init__() self._templates = list(templates) - self._pattern = GraphPattern() - self._prefixes: dict[str, str] = {} - - def copy(self) -> ConstructQuery: - return copy.deepcopy(self) def build(self) -> str: if not self._templates: raise ValueError("CONSTRUCT query requires at least one template triple") - parts: list[str] = [] - if self._prefixes: - parts.append(self._render_prefixes()) + parts = self._build_parts() - # CONSTRUCT template - template_lines = [] + parts.append("CONSTRUCT {") for s, p, o in self._templates: - template_lines.append( + parts.append( f" {serialize_term(s)} {serialize_term(p)} {serialize_term(o)} ." 
) - parts.append("CONSTRUCT {") - parts.extend(template_lines) parts.append("}") - # WHERE (optional for CONSTRUCT but we always emit it if patterns exist) - if len(self._pattern) > 0: - parts.append("WHERE {") - parts.append(self._pattern.to_sparql()) - parts.append("}") - + self._render_where(parts) return "\n".join(parts) - def __str__(self) -> str: - return self.build() - - -# ── DescribeQuery ──────────────────────────────────────────────────── - -class DescribeQuery(_WhereClauseMixin, _PrefixMixin): +class DescribeQuery(_QueryBase): """Builder for ``DESCRIBE`` queries.""" def __init__(self, *resources: Term) -> None: + super().__init__() self._resources = list(resources) - self._pattern = GraphPattern() - self._prefixes: dict[str, str] = {} - - def copy(self) -> DescribeQuery: - return copy.deepcopy(self) def build(self) -> str: if not self._resources: raise ValueError("DESCRIBE query requires at least one resource") - parts: list[str] = [] - if self._prefixes: - parts.append(self._render_prefixes()) - - resources_str = " ".join(serialize_term(r) for r in self._resources) - parts.append(f"DESCRIBE {resources_str}") - - if len(self._pattern) > 0: - parts.append("WHERE {") - parts.append(self._pattern.to_sparql()) - parts.append("}") - + parts = self._build_parts() + parts.append(f"DESCRIBE {' '.join(serialize_term(r) for r in self._resources)}") + self._render_where(parts) return "\n".join(parts) - - def __str__(self) -> str: - return self.build() diff --git a/rdf4j_python/query/_pattern.py b/rdf4j_python/query/_pattern.py index 6b27fdc..114ee57 100644 --- a/rdf4j_python/query/_pattern.py +++ b/rdf4j_python/query/_pattern.py @@ -11,18 +11,20 @@ from ._builder import SelectQuery +def _ensure_var(var: str) -> str: + """Prefix *var* with ``?`` if missing.""" + return var if var.startswith("?") else f"?{var}" + + class GraphPattern: - """A composable block of SPARQL graph patterns (triples, filters, etc.). + """A composable block of SPARQL graph patterns. 
- Used as the body of a WHERE clause and inside OPTIONAL / UNION blocks. Every mutating method returns ``self`` for fluent chaining. """ def __init__(self) -> None: self._elements: list[str] = [] - # ── triple patterns ────────────────────────────────────────────── - def where(self, s: Term, p: Term, o: Term) -> GraphPattern: """Add a triple pattern.""" self._elements.append( @@ -30,19 +32,19 @@ def where(self, s: Term, p: Term, o: Term) -> GraphPattern: ) return self - # ── FILTER ─────────────────────────────────────────────────────── - def filter(self, expr: str) -> GraphPattern: """Add a ``FILTER(expr)`` clause.""" self._elements.append(f"FILTER({expr})") return self - # ── OPTIONAL ───────────────────────────────────────────────────── - - def optional(self, s_or_pattern: Term | GraphPattern, p: Term | None = None, o: Term | None = None) -> GraphPattern: + def optional( + self, + s_or_pattern: Term | GraphPattern, + p: Term | None = None, + o: Term | None = None, + ) -> GraphPattern: """Add an ``OPTIONAL { … }`` block. - Two calling conventions: - ``optional(s, p, o)`` — single triple shorthand - ``optional(GraphPattern())`` — complex pattern block """ @@ -51,13 +53,13 @@ def optional(self, s_or_pattern: Term | GraphPattern, p: Term | None = None, o: self._elements.append(f"OPTIONAL {{\n{body}\n }}") else: if p is None or o is None: - raise ValueError("optional() requires either a GraphPattern or three term arguments (s, p, o)") + raise ValueError( + "optional() requires either a GraphPattern or three term arguments (s, p, o)" + ) triple = f"{serialize_term(s_or_pattern)} {serialize_term(p)} {serialize_term(o)} ." 
self._elements.append(f"OPTIONAL {{ {triple} }}") return self - # ── UNION ──────────────────────────────────────────────────────── - def union(self, *patterns: GraphPattern) -> GraphPattern: """Add ``{ … } UNION { … }`` blocks.""" if len(patterns) < 2: @@ -69,25 +71,17 @@ def union(self, *patterns: GraphPattern) -> GraphPattern: self._elements.append(" UNION ".join(parts)) return self - # ── BIND ───────────────────────────────────────────────────────── - def bind(self, expr: str, var: str) -> GraphPattern: """Add a ``BIND(expr AS ?var)`` clause.""" - v = var if var.startswith("?") else f"?{var}" - self._elements.append(f"BIND({expr} AS {v})") + self._elements.append(f"BIND({expr} AS {_ensure_var(var)})") return self - # ── VALUES ─────────────────────────────────────────────────────── - def values(self, var: str, vals: list[Any]) -> GraphPattern: """Add a ``VALUES ?var { … }`` clause.""" - v = var if var.startswith("?") else f"?{var}" serialized = " ".join(serialize_term(val) for val in vals) - self._elements.append(f"VALUES {v} {{ {serialized} }}") + self._elements.append(f"VALUES {_ensure_var(var)} {{ {serialized} }}") return self - # ── sub-query ──────────────────────────────────────────────────── - def sub_query(self, builder: SelectQuery) -> GraphPattern: """Embed a sub-SELECT inside this pattern.""" inner = builder.build() @@ -95,8 +89,6 @@ def sub_query(self, builder: SelectQuery) -> GraphPattern: self._elements.append(f"{{\n{indented}\n }}") return self - # ── rendering ──────────────────────────────────────────────────── - def to_sparql(self, indent: int = 2) -> str: """Render the pattern body (without the outer braces).""" prefix = " " * indent diff --git a/rdf4j_python/query/_term.py b/rdf4j_python/query/_term.py index ca27db5..631dd15 100644 --- a/rdf4j_python/query/_term.py +++ b/rdf4j_python/query/_term.py @@ -2,31 +2,23 @@ from __future__ import annotations -import re from typing import Union import pyoxigraph as og Term = Union[str, og.NamedNode, 
og.Variable, og.Literal, og.BlankNode] -_PREFIXED_NAME_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*:[a-zA-Z_][a-zA-Z0-9_.-]*$") +_XSD_STRING = "http://www.w3.org/2001/XMLSchema#string" def serialize_term(term: Term) -> str: """Convert a term to its SPARQL string representation. - Handles: - - str starting with ``?`` — pass through as variable - - str ``"a"`` — pass through (rdf:type shorthand) - - str matching ``prefix:local`` — pass through - - str wrapped in ``<>`` — pass through as full IRI - - str wrapped in ``"`` or ``'`` — pass through as literal - - str that is a SPARQL expression (e.g. aggregates) — pass through - - ``IRI`` (NamedNode) — ```` - - ``Variable`` — ``?value`` - - ``Literal`` — ``"value"`` with optional ``@lang`` or ``^^`` - - ``BlankNode`` — ``_:value`` + Strings are passed through as-is (variables, prefixed names, full IRIs, etc.). + Typed objects (NamedNode, Variable, Literal, BlankNode) are formatted accordingly. """ + if isinstance(term, str): + return term if isinstance(term, og.NamedNode): return f"<{term.value}>" if isinstance(term, og.Variable): @@ -35,11 +27,9 @@ def serialize_term(term: Term) -> str: value = term.value.replace("\\", "\\\\").replace('"', '\\"') if term.language: return f'"{value}"@{term.language}' - if term.datatype and term.datatype.value != "http://www.w3.org/2001/XMLSchema#string": + if term.datatype and term.datatype.value != _XSD_STRING: return f'"{value}"^^<{term.datatype.value}>' return f'"{value}"' if isinstance(term, og.BlankNode): return f"_:{term.value}" - if isinstance(term, str): - return term raise TypeError(f"Unsupported term type: {type(term)}") From 78215929cb771cbb70f9a5678ef5288b528f5fd2 Mon Sep 17 00:00:00 2001 From: Chengxu Bian Date: Sat, 28 Feb 2026 12:49:23 -0500 Subject: [PATCH 4/4] fix: remove unused GraphPattern import in query_and_print example --- examples/query_and_print.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/query_and_print.py 
b/examples/query_and_print.py index d30a41d..cf2551b 100644 --- a/examples/query_and_print.py +++ b/examples/query_and_print.py @@ -12,7 +12,7 @@ from pyoxigraph import QuerySolutions, QueryTriples -from rdf4j_python import AsyncRdf4j, GraphPattern, ask, construct, select +from rdf4j_python import AsyncRdf4j, ask, construct, select from rdf4j_python.model._namespace import Namespace from rdf4j_python.model.repository_config import ( MemoryStoreConfig,