Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
RELEASE_TYPE: patch

This release improves shrinking for a very specific category of strategy.
If you have a primitive strategy such as :func:`~hypothesis.strategies.text`
and write ``my_primitive_strategy | some_more_complicated_strategy``, then
primitive values (``int``, ``bool``, ``float``, ``str``, or ``bytes``) produced
by the second strategy can now be shrunk as if they had come from the first
strategy.
105 changes: 86 additions & 19 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ def structural_coverage(label: int) -> StructuralCoverageTag:
# performance. We lose scan resistance, but that's probably fine here.
POOLED_CONSTRAINTS_CACHE: LRUCache[tuple[Any, ...], ChoiceConstraintsT] = LRUCache(4096)

# The Python types corresponding to our choice-node primitive types. A strategy
# whose generated value has exactly one of these types - membership is tested
# with ``type(value)``, so instances of subclasses do not count - gets that
# value recorded on its span as the generated primitive value.
#
# This is a tuple rather than a set because ``in`` on a tuple compares elements
# by identity and equality and never hashes them, so the check still works if
# the generated value's class has an unhashable metaclass.
_PRIMITIVE_CHOICE_TYPES: tuple[type, ...] = (int, bool, str, bytes, float)


class Span:
"""A span tracks the hierarchical structure of choices within a single test run.
Expand Down Expand Up @@ -239,6 +246,15 @@ def children(self) -> "list[Span]":
order."""
return [self.owner[i] for i in self.owner.children[self.index]]

@property
def generated_primitive_value(self) -> Any:
    """Return the primitive value recorded for this span, if any.

    When the strategy behind this span produced a value of a type we have
    a choice node for (``int``, ``bool``, ``str``, ``bytes``, or ``float``),
    that value is stored against the span's index. Spans with no recorded
    value yield ``None``.
    """
    recorded = self.owner.generated_primitive_values
    return recorded.get(self.index)


class SpanProperty:
"""There are many properties of spans that we calculate by
Expand Down Expand Up @@ -321,6 +337,9 @@ def __init__(self) -> None:
self.__index_of_labels: dict[int, int] | None = {}
self.trail = IntList()
self.nodes: list[ChoiceNode] = []
self.generated_primitive_values: dict[int, Any] = {}
self.__open_spans: list[int] = []
self.__span_count = 0

def freeze(self) -> None:
self.__index_of_labels = None
Expand All @@ -336,12 +355,24 @@ def start_span(self, label: int) -> None:
i = self.__index_of_labels.setdefault(label, len(self.labels))
self.labels.append(label)
self.trail.append(TrailType.CHOICE + 1 + i)
self.__open_spans.append(self.__span_count)
self.__span_count += 1

def stop_span(self, *, discard: bool) -> None:
    """Close the innermost open span, noting in the trail whether it was
    discarded."""
    marker = (
        TrailType.STOP_SPAN_DISCARD if discard else TrailType.STOP_SPAN_NO_DISCARD
    )
    self.trail.append(marker)
    # The span is finished, so drop it from the stack of open spans.
    self.__open_spans.pop()

def record_value_for_span(self, value: Any) -> None:
    """Remember ``value`` as the generated primitive value of the innermost
    open span.

    Values whose exact type is not one of the primitive choice-node types
    are silently ignored. Called by ``ConjectureData.draw`` to capture the
    value a strategy produced.
    """
    if type(value) in _PRIMITIVE_CHOICE_TYPES:
        assert self.__open_spans, "Cannot record a value without an open span"
        innermost = self.__open_spans[-1]
        self.generated_primitive_values[innermost] = value


class _starts_and_ends(SpanProperty):
Expand Down Expand Up @@ -442,6 +473,9 @@ class Spans:
def __init__(self, record: SpanRecord) -> None:
self.trail = record.trail
self.labels = record.labels
self.generated_primitive_values: dict[int, Any] = (
record.generated_primitive_values
)
self.__length = self.trail.count(
TrailType.STOP_SPAN_DISCARD
) + record.trail.count(TrailType.STOP_SPAN_NO_DISCARD)
Expand Down Expand Up @@ -997,6 +1031,33 @@ def draw_boolean(
constraints: BooleanConstraints = self._pooled_constraints("boolean", {"p": p})
return self._draw("boolean", constraints, observe=observe, forced=forced)

def add_choice_node_for(self, value: Any) -> None:
    """Add a forced choice representing ``value`` to the choice sequence.

    Some strategies, such as :func:`just` and :func:`sampled_from`, produce
    their value without consulting the choice sequence. Adding a forced
    choice makes the span non-empty, so that span-level machinery (the
    recorded generated primitive value on the span, shrinking widenings)
    can see it.

    A primitive ``value`` is forced as a choice of its own type. Anything
    else is represented by a forced boolean ``False`` - the simplest choice
    available, so the span doesn't look any more complex than an empty one
    would.
    """
    kind = type(value)
    if kind is bool:
        self.draw_boolean(forced=value)
        return
    if kind is int:
        self.draw_integer(forced=value)
        return
    if kind is float:
        self.draw_float(forced=value)
        return
    if kind is str:
        alphabet = IntervalSet.from_string(value)
        self.draw_string(alphabet, forced=value)
        return
    if kind is bytes:
        self.draw_bytes(max_size=len(value), forced=value)
        return
    # Not a primitive: fall back to the simplest possible placeholder.
    self.draw_boolean(forced=False)

@overload
def _pooled_constraints(
self, choice_type: Literal["integer"], constraints: IntegerConstraints
Expand Down Expand Up @@ -1202,26 +1263,32 @@ def draw(
self.start_span(label=label)
try:
if not at_top_level:
return unwrapped.do_draw(self)
assert start_time is not None
key = observe_as or f"generate:unlabeled_{len(self.draw_times)}"
try:
v = unwrapped.do_draw(self)
else:
assert start_time is not None
key = observe_as or f"generate:unlabeled_{len(self.draw_times)}"
try:
v = unwrapped.do_draw(self)
finally:
# Subtract the time spent in GC to avoid overcounting, as it is
# accounted for at the overall example level.
in_gctime = gc_cumulative_time() - gc_start_time
self.draw_times[key] = time.perf_counter() - start_time - in_gctime
except Exception as err:
add_note(
err,
f"while generating {key.removeprefix('generate:')!r} from {strategy!r}",
)
raise
if observability_enabled():
avoid = self.provider.avoid_realization
self._observability_args[key] = to_jsonable(v, avoid_realization=avoid)
try:
v = unwrapped.do_draw(self)
finally:
# Subtract the time spent in GC to avoid overcounting, as
# it is accounted for at the overall example level.
in_gctime = gc_cumulative_time() - gc_start_time
self.draw_times[key] = (
time.perf_counter() - start_time - in_gctime
)
except Exception as err:
add_note(
err,
f"while generating {key.removeprefix('generate:')!r} from {strategy!r}",
)
raise
if observability_enabled():
avoid = self.provider.avoid_realization
self._observability_args[key] = to_jsonable(
v, avoid_realization=avoid
)
self.__span_record.record_value_for_span(v)
return v
finally:
self.stop_span()
Expand Down
109 changes: 108 additions & 1 deletion hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@
prefix_selection_order,
random_selection_order,
)
from hypothesis.internal.floats import MAX_PRECISE_INTEGER
from hypothesis.internal.floats import MAX_PRECISE_INTEGER, SMALLEST_SUBNORMAL
from hypothesis.internal.intervalsets import IntervalSet

if TYPE_CHECKING:
from random import Random
Expand Down Expand Up @@ -89,6 +90,60 @@ def sort_key(nodes: Sequence[ChoiceNode]) -> tuple[int, tuple[int, ...]]:
)


def _choice_node_for_value(value: ChoiceT) -> ChoiceNode:
    """Build an unforced ``ChoiceNode`` holding the primitive ``value``.

    The node's constraints are permissive enough to accept ``value``.
    ``widen_to_span_with_generated_primitive_value`` uses this to turn a
    span's recorded generated primitive value into a single choice.

    Raises ``AssertionError`` if ``value`` is not exactly a ``bool``, ``int``,
    ``float``, ``str`` or ``bytes``.
    """
    kind = type(value)

    if kind is bool:
        return ChoiceNode(
            type="boolean",
            value=value,
            constraints={"p": 0.5},
            was_forced=False,
        )
    elif kind is int:
        # Unbounded, unweighted integer.
        return ChoiceNode(
            type="integer",
            value=value,
            constraints={
                "min_value": None,
                "max_value": None,
                "weights": None,
                "shrink_towards": 0,
            },
            was_forced=False,
        )
    elif kind is float:
        # Infinite bounds, with nan allowed.
        return ChoiceNode(
            type="float",
            value=value,
            constraints={
                "min_value": -math.inf,
                "max_value": math.inf,
                "allow_nan": True,
                "smallest_nonzero_magnitude": SMALLEST_SUBNORMAL,
            },
            was_forced=False,
        )
    elif kind is str:
        # The alphabet is derived from the value itself, and the value's
        # own length is the maximum size.
        alphabet = IntervalSet.from_string(value)
        longest = len(value)
        return ChoiceNode(
            type="string",
            value=value,
            constraints={
                "intervals": alphabet,
                "min_size": 0,
                "max_size": longest,
            },
            was_forced=False,
        )
    elif kind is bytes:
        longest = len(value)
        return ChoiceNode(
            type="bytes",
            value=value,
            constraints={"min_size": 0, "max_size": longest},
            was_forced=False,
        )
    else:
        raise AssertionError(f"non-primitive value {value!r} of type {type(value)}")


@dataclass(slots=True, frozen=False)
class ShrinkPass:
function: Any
Expand Down Expand Up @@ -321,6 +376,7 @@ def __init__(
ShrinkPass(self.redistribute_numeric_pairs),
ShrinkPass(self.lower_integers_together),
ShrinkPass(self.lower_duplicated_characters),
ShrinkPass(self.widen_to_span_with_generated_primitive_value),
]

# Because the shrinker is also used to `pareto_optimise` in the target phase,
Expand Down Expand Up @@ -501,6 +557,12 @@ def explain(self) -> None:
):
continue

# Skip slices with no non-forced nodes - there's nothing we can
# vary, so the "or any other generated value" note would be
# misleading (the value is in fact fully determined).
if all(nodes[i].was_forced for i in range(start, end)):
continue # pragma: no cover # only reachable via explain phase

# Run our experiments
n_same_failures = 0
note = "or any other generated value"
Expand Down Expand Up @@ -1508,6 +1570,51 @@ def copy_node(node, n):
),
)

def widen_to_span_with_generated_primitive_value(self, chooser):
"""Try to navigate away from a specific ``one_of`` alternative into
an earlier one by using the span's recorded generated primitive value.

If we have an integer choice with ``min_value == 0`` currently set to
a non-zero value, and it is immediately followed by a span whose
corresponding strategy produced a primitive value, we replace the
integer with ``0`` and the span's choices with a single choice
holding that primitive value. The engine then re-runs the test
against the earlier alternative with that value.

This is useful for ``basic_strategy | specific_strategy``, where
the specific branch produced a primitive that the basic branch could
also have produced: we slip the primitive across into the basic
branch so that normal shrinking can take it the rest of the way.
Comment on lines +1574 to +1587
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels uncomfortably brittle; it's obviously useful if the strategy is exactly like this, but there are some very sharp edges in how it works. (for example: you're just out of luck if you wrote branches in any other order)

Plausibly still worth doing, but it weighs against the PR for me.

"""
# Pick a non-forced integer choice whose lower bound is 0 and whose current
# value is non-zero. NOTE(review): the intent is the index that ``one_of``
# draws to select an alternative, but any integer choice matching this
# predicate is eligible - nothing here checks that it came from ``one_of``.
node = chooser.choose(
self.nodes,
lambda n: (
n.type == "integer"
and not n.was_forced
and n.constraints["min_value"] == 0
and n.value != 0
),
)

# The span to collapse must start at the node right after the integer.
# Presumably ``spans_starting_at`` is indexed by node position (confirm
# against its definition); if there is no entry for that position there
# is nothing to widen.
following = node.index + 1
if following >= len(self.spans_starting_at):
return

# Among the spans starting there, pick one that recorded a primitive value.
# The ``is not None`` test (rather than truthiness) keeps falsy primitives
# such as ``0``, ``""`` and ``False`` eligible.
candidate_spans = self.spans_starting_at[following]
span_idx = chooser.choose(
candidate_spans,
lambda i: self.spans[i].generated_primitive_value is not None,
)
span = self.spans[span_idx]
replacement = _choice_node_for_value(span.generated_primitive_value)

# New choice sequence: everything before the integer, the integer set to 0,
# one synthesised node in place of the span's choices, then everything
# from the end of the span onwards.
self.consider_new_nodes(
self.nodes[: node.index]
+ (node.copy(with_value=0),)
+ (replacement,)
+ self.nodes[span.end :]
)

def minimize_nodes(self, nodes):
choice_type = nodes[0].type
value = nodes[0].value
Expand Down
9 changes: 2 additions & 7 deletions hypothesis-python/src/hypothesis/strategies/_internal/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,6 @@ class JustStrategy(SampledFromStrategy[Ex]):

It's implemented as a length-one SampledFromStrategy so that all our
special-case logic for filtering and sets applies also to just(x).

The important difference from a SampledFromStrategy with only one
element to choose is that JustStrategy *never* touches the underlying
choice sequence, i.e. drawing neither reads from nor writes to `data`.
This is a reasonably important optimisation (or semantic distinction!)
for both JustStrategy and SampledFromStrategy.
"""

@property
Expand All @@ -60,7 +54,8 @@ def calc_is_cacheable(self, recur: RecurT) -> bool:
def do_filtered_draw(self, data: ConjectureData) -> Ex | UniqueIdentifier:
    # There is exactly one candidate value, so the parent class's general
    # filtered-draw logic collapses to applying the transform to it. The
    # parent's ``do_draw`` delegates here and then records the resulting
    # value with ``data.add_choice_node_for``.
    only_result = self._transform(self.value)
    return only_result


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def calc_label(self) -> int:
# The worst case performance of this scheme is
# itertools.chain(range(2**100), [st.none()]), where it degrades to
# hashing every int in the range.
(elements_is_hashable, hash_value) = _is_hashable(self.elements)
elements_is_hashable, hash_value = _is_hashable(self.elements)
if isinstance(self.elements, range) or (
elements_is_hashable
and not any(isinstance(e, SearchStrategy) for e in self.elements)
Expand Down Expand Up @@ -719,6 +719,10 @@ def do_draw(self, data: ConjectureData) -> Ex:
if result is filter_not_satisfied:
data.mark_invalid(f"Aborted test because unable to satisfy {self!r}")
assert not isinstance(result, UniqueIdentifier)
# Record the generated value as a forced choice so the span is
# non-empty and (for primitive values) annotated with the generated
# value for the shrinker's widening pass.
data.add_choice_node_for(result)
return result

def get_element(self, i: int) -> Ex | UniqueIdentifier:
Expand Down Expand Up @@ -843,12 +847,30 @@ def calc_label(self) -> int:
)

def do_draw(self, data: ConjectureData) -> Ex:
strategy = data.draw(
SampledFromStrategy(self.element_strategies).filter(
lambda s: not s.is_currently_empty(data)
strategies = self.element_strategies
n = len(strategies)
# Draw an index to pick an alternative, retrying a few times if the
# chosen strategy is currently empty (e.g. an empty Bundle in stateful
# testing).
for attempt in range(3):
i = data.draw_integer(0, n - 1)
if not strategies[i].is_currently_empty(data):
return data.draw(strategies[i])
if attempt == 0:
data.events[
f"Retried draw from {self!r} to avoid empty alternative"
] = ""

# If the retries all landed on empty alternatives, fall back to
# exhaustively picking a non-empty one.
allowed = [i for i in range(n) if not strategies[i].is_currently_empty(data)]
if not allowed:
data.mark_invalid(
f"Aborted test because all alternatives of {self!r} were empty"
)
)
return data.draw(strategy)
i = data.choice(allowed)
data.draw_integer(0, n - 1, forced=i)
return data.draw(strategies[i])

def __repr__(self) -> str:
return "one_of({})".format(", ".join(map(repr, self.original_strategies)))
Expand Down
Loading
Loading