Skip to content

Commit

Permalink
Add initial coarse reduction pass for reducing alternatives
Browse files Browse the repository at this point in the history
This adds an initial phase to shrinking that is allowed to make
changes that would be bad to make as part of the main shrink pass,
with the main goal of producing better results for ``one_of``.
  • Loading branch information
DRMacIver committed Dec 28, 2024
1 parent e6f4519 commit 9d15a6d
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 7 deletions.
4 changes: 4 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
RELEASE_TYPE: patch

This release further improves shrinking of strategies using :func:`~hypothesis.strategies.one_of`,
allowing the shrinker to more reliably move between branches of the strategy.
113 changes: 113 additions & 0 deletions hypothesis-python/src/hypothesis/internal/conjecture/shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@ def shrink(self):
self.incorporate_test_data(data.as_result())

try:
self.initial_coarse_reduction()
self.greedy_shrink()
except StopShrinking:
# If we stopped shrinking because we're making slow progress (instead of
Expand Down Expand Up @@ -704,6 +705,118 @@ def greedy_shrink(self):
]
)

def initial_coarse_reduction(self):
    """Run one-off preliminary reductions before the main shrink loop.

    These reductions are deliberately kept out of the repeated shrink
    passes: they are allowed to make the test case locally "worse",
    e.g. by rerandomising part of it and significantly increasing the
    value of individual nodes. Running them repeatedly would work in
    direct opposition to the lexical shrinking passes and keep undoing
    their progress.
    """
    self.reduce_each_alternative()

@derived_value
def examples_starting_at(self):
    """Map each node index to the indices of examples beginning there.

    Returns a tuple of tuples, indexed by ``ir_start`` position.
    """
    buckets = [[] for _ in self.shrink_target.ir_nodes]
    for index, example in enumerate(self.examples):
        # Zero-length examples may start one past the final node; those
        # have no bucket and are skipped.
        if example.ir_start < len(buckets):
            buckets[example.ir_start].append(index)
    return tuple(tuple(bucket) for bucket in buckets)

def reduce_each_alternative(self):
    """This is a pass that is designed to rerandomise use of the
    one_of strategy or things that look like it, in order to try
    to move from later strategies to earlier ones in the branch
    order.
    It does this by trying to systematically lower each value it
    finds that looks like it might be the branch decision for
    one_of, and then attempts to repair any changes in shape that
    this causes.
    """
    i = 0
    while i < len(self.shrink_target.ir_nodes):
        # Re-read the nodes each iteration: a successful lowering below
        # replaces self.shrink_target, invalidating the previous tuple.
        nodes = self.shrink_target.ir_nodes
        node = nodes[i]
        # Heuristic candidate for a one_of branch index: a small,
        # unforced integer draw. The <= 10 bound keeps this pass cheap.
        if node.ir_type == "integer" and not node.was_forced and node.value <= 10:
            assert isinstance(node.value, int)

            # We've found a plausible candidate for a ``one_of`` choice.
            # We now want to see if the shape of the test case actually depends
            # on it. If it doesn't, then we don't need to do this (comparatively
            # costly) pass, and can let much simpler lexicographic reduction
            # handle it later.
            #
            # We test this by trying to set the value to zero and seeing if the
            # shape changes, as measured by either changing the number of subsequent
            # nodes, or changing the nodes in such a way as to cause one of the
            # previous values to no longer be valid in its position.
            zero_attempt = self.cached_test_function_ir(
                nodes[:i] + (nodes[i].copy(with_value=0),) + nodes[i + 1 :]
            )
            if (
                zero_attempt is not self.shrink_target
                and zero_attempt is not None
                and zero_attempt.status >= Status.VALID
            ):
                changed_shape = len(zero_attempt.ir_nodes) != len(nodes)

                if not changed_shape:
                    # Same node count: check whether each original value
                    # would still be permitted in the zeroed attempt.
                    for j in range(i + 1, len(nodes)):
                        zero_node = zero_attempt.ir_nodes[j]
                        orig_node = nodes[j]
                        if (
                            zero_node.ir_type != orig_node.ir_type
                            or not ir_value_permitted(
                                orig_node.value, zero_node.ir_type, zero_node.kwargs
                            )
                        ):
                            changed_shape = True
                            break
                if changed_shape:
                    # Try each strictly smaller branch value in ascending
                    # order, stopping at the first successful lowering.
                    for v in range(node.value):
                        if self.try_lower_node_as_alternative(i, v):
                            break
        i += 1

def try_lower_node_as_alternative(self, i, v):
    """Attempt to lower `self.shrink_target.ir_nodes[i]` to `v`,
    while rerandomising and attempting to repair any subsequent
    changes to the shape of the test case that this causes.

    Returns True if the shrink target was successfully improved.
    """
    nodes = self.shrink_target.ir_nodes
    # Cheap option first: replace node i with v and leave the rest of
    # the test case unchanged.
    initial_attempt = self.cached_test_function_ir(
        nodes[:i] + (nodes[i].copy(with_value=v),) + nodes[i + 1 :]
    )
    if initial_attempt is self.shrink_target:
        return True

    prefix = nodes[:i] + (nodes[i].copy(with_value=v),)
    initial = self.shrink_target
    examples = self.examples_starting_at[i]
    # The straight replacement didn't become the shrink target, so the
    # suffix presumably no longer fits. Rerandomise the suffix a few
    # times and try grafting pieces of the random attempt back in.
    for _ in range(3):
        random_attempt = self.engine.cached_test_function_ir(
            prefix, extend=len(nodes) * 2
        )
        if random_attempt.status < Status.VALID:
            continue
        self.incorporate_test_data(random_attempt)
        # For each example starting at node i, splice that example's
        # randomised contents into the original node sequence.
        for j in examples:
            initial_ex = initial.examples[j]
            attempt_ex = random_attempt.examples[j]
            contents = random_attempt.ir_nodes[
                attempt_ex.ir_start : attempt_ex.ir_end
            ]
            self.consider_new_tree(
                nodes[:i] + contents + nodes[initial_ex.ir_end :]
            )
            if initial is not self.shrink_target:
                return True
    return False

@derived_value  # type: ignore
def shrink_pass_choice_trees(self):
    # Lazily creates one ChoiceTree per shrink pass on first access.
    return defaultdict(ChoiceTree)
Expand Down
1 change: 1 addition & 0 deletions hypothesis-python/tests/conjecture/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def generate_new_examples(self):
runner.run()
(last_data,) = runner.interesting_examples.values()
assert last_data.status == Status.INTERESTING
assert runner.exit_reason == ExitReason.max_shrinks
assert runner.shrinks == n
in_db = set(db.data[runner.secondary_key])
assert len(in_db) == n
Expand Down
27 changes: 27 additions & 0 deletions hypothesis-python/tests/conjecture/test_shrinker.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,3 +518,30 @@ def shrinker(data: ConjectureData):
# shrinking. Since the second draw is forced, this isn't possible to shrink
# with just this pass.
assert shrinker.choices == (15, 10)


def test_alternative_shrinking_will_lower_to_alternate_value():
    """The coarse reduction pass should move from the ``i == 1`` branch
    (bytes draw) to the ``i == 0`` branch (integer draw), lowering the
    first choice to 0."""
    # We want to reject the first integer value we see when shrinking
    # this alternative, because it will be the result of transmuting the
    # bytes value, and we want to ensure that we can find other values
    # there when we detect the shape change.
    seen_int = None

    @shrinking_from(ir(1, b"hello world"))
    def shrinker(data: ConjectureData):
        nonlocal seen_int
        i = data.draw_integer(min_value=0, max_value=1)
        if i == 1:
            if data.draw_bytes():
                data.mark_interesting()
        else:
            n = data.draw_integer(0, 100)
            if n == 0:
                return
            if seen_int is None:
                seen_int = n
            elif n != seen_int:
                data.mark_interesting()

    shrinker.initial_coarse_reduction()
    # The branch decision node must have been lowered to the 0 branch.
    assert shrinker.choices[0] == 0
48 changes: 41 additions & 7 deletions hypothesis-python/tests/nocover/test_precise_shrinking.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,12 @@ def test_function(data):


@lru_cache
def minimal_for_strategy(s):
    # Cached because precisely_shrink is expensive and the same strategy
    # objects are reused across many parametrized tests.
    # NOTE(review): lru_cache keeps every strategy passed here alive for
    # the life of the process — acceptable in a test suite.
    return precisely_shrink(s, end_marker=st.none())


def minimal_buffer_for_strategy(s):
    # Convenience wrapper: only the shrunk result's buffer is needed.
    result, _ = minimal_for_strategy(s)
    return result.buffer


def test_strategy_list_is_in_sorted_order():
Expand Down Expand Up @@ -274,12 +278,11 @@ def shortlex(s):
result_list = []

for k, v in sorted(results.items(), key=lambda x: shortlex(x[0])):
if shortlex(k) < shortlex(buffer):
t = repr(v)
if t in seen:
continue
seen.add(t)
result_list.append((k, v))
t = repr(v)
if t in seen:
continue
seen.add(t)
result_list.append((k, v))
return result_list


Expand All @@ -296,3 +299,34 @@ def test_always_shrinks_to_none(a, seed, block_falsey, allow_sloppy):
combined_strategy, result.buffer, allow_sloppy=allow_sloppy, seed=seed
)
assert shrunk_values[0][1] is None


@pytest.mark.parametrize(
    "i,alts", [(i, alt) for alt in alternatives for i in range(1, len(alt))]
)
@pytest.mark.parametrize("force_small", [False, True])
@pytest.mark.parametrize("seed", [0, 2452, 99085240570])
def test_can_shrink_to_every_smaller_alternative(i, alts, seed, force_small):
    """A value drawn from branch ``i`` of a one_of must be able to shrink
    to every earlier branch."""
    branch_types, branch_strats = zip(*alts)
    combined_strategy = st.one_of(*branch_strats)

    def is_target(x):
        return type(x) is branch_types[i]

    if force_small:
        result, _ = precisely_shrink(
            combined_strategy, is_interesting=is_target, seed=seed
        )
    else:
        result, _ = find_random(combined_strategy, is_target, seed=seed)

    shrunk = shrinks(
        combined_strategy,
        result.buffer,
        allow_sloppy=False,
        # Arbitrary change so we don't use the same seed for each Random.
        seed=seed * 17,
    )
    shrunk_values = [value for _, value in shrunk]

    # Every branch earlier than i must appear among the shrunk values.
    for j in range(i):
        assert any(isinstance(x, branch_types[j]) for x in shrunk_values)

0 comments on commit 9d15a6d

Please sign in to comment.