Merge pull request #4694 from HypothesisWorks/DRMacIver/better-reprs-for-all

Zac-HD · web-flow · commit 306a57a26555 · 2026-04-22T11:56:28.000-04:00
Various improvements in printing test cases
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,11 @@
+RELEASE_TYPE: patch
+
+This release further improves printing of generated values, building on the changes
+in :version:`6.151.11`.
+
+Principal changes:
+
+* In many cases where we would have printed a complex expression
+  producing a value, we now print the repr (or a pretty-printed version of it).
+* Additionally, in some cases where we would print a complex expression that involved
+  a lambda, we are now able to simplify that expression into a more readable one.
diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/core.py b/hypothesis-python/src/hypothesis/strategies/_internal/core.py
@@ -455,6 +455,14 @@ def __next__(self):
     def __repr__(self) -> str:
         return f"iter({self._values!r})"
 
+    def _repr_pretty_(self, printer, cycle):
+        if cycle:
+            printer.text("iter(...)")
+        else:
+            printer.text("iter(")
+            printer.pretty(self._values)
+            printer.text(")")
+
 
 @defines_strategy()
 def iterables(
diff --git a/hypothesis-python/src/hypothesis/vendor/pretty.py b/hypothesis-python/src/hypothesis/vendor/pretty.py
@@ -93,15 +93,6 @@ def _repr_pretty_(self, p, cycle):
     "pretty",
 ]
 
-PRIMITIVE_TYPES_ALWAYS_USE_REPR = (
-    int,
-    float,
-    str,
-    bytes,
-    bool,
-    type(None),
-)
-
 
 def _safe_getattr(obj: object, attr: str, default: Any | None = None) -> Any:
     """Safe version of getattr.
@@ -134,6 +125,98 @@ def __eq__(self, __o: object) -> bool:
         return isinstance(__o, type(self)) and id(self.value) == id(__o.value)
 
 
+def _try_inline_lambda(
+    func_name: str,
+    args: Sequence[object],
+    kwargs: dict[str, object],
+    printer: "RepresentationPrinter",
+) -> bool:
+    """Try to inline single-use lambda arguments into the body expression.
+
+    Given e.g. func_name="lambda b: hashlib.sha256(b).hexdigest()" with
+    args=(b'',), writes "hashlib.sha256(b'').hexdigest()" to the printer
+    by substituting the argument repr into the lambda body's AST.
+
+    Returns True if inlining succeeded (the printer has been written to),
+    False if inlining is not possible (parse failure, multi-use params, etc).
+    """
+    try:
+        tree = ast.parse(func_name, mode="eval")
+    except Exception:
+        return False
+    lam = tree.body
+    if not isinstance(lam, ast.Lambda):
+        return False
+
+    # Build param name -> argument repr mapping, matching Python call semantics
+    params = lam.args
+    if params.vararg or params.kwonlyargs or params.kw_defaults or params.kwarg:
+        return False
+
+    param_names = [p.arg for p in params.args]
+    # params.defaults are right-aligned: if there are 3 params and 1 default,
+    # params.defaults applies to the last param only.
+    n_defaults = len(params.defaults)
+    has_default = (
+        set(param_names[len(param_names) - n_defaults :]) if n_defaults else set()
+    )
+
+    # Bail if there are more positional args than parameters, or if any
+    # kwarg doesn't match a parameter name — these can't be inlined.
+    if len(args) > len(param_names):
+        return False
+    if any(k not in param_names for k in kwargs):
+        return False
+
+    arg_reprs: dict[str, str] = {}
+    for i, name in enumerate(param_names):
+        if i < len(args):
+            arg_reprs[name] = pretty(args[i])
+        elif name in kwargs:
+            arg_reprs[name] = pretty(kwargs[name])
+        elif name in has_default:
+            pass  # not passed, will use its default — just skip
+        else:
+            return False
+
+    # Bail if any repr is not valid Python (e.g. "HypothesisRandom(generated data)")
+    for repr_str in arg_reprs.values():
+        try:
+            ast.parse(repr_str, mode="eval")
+        except Exception:
+            return False
+
+    use_counts = dict.fromkeys(param_names, 0)
+    for node in ast.walk(lam.body):
+        if isinstance(node, ast.Name) and node.id in use_counts:
+            use_counts[node.id] += 1
+
+    # Bail if any parameter is used more than once (avoid duplicating expressions)
+    if any(count > 1 for count in use_counts.values()):
+        return False
+
+    # Substitute argument reprs into the body AST
+    class _Inliner(ast.NodeTransformer):
+        def visit_Name(self, node: ast.Name) -> ast.AST:
+            if node.id in arg_reprs:
+                # Parse the repr as an expression and splice it in as a subtree;
+                # ast.unparse later adds any parentheses precedence requires.
+                replacement = ast.parse(arg_reprs[node.id], mode="eval").body
+                return ast.copy_location(replacement, node)
+            return node
+
+    new_body = _Inliner().visit(lam.body)
+    ast.fix_missing_locations(new_body)
+
+    try:
+        result = ast.unparse(new_body)
+    except Exception:
+        return False
+
+    printer.text(result)
+    return True
+
+
 class RepresentationPrinter:
     """Special pretty printer that has a `pretty` method that calls the pretty
     printer for a python object.
@@ -428,11 +511,12 @@ def maybe_repr_known_object_as_call(
         kwargs: dict[str, object],
         arg_labels: ArgLabelsT | None = None,
     ) -> None:
-        if isinstance(obj, PRIMITIVE_TYPES_ALWAYS_USE_REPR):
-            return _repr_pprint(obj, self, cycle)
-
-        # pprint this object as a call, _unless_ the call would be invalid syntax
-        # and the repr would be valid and there are not comments on arguments.
+        # pprint this object as a call if it seems like a good idea to do so,
+        # otherwise pprint as repr.
+        # Rules:
+        # 1. If there are comments, we *must* print as a call.
+        # 2. Prefer valid syntax to invalid syntax.
+        # 3. Prefer shorter expressions.
         if cycle:
             return self.text("<...>")
         # Look up comments from slice_comments if we have arg_labels
@@ -452,6 +536,8 @@ def maybe_repr_known_object_as_call(
                 p.known_object_printers = self.known_object_printers
                 p.repr_call(name, args, kwargs)
                 # If the call is not valid syntax, use the repr
+                if len(repr(obj)) < len(p.getvalue()):
+                    return _repr_pprint(obj, self, cycle)
                 try:
                     ast.parse(p.getvalue())
                 except Exception:
@@ -479,6 +565,19 @@ def repr_call(
         """
         assert isinstance(func_name, str)
         if func_name.startswith(("lambda:", "lambda ")):
+            # Before wrapping the lambda in parens for a call, try to inline
+            # arguments into the body (each parameter may appear at most once
+            # there). On success we emit just the body expression, with no call.
+            # Skip inlining only when there are actual comments on arguments,
+            # since comments need the call-style repr to attach to.
+            has_comments = arg_slices and any(
+                sr in self.slice_comments and sr not in self._commented_slices
+                for sr in arg_slices.values()
+            )
+            if not has_comments:
+                inlined = _try_inline_lambda(func_name, args, kwargs, self)
+                if inlined:
+                    return
             func_name = f"({func_name})"
         self.text(func_name)
         # Build list of (label, value) pairs. Labels are "arg[i]" for positional
diff --git a/hypothesis-python/tests/cover/__snapshots__/test_custom_reprs.ambr b/hypothesis-python/tests/cover/__snapshots__/test_custom_reprs.ambr
@@ -1,8 +1,15 @@
 # serializer version: 1
+# name: test_invalid_call_syntax_falls_back_to_repr
+  '''
+  Falsifying example: inner(
+      x='hello world',
+  )
+  '''
+# ---
 # name: test_map_to_bytes_prints_as_repr
   '''
   Falsifying example: inner(
-      b=b"\xe3\xb0\xc4B\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99o\xb9$'\xaeA\xe4d\x9b\x93L\xa4\x95\x99\x1bxR\xb8U",
+      b=hashlib.sha256(b'').digest(),
   )
   '''
 # ---
diff --git a/hypothesis-python/tests/cover/test_custom_reprs.py b/hypothesis-python/tests/cover/test_custom_reprs.py
@@ -192,3 +192,20 @@ def inner(b):
         raise AssertionError
 
     assert _get_output(inner) == snapshot
+
+
+def test_invalid_call_syntax_falls_back_to_repr(snapshot):
+    # When the call-style repr (e.g. "a b()") is invalid Python syntax but
+    # repr(obj) is valid, we should fall back to using repr(obj).
+    _BadName = type(
+        "a b",
+        (),
+        {"__repr__": lambda self: "'hello world'"},
+    )
+
+    @given(x=st.builds(_BadName))
+    @settings(phases=[Phase.generate, Phase.shrink], print_blob=False)
+    def inner(x):
+        raise AssertionError
+
+    assert _get_output(inner) == snapshot
diff --git a/hypothesis-python/tests/cover/test_lambda_inlining.py b/hypothesis-python/tests/cover/test_lambda_inlining.py
diff --git a/hypothesis-python/tests/snapshots/__snapshots__/test_always_failing.ambr b/hypothesis-python/tests/snapshots/__snapshots__/test_always_failing.ambr