From f9baebb0db4da315a3ddc647338889d70c52256b Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 18:33:58 +0200
Subject: [PATCH 1/6] [perf] Make ``PrettyPrinter`` format lazily so output can
 be budget-capped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``_format`` and the per-type helpers now ``yield`` their output as a
stream of string chunks instead of writing to a file-like object, and
``pformat`` joins them. On top of that, ``pformat_lines`` pulls from the
formatter only until a budget is reached:

    pformat_lines(obj, max_lines=None, max_chars=None)

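It stops on the first chunk that reaches *either* budget, so a huge
collection costs roughly O(budget) rather than O(N). Chunks are never
split, so the output can overshoot the budget by up to one chunk (for
example one whole scalar ``repr``). Either dimension may be ``None``
(unbounded); with both ``None`` the whole object is formatted.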

Motivation
----------
Assertion diffs are truncated to a handful of lines/chars before being
shown. Formatting the whole of a large ``==`` comparison and then
throwing almost all of it away is pure waste. With a lazy formatter the
truncating caller simply stops pulling once it has enough.

Benchmark (``PrettyPrinter`` alone, width 80)::

    list(range(500_000)):
        pformat().splitlines()        ~805 ms
        pformat_lines(max_lines=11)   ~0.027 ms      (~30000x)

    [8 small ints] (common small diff):
        pformat().splitlines()        ~0.0133 ms
        pformat_lines(max_lines=11)   ~0.0185 ms     (+~5 us)

    ["x"*100_000] * 3 (flat, few huge elements):
        pformat_lines(max_chars=640)  stops after ~100_000 chars
                                      (one element) instead of 300_000

Why a lazy generator rather than a fast path + budget stream
------------------------------------------------------------
An earlier approach kept a cheap ``pformat().splitlines()`` fast path
guarded by ``len(obj) <= max_lines`` plus a flatness check, falling back
to a write-intercepting budget-stream class for the rest. Two problems:

* ``len(obj)`` is only a *lower* bound on the line count — one nested
  element (``[{...50 keys...}]``) expands to many lines — so the guard
  needed the flatness scan to stay correct, and even then it bounded
  only *lines*, never *chars*: a flat container of a few enormous
  strings has almost no lines but blows the char budget.
* it was two code paths plus a stream class plus an exception used for
  control flow.

Because the formatter is lazy, "stop pulling at the budget" is the whole
optimisation: correct regardless of how lines/chars are distributed
across elements, bounding both dimensions, with no ``len()`` proxy to
get wrong and no fast/slow branch. The common small-diff case costs only
~5 us more than the unbounded path (it is never the bottleneck — a
failing assertion isn't hot), while large comparisons drop by orders of
magnitude.

``_pprint_set`` also tries a plain ``sorted`` first and falls back to
the ``_safe_key`` wrapper only for unorderable mixes.

This diverges structurally from the upstream cpython ``pprint`` it was
vendored from; the module header notes it is no longer kept in sync.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 335 ++++++++++++++++++++------------------
 testing/io/test_pprint.py |  84 ++++++++++
 2 files changed, 262 insertions(+), 157 deletions(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index ec41b449ddf..06caf436e60 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -3,6 +3,14 @@
 # (https://github.com/python/cpython/) at commit
 # c5140945c723ae6c4b7ee81ff720ac8ea4b52cfd (python3.12).
 #
+# It has since been adapted to emit its output lazily as a stream of
+# string chunks (``_format`` and the per-type helpers are generators)
+# rather than writing to a file-like object. This lets ``pformat_lines``
+# stop formatting as soon as a line/char budget is reached, so a huge
+# collection a caller is going to truncate anyway is never fully built.
+# As a result this copy has diverged structurally from upstream and is
+# no longer kept in sync with it.
+#
 #
 #  Original Author:      Fred L. Drake, Jr.
 #                        fdrake@acm.org
@@ -17,13 +25,12 @@
 
 import collections as _collections
 from collections.abc import Callable
+from collections.abc import Iterable
 from collections.abc import Iterator
 import dataclasses as _dataclasses
-from io import StringIO as _StringIO
 import re
 import types as _types
 from typing import Any
-from typing import IO
 
 
 class _safe_key:
@@ -87,28 +94,62 @@ def __init__(
         self._width = width
 
     def pformat(self, object: Any) -> str:
-        sio = _StringIO()
-        self._format(object, sio, 0, 0, set(), 0)
-        return sio.getvalue()
+        return "".join(self._format(object, 0, 0, set(), 0))
+
+    def pformat_lines(
+        self,
+        object: Any,
+        max_lines: int | None = None,
+        max_chars: int | None = None,
+    ) -> list[str]:
+        """Pretty-print ``object`` and return its lines.
+
+        ``_format`` yields the output as a stream of chunks, so this can
+        stop pulling from it as soon as a budget is reached — useful when
+        a downstream truncator is going to drop everything past that
+        budget anyway.
+
+        ``max_lines`` / ``max_chars`` bound the two truncation dimensions
+        independently; either may be ``None`` to leave that dimension
+        unbounded. With both ``None`` the whole object is formatted. The
+        budget is a stopping condition, not a precise cut: formatting
+        stops on the first chunk that reaches it, so the result may
+        slightly overshoot (the caller truncates to the exact limit).
+        """
+        if max_lines is None and max_chars is None:
+            return self.pformat(object).splitlines()
+        n_lines = 0
+        n_chars = 0
+        chunks: list[str] = []
+        for chunk in self._format(object, 0, 0, set(), 0):
+            chunks.append(chunk)
+            if max_chars is not None:
+                n_chars += len(chunk)
+            if max_lines is not None:
+                n_lines += chunk.count("\n")
+            if (max_lines is not None and n_lines >= max_lines) or (
+                max_chars is not None and n_chars >= max_chars
+            ):
+                break
+        return "".join(chunks).splitlines()
 
     def _format(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         objid = id(object)
         if objid in context:
-            stream.write(_recursion(object))
+            yield _recursion(object)
             return
 
         p = self._dispatch.get(type(object).__repr__, None)
         if p is not None:
             context.add(objid)
-            p(self, object, stream, indent, allowance, context, level + 1)
+            yield from p(self, object, indent, allowance, context, level + 1)
             context.remove(objid)
         elif (
             _dataclasses.is_dataclass(object)
@@ -120,125 +161,126 @@ def _format(
             and "__create_fn__" in object.__repr__.__wrapped__.__qualname__
         ):
             context.add(objid)
-            self._pprint_dataclass(
-                object, stream, indent, allowance, context, level + 1
+            yield from self._pprint_dataclass(
+                object, indent, allowance, context, level + 1
             )
             context.remove(objid)
         else:
-            stream.write(self._repr(object, context, level))
+            yield self._repr(object, context, level)
 
     def _pprint_dataclass(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         cls_name = object.__class__.__name__
         items = [
             (f.name, getattr(object, f.name))
             for f in _dataclasses.fields(object)
             if f.repr
         ]
-        stream.write(cls_name + "(")
-        self._format_namespace_items(items, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls_name + "("
+        yield from self._format_namespace_items(
+            items, indent, allowance, context, level
+        )
+        yield ")"
 
     _dispatch: dict[
         Callable[..., str],
-        Callable[[PrettyPrinter, Any, IO[str], int, int, set[int], int], None],
+        Callable[[PrettyPrinter, Any, int, int, set[int], int], Iterator[str]],
     ] = {}
 
     def _pprint_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
-        write("{")
-        items = object.items()
-        self._format_dict_items(items, stream, indent, allowance, context, level)
-        write("}")
+    ) -> Iterator[str]:
+        yield "{"
+        yield from self._format_dict_items(
+            object.items(), indent, allowance, context, level
+        )
+        yield "}"
 
     _dispatch[dict.__repr__] = _pprint_dict
 
     def _pprint_ordered_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object):
-            stream.write(repr(object))
+            yield repr(object)
             return
         cls = object.__class__
-        stream.write(cls.__name__ + "(")
-        self._pprint_dict(object, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls.__name__ + "("
+        yield from self._pprint_dict(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.OrderedDict.__repr__] = _pprint_ordered_dict
 
     def _pprint_list(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("[")
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write("]")
+    ) -> Iterator[str]:
+        yield "["
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield "]"
 
     _dispatch[list.__repr__] = _pprint_list
 
     def _pprint_tuple(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("(")
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write(")")
+    ) -> Iterator[str]:
+        yield "("
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[tuple.__repr__] = _pprint_tuple
 
     def _pprint_set(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object):
-            stream.write(repr(object))
+            yield repr(object)
             return
         typ = object.__class__
         if typ is set:
-            stream.write("{")
+            yield "{"
             endchar = "}"
         else:
-            stream.write(typ.__name__ + "({")
+            yield typ.__name__ + "({"
             endchar = "})"
-        object = sorted(object, key=_safe_key)
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write(endchar)
+        try:
+            object = sorted(object)
+        except TypeError:
+            # Heterogeneous element types — fall back to a key that
+            # tolerates unorderable pairs by string-comparing their types.
+            object = sorted(object, key=_safe_key)
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield endchar
 
     _dispatch[set.__repr__] = _pprint_set
     _dispatch[frozenset.__repr__] = _pprint_set
@@ -246,15 +288,13 @@ def _pprint_set(
     def _pprint_str(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
+    ) -> Iterator[str]:
         if not len(object):
-            write(repr(object))
+            yield repr(object)
             return
         chunks = []
         lines = object.splitlines(True)
@@ -289,90 +329,84 @@ def _pprint_str(
                 if current:
                     chunks.append(repr(current))
         if len(chunks) == 1:
-            write(rep)
+            yield rep
             return
         if level == 1:
-            write("(")
+            yield "("
         for i, rep in enumerate(chunks):
             if i > 0:
-                write("\n" + " " * indent)
-            write(rep)
+                yield "\n" + " " * indent
+            yield rep
         if level == 1:
-            write(")")
+            yield ")"
 
     _dispatch[str.__repr__] = _pprint_str
 
     def _pprint_bytes(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
+    ) -> Iterator[str]:
         if len(object) <= 4:
-            write(repr(object))
+            yield repr(object)
             return
         parens = level == 1
         if parens:
             indent += 1
             allowance += 1
-            write("(")
+            yield "("
         delim = ""
         for rep in _wrap_bytes_repr(object, self._width - indent, allowance):
-            write(delim)
-            write(rep)
+            yield delim
+            yield rep
             if not delim:
                 delim = "\n" + " " * indent
         if parens:
-            write(")")
+            yield ")"
 
     _dispatch[bytes.__repr__] = _pprint_bytes
 
     def _pprint_bytearray(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
-        write("bytearray(")
-        self._pprint_bytes(
-            bytes(object), stream, indent + 10, allowance + 1, context, level + 1
+    ) -> Iterator[str]:
+        yield "bytearray("
+        yield from self._pprint_bytes(
+            bytes(object), indent + 10, allowance + 1, context, level + 1
         )
-        write(")")
+        yield ")"
 
     _dispatch[bytearray.__repr__] = _pprint_bytearray
 
     def _pprint_mappingproxy(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("mappingproxy(")
-        self._format(object.copy(), stream, indent, allowance, context, level)
-        stream.write(")")
+    ) -> Iterator[str]:
+        yield "mappingproxy("
+        yield from self._format(object.copy(), indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_types.MappingProxyType.__repr__] = _pprint_mappingproxy
 
     def _pprint_simplenamespace(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if type(object) is _types.SimpleNamespace:
             # The SimpleNamespace repr is "namespace" instead of the class
             # name, so we do the same here. For subclasses; use the class name.
@@ -380,95 +414,89 @@ def _pprint_simplenamespace(
         else:
             cls_name = object.__class__.__name__
         items = object.__dict__.items()
-        stream.write(cls_name + "(")
-        self._format_namespace_items(items, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls_name + "("
+        yield from self._format_namespace_items(
+            items, indent, allowance, context, level
+        )
+        yield ")"
 
     _dispatch[_types.SimpleNamespace.__repr__] = _pprint_simplenamespace
 
     def _format_dict_items(
         self,
-        items: list[tuple[Any, Any]],
-        stream: IO[str],
+        items: Iterable[tuple[Any, Any]],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
+        emitted = False
         for key, ent in items:
-            write(delimnl)
-            write(self._repr(key, context, level))
-            write(": ")
-            self._format(ent, stream, item_indent, 1, context, level)
-            write(",")
+            emitted = True
+            yield delimnl
+            yield self._repr(key, context, level)
+            yield ": "
+            yield from self._format(ent, item_indent, 1, context, level)
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _format_namespace_items(
         self,
-        items: list[tuple[Any, Any]],
-        stream: IO[str],
+        items: Iterable[tuple[Any, Any]],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
+        emitted = False
         for key, ent in items:
-            write(delimnl)
-            write(key)
-            write("=")
+            emitted = True
+            yield delimnl
+            yield key
+            yield "="
             if id(ent) in context:
                 # Special-case representation of recursion to match standard
                 # recursive dataclass repr.
-                write("...")
+                yield "..."
             else:
-                self._format(
+                yield from self._format(
                     ent,
-                    stream,
                     item_indent + len(key) + 1,
                     1,
                     context,
                     level,
                 )
 
-            write(",")
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _format_items(
         self,
-        items: list[Any],
-        stream: IO[str],
+        items: Iterable[Any],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
-
+        emitted = False
         for item in items:
-            write(delimnl)
-            self._format(item, stream, item_indent, 1, context, level)
-            write(",")
+            emitted = True
+            yield delimnl
+            yield from self._format(item, item_indent, 1, context, level)
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _repr(self, object: Any, context: set[int], level: int) -> str:
         return self._safe_repr(object, context.copy(), self._depth, level)
@@ -476,114 +504,107 @@ def _repr(self, object: Any, context: set[int], level: int) -> str:
     def _pprint_default_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         rdf = self._repr(object.default_factory, context, level)
-        stream.write(f"{object.__class__.__name__}({rdf}, ")
-        self._pprint_dict(object, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield f"{object.__class__.__name__}({rdf}, "
+        yield from self._pprint_dict(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.defaultdict.__repr__] = _pprint_default_dict
 
     def _pprint_counter(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write(object.__class__.__name__ + "(")
+    ) -> Iterator[str]:
+        yield object.__class__.__name__ + "("
 
         if object:
-            stream.write("{")
+            yield "{"
             items = object.most_common()
-            self._format_dict_items(items, stream, indent, allowance, context, level)
-            stream.write("}")
+            yield from self._format_dict_items(items, indent, allowance, context, level)
+            yield "}"
 
-        stream.write(")")
+        yield ")"
 
     _dispatch[_collections.Counter.__repr__] = _pprint_counter
 
     def _pprint_chain_map(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object.maps) or (len(object.maps) == 1 and not len(object.maps[0])):
-            stream.write(repr(object))
+            yield repr(object)
             return
 
-        stream.write(object.__class__.__name__ + "(")
-        self._format_items(object.maps, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield object.__class__.__name__ + "("
+        yield from self._format_items(object.maps, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.ChainMap.__repr__] = _pprint_chain_map
 
     def _pprint_deque(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write(object.__class__.__name__ + "(")
+    ) -> Iterator[str]:
+        yield object.__class__.__name__ + "("
         if object.maxlen is not None:
-            stream.write(f"maxlen={object.maxlen}, ")
-        stream.write("[")
+            yield f"maxlen={object.maxlen}, "
+        yield "["
 
-        self._format_items(object, stream, indent, allowance + 1, context, level)
-        stream.write("])")
+        yield from self._format_items(object, indent, allowance + 1, context, level)
+        yield "])"
 
     _dispatch[_collections.deque.__repr__] = _pprint_deque
 
     def _pprint_user_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserDict.__repr__] = _pprint_user_dict
 
     def _pprint_user_list(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserList.__repr__] = _pprint_user_list
 
     def _pprint_user_string(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserString.__repr__] = _pprint_user_string
 
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 1326ef34b2e..2c08734cf46 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -406,3 +406,87 @@ class DataclassWithTwoItems:
 )
 def test_consistent_pretty_printer(data: Any, expected: str) -> None:
     assert PrettyPrinter().pformat(data) == textwrap.dedent(expected).strip()
+
+
+class TestPformatLines:
+    """``pformat_lines`` returns the pretty-printed lines, pulling from
+    the lazy formatter only until a line/char budget is reached so an
+    input a downstream truncator will clip anyway is never fully built.
+    """
+
+    def test_no_budget_matches_pformat_splitlines(self) -> None:
+        pp = PrettyPrinter()
+        data = list(range(50))
+        assert pp.pformat_lines(data) == pp.pformat(data).splitlines()
+
+    def test_under_budget_is_complete_and_a_prefix(self) -> None:
+        # When the whole thing fits, the result is the full pformat,
+        # regardless of how the budget was reached.
+        pp = PrettyPrinter()
+        data = list(range(5))
+        full = pp.pformat(data).splitlines()
+        assert pp.pformat_lines(data, max_lines=11) == full
+        assert pp.pformat_lines(data, max_chars=10_000) == full
+
+    def test_line_budget_stops_early(self) -> None:
+        pp = PrettyPrinter()
+        # 50 scalars, one per line, budget well below 50.
+        full = pp.pformat(list(range(50))).splitlines()
+        lines = pp.pformat_lines(list(range(50)), max_lines=11)
+        assert len(lines) <= 11 + 1  # budget, plus a trailing partial line
+        # everything but the last line (which may stop mid-line) is a
+        # prefix of the full output
+        assert lines[:-1] == full[: len(lines) - 1]
+
+    def test_char_budget_stops_early(self) -> None:
+        # A *flat* container of huge strings has few lines but explodes on
+        # chars; a line-only budget wouldn't stop it. The char budget must.
+        pp = PrettyPrinter()
+        data = ["x" * 100_000, "y" * 100_000, "z" * 100_000]
+        lines = pp.pformat_lines(data, max_chars=640)
+        assert sum(len(line) for line in lines) < 200_000  # bailed, didn't format all 3
+
+    def test_nested_element_respects_line_budget(self) -> None:
+        # ``len(object)`` is only a *lower* bound on the line count: a
+        # single nested element expands to many lines. The lazy pull must
+        # stop regardless of the container's element count.
+        pp = PrettyPrinter()
+        for data in ([{i: "x" * 40 for i in range(50)}], {1: list(range(100))}):
+            lines = pp.pformat_lines(data, max_lines=11)
+            assert len(lines) <= 11 + 1
+
+    def test_nested_dataclass_element_respects_line_budget(self) -> None:
+        @dataclass
+        class Many:
+            a: int
+            b: int
+            c: int
+            d: int
+            e: int
+            f: int
+            g: int
+            h: int
+
+        pp = PrettyPrinter()
+        lines = pp.pformat_lines([Many(*range(8))], max_lines=4)
+        assert len(lines) <= 4 + 1
+        assert len(lines) < len(pp.pformat([Many(*range(8))]).splitlines())
+
+    def test_sized_non_iterable_does_not_raise(self) -> None:
+        class Sized:
+            def __len__(self) -> int:
+                return 3
+
+        pp = PrettyPrinter()
+        obj = Sized()
+        assert pp.pformat_lines(obj, max_lines=5) == pp.pformat(obj).splitlines()
+
+
+def test_pformat_sorts_heterogeneous_set() -> None:
+    # The set sort tries a natural sort first and falls back to a key
+    # that compares the element types' names only for unorderable
+    # mixes; both must succeed.
+    pp = PrettyPrinter()
+    assert pp.pformat({3, 1, 2}) == "{\n    1,\n    2,\n    3,\n}"
+    # Mixed unorderable types must not raise.
+    pp.pformat({1, "a", 2, "b"})

From b541e20cb2897694b033175ad58b0ed2f7cebdf2 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 18:34:16 +0200
Subject: [PATCH 2/6] [perf] Skip the newline count on chunks without a newline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ``pformat_lines``'s budget loop, ``chunk.count("\n")`` ran on every
chunk, but most chunks (brackets, indentation, item reprs) contain no
newline. Guarding the call with ``"\n" in chunk`` skips it on those and
recovers part of the per-chunk budget-tracking overhead: formatting an
8-element list under a budget drops from ~0.0185 ms to ~0.0163 ms
(versus ~0.0132 ms for an uncapped ``pformat().splitlines()``, so the
budget overhead shrinks by about 40%, from ~+5 us to ~+3 us).

The win is small and only matters on the ``-v`` truncating path of a
failing assertion (the default path doesn't format the diff at all), so
this is kept as a separate commit — easy to drop if the extra branch
isn't judged worth it.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index 06caf436e60..d9fd6955032 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -125,7 +125,10 @@ def pformat_lines(
             chunks.append(chunk)
             if max_chars is not None:
                 n_chars += len(chunk)
-            if max_lines is not None:
+            if max_lines is not None and "\n" in chunk:
+                # Guard the count: most chunks (brackets, indents, item
+                # reprs) have no newline, and skipping the call on them
+                # is meaningfully cheaper than counting every chunk.
                 n_lines += chunk.count("\n")
             if (max_lines is not None and n_lines >= max_lines) or (
                 max_chars is not None and n_chars >= max_chars

From 77343a81f7a53fb4369c24c0e728473db3ae6135 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sun, 14 Jun 2026 11:00:40 +0200
Subject: [PATCH 3/6] [perf] pprint: apply review feedback on ``pformat_lines``

Addresses review on #14588:

* make ``max_lines`` / ``max_chars`` keyword-only so they can't be
  confused at the call site.
* drop the implementation detail (``_format``) and the "what the caller
  does" note from the docstring; describe the behaviour instead.
* comment the set-sort fast path ("try a direct sort first, faster than
  the fallback").
* assert the heterogeneous-set output in the test rather than only
  checking it does not raise.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 21 +++++++++++----------
 testing/io/test_pprint.py |  5 +++--
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index d9fd6955032..2685d838b68 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -99,22 +99,21 @@ def pformat(self, object: Any) -> str:
     def pformat_lines(
         self,
         object: Any,
+        *,
         max_lines: int | None = None,
         max_chars: int | None = None,
     ) -> list[str]:
         """Pretty-print ``object`` and return its lines.
 
-        ``_format`` yields the output as a stream of chunks, so this can
-        stop pulling from it as soon as a budget is reached — useful when
-        a downstream truncator is going to drop everything past that
-        budget anyway.
-
-        ``max_lines`` / ``max_chars`` bound the two truncation dimensions
+        ``max_lines`` / ``max_chars`` bound the two output dimensions
         independently; either may be ``None`` to leave that dimension
-        unbounded. With both ``None`` the whole object is formatted. The
-        budget is a stopping condition, not a precise cut: formatting
-        stops on the first chunk that reaches it, so the result may
-        slightly overshoot (the caller truncates to the exact limit).
+        unbounded, and with both ``None`` the whole object is formatted.
+        When a bound is given the object is only formatted far enough to
+        reach it, so a huge object costs O(budget) rather than O(N).
+
+        The budget is a stopping condition, not a precise cut: formatting
+        stops on the first piece of output that reaches it, so the result
+        may slightly overshoot the bound.
         """
         if max_lines is None and max_chars is None:
             return self.pformat(object).splitlines()
@@ -277,6 +276,8 @@ def _pprint_set(
             yield typ.__name__ + "({"
             endchar = "})"
         try:
+            # Try a direct sort first; it is faster than the fallback and
+            # works for the common homogeneous, orderable case.
             object = sorted(object)
         except TypeError:
             # Heterogeneous element types — fall back to a key that
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 2c08734cf46..805809b3778 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -488,5 +488,6 @@ def test_pformat_sorts_heterogeneous_set() -> None:
     # mixes; both must succeed.
     pp = PrettyPrinter()
     assert pp.pformat({3, 1, 2}) == "{\n    1,\n    2,\n    3,\n}"
-    # Mixed unorderable types must not raise.
-    pp.pformat({1, "a", 2, "b"})
+    # Mixed unorderable types must not raise; the fallback orders by type
+    # name (ints before strs), then by value.
+    assert pp.pformat({1, "a", 2, "b"}) == "{\n    1,\n    2,\n    'a',\n    'b',\n}"

From 5d94ad3439b34e4aa96f99632f77fe7959efa713 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Mon, 15 Jun 2026 12:09:32 +0200
Subject: [PATCH 4/6] [perf] test: mark Sized.__len__ as no cover

The body exists only to make the test type ``Sized``; the lazy budget
code never calls ``len`` on a non-dispatched object, so the line is
intentionally unreachable.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 testing/io/test_pprint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 805809b3778..f25f703c7cd 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -475,7 +475,7 @@ class Many:
     def test_sized_non_iterable_does_not_raise(self) -> None:
         class Sized:
             def __len__(self) -> int:
-                return 3
+                return 3  # pragma: no cover - exists only to make the type Sized
 
         pp = PrettyPrinter()
         obj = Sized()

From e4478724b243f3ae9fa8cb575e24c9daa9b909d4 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Mon, 15 Jun 2026 12:13:52 +0200
Subject: [PATCH 5/6] [perf] test: cover vendored pprint
 UserDict/UserList/UserString, frozenset, bytes/str wrap

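The vendored ``pprint`` copy shipped without upstream's test suite, so
several per-type helpers had no coverage. Add ``pformat`` cases for
``UserDict``/``UserList``/``UserString``, ``frozenset`` (empty and the
frozenset-prefix branch of ``_pprint_set``), short and line-wrapped
``bytes``/``bytearray``, and the multi-line / wrapped ``str`` paths.

Also cover tuple dict keys, invalid constructor arguments, recursion
markers, the ``depth`` limit, ``SimpleNamespace`` subclasses,
``_safe_tuple``, the ``_safe_repr`` fallback for hashable ``dict`` /
``list`` subclass keys and the ``_wrap_bytes_repr`` edge cases, and mark
the always-true ``if current:`` in ``_pprint_str`` as
``# pragma: no branch``.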

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py |   2 +-
 testing/io/test_pprint.py | 220 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 221 insertions(+), 1 deletion(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index 2685d838b68..a550b65b0c8 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -330,7 +330,7 @@ def _pprint_str(
                         current = part
                     else:
                         current = candidate
-                if current:
+                if current:  # pragma: no branch - a wrapped line always ends with a pending chunk
                     chunks.append(repr(current))
         if len(chunks) == 1:
             yield rep
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index f25f703c7cd..28bcf193ee1 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -5,12 +5,17 @@
 from collections import defaultdict
 from collections import deque
 from collections import OrderedDict
+from collections import UserDict
+from collections import UserList
+from collections import UserString
 from dataclasses import dataclass
 import textwrap
 from types import MappingProxyType
 from types import SimpleNamespace
 from typing import Any
 
+from _pytest._io.pprint import _safe_tuple
+from _pytest._io.pprint import _wrap_bytes_repr
 from _pytest._io.pprint import PrettyPrinter
 import pytest
 
@@ -329,6 +334,112 @@ class DataclassWithTwoItems:
             """,
             id="deque-maxlen",
         ),
+        pytest.param(frozenset(), "frozenset()", id="frozenset-empty"),
+        pytest.param(
+            frozenset({1, 2, 3}),
+            """
+            frozenset({
+                1,
+                2,
+                3,
+            })
+            """,
+            id="frozenset-items",
+        ),
+        pytest.param(UserDict(), "{}", id="userdict-empty"),
+        pytest.param(
+            UserDict({"one": 1, "two": 2}),
+            """
+            {
+                'one': 1,
+                'two': 2,
+            }
+            """,
+            id="userdict-items",
+        ),
+        pytest.param(UserList(), "[]", id="userlist-empty"),
+        pytest.param(
+            UserList([1, 2]),
+            """
+            [
+                1,
+                2,
+            ]
+            """,
+            id="userlist-items",
+        ),
+        pytest.param(UserString("hello world"), "'hello world'", id="userstring"),
+        pytest.param(b"short", "(b'short')", id="bytes-short"),
+        pytest.param(
+            b"x" * 100,
+            "(b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'\n"
+            " b'xxxxxxxxxxxxxxxxxxxxxxxx')",
+            id="bytes-long",
+        ),
+        pytest.param(
+            # Length not a multiple of 4, so the trailing partial group starts
+            # exactly at ``last`` and exercises the allowance trim.
+            b"z" * 102,
+            "(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'\n"
+            " b'zzzzzzzzzzzzzzzzzzzzzzzzzz')",
+            id="bytes-long-unaligned",
+        ),
+        pytest.param(bytearray(b"short"), "bytearray(b'short')", id="bytearray-short"),
+        pytest.param(
+            bytearray(b"y" * 100),
+            "bytearray(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'\n"
+            "          b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy')",
+            id="bytearray-long",
+        ),
+        pytest.param(
+            "word " * 30,
+            "('word word word word word word word word word word word word word word word '\n"
+            " 'word word word word word word word word word word word word word word word ')",
+            id="str-long-wrap",
+        ),
+        pytest.param(
+            "line1\nline2\nline3",
+            "('line1\\n'\n 'line2\\n'\n 'line3')",
+            id="str-multiline",
+        ),
+        pytest.param("", "''", id="str-empty"),
+        pytest.param("hello", "'hello'", id="str-single-chunk"),
+        pytest.param(
+            ["word " * 30],
+            "[\n"
+            "    'word word word word word word word word word word word word word word '\n"
+            "    'word word word word word word word word word word word word word word '\n"
+            "    'word word ',\n"
+            "]",
+            id="str-nested-wrap",
+        ),
+        pytest.param(b"abc", "b'abc'", id="bytes-le-4"),
+        pytest.param(
+            "word " * 30 + "\nshort",
+            "('word word word word word word word word word word word word word word word '\n"
+            " 'word word word word word word word word word word word word word word word \\n'\n"
+            " 'short')",
+            id="str-wrap-then-line",
+        ),
+        pytest.param({(): 0}, "{\n    (): 0,\n}", id="dict-empty-tuple-key"),
+        pytest.param(
+            {(1, 2): 0},
+            """
+            {
+                (1, 2): 0,
+            }
+            """,
+            id="dict-tuple-key",
+        ),
+        pytest.param(
+            {(1,): 0},
+            """
+            {
+                (1,): 0,
+            }
+            """,
+            id="dict-singleton-tuple-key",
+        ),
         pytest.param(
             {
                 "chainmap": ChainMap({"one": 1}, {"two": 2}),
@@ -491,3 +602,112 @@ def test_pformat_sorts_heterogeneous_set() -> None:
     # Mixed unorderable types must not raise; the fallback orders by type
     # name (ints before strs), then by value.
     assert pp.pformat({1, "a", 2, "b"}) == "{\n    1,\n    2,\n    'a',\n    'b',\n}"
+
+
+@pytest.mark.parametrize(
+    "kwargs",
+    [
+        pytest.param({"indent": -1}, id="indent-negative"),
+        pytest.param({"depth": 0}, id="depth-zero"),
+        pytest.param({"width": 0}, id="width-zero"),
+    ],
+)
+def test_invalid_constructor_args_raise(kwargs: dict[str, int]) -> None:
+    with pytest.raises(ValueError):
+        PrettyPrinter(**kwargs)
+
+
+def test_recursive_list_shows_recursion_marker() -> None:
+    pp = PrettyPrinter()
+    a: list[Any] = [1]
+    a.append(a)
+    out = pp.pformat(a)
+    assert f"<Recursion on list with id={id(a)}>" in out
+
+
+def test_recursive_namespace_shows_ellipsis() -> None:
+    # A self-referential namespace must render the cycle as ``...`` rather
+    # than recursing forever.
+    ns = SimpleNamespace(x=1)
+    ns.self = ns
+    out = PrettyPrinter().pformat(ns)
+    assert "self=..." in out
+
+
+def test_depth_limit_truncates_nested_container() -> None:
+    # ``depth`` caps nesting in the ``_safe_repr`` fallback: containers
+    # past the limit collapse to ``...``.
+    pp = PrettyPrinter(depth=1)
+    assert pp.pformat({((1, 2),): 0}) == "{\n    (...,): 0,\n}"
+
+
+def test_simplenamespace_subclass_uses_class_name() -> None:
+    # Plain ``SimpleNamespace`` prints as ``namespace(...)``; a subclass
+    # uses its own class name instead.
+    class MyNamespace(SimpleNamespace):
+        pass
+
+    pp = PrettyPrinter()
+    assert pp.pformat(MyNamespace(one=1)) == "MyNamespace(\n    one=1,\n)"
+
+
+def test_safe_tuple_sorts_unorderable_pairs() -> None:
+    # ``_safe_tuple`` wraps each element of a 2-tuple in ``_safe_key`` so a
+    # list of pairs with unorderable elements can be sorted without raising.
+    pairs = [(2, "b"), (1, "a"), ("z", 3)]
+    assert sorted(pairs, key=_safe_tuple)  # does not raise
+
+
+class _HashableDict(dict):
+    # ``dict`` subclasses that are hashable can be used as dict keys, which
+    # is the only way the ``_safe_repr`` ``dict`` branch is reached.
+    def __hash__(self) -> int:
+        return id(self)
+
+
+class _HashableList(list):
+    # Likewise for ``list`` and the ``_safe_repr`` ``list`` branch.
+    def __hash__(self) -> int:
+        return id(self)
+
+
+@pytest.mark.parametrize(
+    ("key", "expected"),
+    [
+        pytest.param(_HashableDict(), "{\n    {}: 0,\n}", id="empty-dict-key"),
+        pytest.param(
+            _HashableDict({"a": 1}), "{\n    {'a': 1}: 0,\n}", id="dict-key"
+        ),
+        pytest.param(_HashableList(), "{\n    []: 0,\n}", id="empty-list-key"),
+        pytest.param(_HashableList([1, 2]), "{\n    [1, 2]: 0,\n}", id="list-key"),
+    ],
+)
+def test_hashable_container_subclass_as_key(key: Any, expected: str) -> None:
+    # A hashable ``dict``/``list`` subclass key is rendered via the
+    # ``_safe_repr`` fallback rather than a per-type dispatcher.
+    assert PrettyPrinter().pformat({key: 0}) == expected
+
+
+def test_safe_repr_depth_limit_on_dict_key() -> None:
+    pp = PrettyPrinter(depth=1)
+    assert pp.pformat({_HashableDict({"a": 1}): 0}) == "{\n    {...}: 0,\n}"
+
+
+def test_safe_repr_recursion_marker() -> None:
+    # Self-referential containers reached through ``_safe_repr`` (as dict
+    # keys) must terminate with a recursion marker, for both the ``dict``
+    # branch and the ``tuple``/``list`` branch.
+    hd = _HashableDict()
+    hd["self"] = hd
+    assert "<Recursion on _HashableDict" in PrettyPrinter().pformat({hd: 0})
+
+    hl = _HashableList()
+    hl.append(hl)
+    assert "<Recursion on _HashableList" in PrettyPrinter().pformat({(hl,): 0})
+
+
+def test_wrap_bytes_repr_edges() -> None:
+    # Empty input yields nothing; a width too small for a group still
+    # emits each group rather than dropping bytes.
+    assert list(_wrap_bytes_repr(b"", 80, 0)) == []
+    assert list(_wrap_bytes_repr(b"abcdefgh", 6, 0)) == ["b'abcd'", "b'efgh'"]

From ba2520b6712f9298cd21614e7cba6d650b6b132b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 15 Jun 2026 11:18:59 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 testing/io/test_pprint.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 28bcf193ee1..9ad408d286c 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -675,9 +675,7 @@ def __hash__(self) -> int:
     ("key", "expected"),
     [
         pytest.param(_HashableDict(), "{\n    {}: 0,\n}", id="empty-dict-key"),
-        pytest.param(
-            _HashableDict({"a": 1}), "{\n    {'a': 1}: 0,\n}", id="dict-key"
-        ),
+        pytest.param(_HashableDict({"a": 1}), "{\n    {'a': 1}: 0,\n}", id="dict-key"),
         pytest.param(_HashableList(), "{\n    []: 0,\n}", id="empty-list-key"),
         pytest.param(_HashableList([1, 2]), "{\n    [1, 2]: 0,\n}", id="list-key"),
     ],