From 12bb16da8fbadac34e2de318cc79d7d765f35a96 Mon Sep 17 00:00:00 2001 From: farhan Date: Sat, 13 Dec 2025 23:33:33 +0500 Subject: [PATCH 1/3] Fixed #36293 -- Avoided buffering streaming responses in GZipMiddleware. This avoids latency and/or blocking. The example of streaming a CSV file was rewritten to employ batching for greater efficiency in all layers (db, HTTP, etc.). The improved performance from batching should outweigh the drag introduced by an additional byte for each flush. Co-authored-by: huoyinghui --- django/utils/text.py | 2 ++ docs/howto/outputting-csv.txt | 12 ++++++++++-- tests/decorators/test_gzip.py | 32 +++++++++++++++++++++++++++++++- tests/utils_tests/test_text.py | 20 +++++++++++++------- 4 files changed, 56 insertions(+), 10 deletions(-) diff --git a/django/utils/text.py b/django/utils/text.py index cfe6ceca9e4b..d1306f9c6fed 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -382,6 +382,7 @@ def compress_sequence(sequence, *, max_random_bytes=None): yield buf.read() for item in sequence: zfile.write(item) + zfile.flush() data = buf.read() if data: yield data @@ -398,6 +399,7 @@ async def acompress_sequence(sequence, *, max_random_bytes=None): yield buf.read() async for item in sequence: zfile.write(item) + zfile.flush() data = buf.read() if data: yield data diff --git a/docs/howto/outputting-csv.txt b/docs/howto/outputting-csv.txt index c5ae7094d296..10de00503eaa 100644 --- a/docs/howto/outputting-csv.txt +++ b/docs/howto/outputting-csv.txt @@ -67,9 +67,12 @@ avoid a load balancer dropping a connection that might have otherwise timed out while the server was generating the response. In this example, we make full use of Python generators to efficiently handle -the assembly and transmission of a large CSV file:: +the assembly and transmission of a large CSV file. 
Rows are batched together +to reduce HTTP overhead and improve compression efficiency when used with +:class:`~django.middleware.gzip.GZipMiddleware`:: import csv + from itertools import batched from django.http import StreamingHttpResponse @@ -92,8 +95,13 @@ the assembly and transmission of a large CSV file:: rows = (["Row {}".format(idx), str(idx)] for idx in range(65536)) pseudo_buffer = Echo() writer = csv.writer(pseudo_buffer) + + def stream_batched_rows(): + for batch in batched(rows, 100): + yield "".join(writer.writerow(row) for row in batch) + return StreamingHttpResponse( - (writer.writerow(row) for row in rows), + stream_batched_rows(), content_type="text/csv", headers={"Content-Disposition": 'attachment; filename="somefilename.csv"'}, ) diff --git a/tests/decorators/test_gzip.py b/tests/decorators/test_gzip.py index 2d64c171f70e..8cd0869b5368 100644 --- a/tests/decorators/test_gzip.py +++ b/tests/decorators/test_gzip.py @@ -1,6 +1,6 @@ from inspect import iscoroutinefunction -from django.http import HttpRequest, HttpResponse +from django.http import HttpRequest, HttpResponse, StreamingHttpResponse from django.test import SimpleTestCase from django.views.decorators.gzip import gzip_page @@ -44,3 +44,33 @@ async def async_view(request): response = await async_view(request) self.assertEqual(response.status_code, 200) self.assertEqual(response.get("Content-Encoding"), "gzip") + + def test_streaming_response_yields_chunks_incrementally(self): + @gzip_page + def stream_view(request): + return StreamingHttpResponse(self.content.encode() for _ in range(5)) + + request = HttpRequest() + request.META["HTTP_ACCEPT_ENCODING"] = "gzip" + response = stream_view(request) + compressed_chunks = list(response) + # Each input chunk should produce compressed output, not buffer + # everything into a single chunk. 
+ self.assertGreater(len(compressed_chunks), 2) + + async def test_async_streaming_response_yields_chunks_incrementally(self): + @gzip_page + async def stream_view(request): + async def content(): + for _ in range(5): + yield self.content.encode() + + return StreamingHttpResponse(content()) + + request = HttpRequest() + request.META["HTTP_ACCEPT_ENCODING"] = "gzip" + response = await stream_view(request) + compressed_chunks = [chunk async for chunk in response] + # Each input chunk should produce compressed output, not buffer + # everything into a single chunk. + self.assertGreater(len(compressed_chunks), 2) diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py index 50e205a25449..101943957c0c 100644 --- a/tests/utils_tests/test_text.py +++ b/tests/utils_tests/test_text.py @@ -1,3 +1,4 @@ +import gzip import json import sys @@ -404,13 +405,18 @@ def test_get_valid_filename(self): text.get_valid_filename("$.$.$") def test_compress_sequence(self): - data = [{"key": i} for i in range(10)] - seq = list(json.JSONEncoder().iterencode(data)) - seq = [s.encode() for s in seq] - actual_length = len(b"".join(seq)) - out = text.compress_sequence(seq) - compressed_length = len(b"".join(out)) - self.assertLess(compressed_length, actual_length) + data = [{"key": i} for i in range(100)] + seq = [s.encode() for s in json.JSONEncoder().iterencode(data)] + original = b"".join(seq) + batch_size = 256 + batched_seq = ( + original[i : i + batch_size] for i in range(0, len(original), batch_size) + ) + compressed_chunks = list(text.compress_sequence(batched_seq)) + out = b"".join(compressed_chunks) + self.assertEqual(gzip.decompress(out), original) + self.assertLess(len(out), len(original)) + self.assertGreater(len(compressed_chunks), 2) def test_format_lazy(self): self.assertEqual("django/test", format_lazy("{}/{}", "django", lazystr("test"))) From e8ab2bb83fc6d3c0f5d998d1a41ebaebacaa1a28 Mon Sep 17 00:00:00 2001 From: Amar 
<100243770+aadeina@users.noreply.github.com> Date: Fri, 6 Mar 2026 06:17:30 +0000 Subject: [PATCH 2/3] Fixed #36600 -- Clarified the use cases for dispatch_uid in signal connection. Co-authored-by: Jacob Walls --- docs/topics/signals.txt | 43 ++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/docs/topics/signals.txt b/docs/topics/signals.txt index c14def278b47..568b259068ae 100644 --- a/docs/topics/signals.txt +++ b/docs/topics/signals.txt @@ -172,8 +172,8 @@ Now, our ``my_receiver`` function will be called each time a request finishes. The :meth:`~django.apps.AppConfig.ready` method may be executed more than once during testing, so you may want to :ref:`guard your signals from - duplication <preventing-duplicate-signals>`, especially if you're planning - to send them within tests. + duplication <preventing-duplicate-signals>` if your receiver is a bound + method on an instance that may be recreated. .. _connecting-to-specific-signals: @@ -211,20 +211,31 @@ each particular signal. Preventing duplicate signals ---------------------------- -In some circumstances, the code connecting receivers to signals may run -multiple times. This can cause your receiver function to be registered more -than once, and thus called as many times for a signal event. For example, the -:meth:`~django.apps.AppConfig.ready` method may be executed more than once -during testing. More generally, this occurs everywhere your project imports the -module where you define the signals, because signal registration runs as many -times as it is imported. - -If this behavior is problematic (such as when using signals to -send an email whenever a model is saved), pass a unique identifier as -the ``dispatch_uid`` argument to identify your receiver function. This -identifier will usually be a string, although any hashable object will -suffice. 
The end result is that your receiver function will only be -bound to the signal once for each unique ``dispatch_uid`` value:: +When ``dispatch_uid`` is not provided, Django identifies each receiver using +its Python object identity and registers it only once. For module-level +functions, static methods, and class methods, the identity is stable, so +connecting the same receiver more than once has no effect:: + + def my_handler(sender, **kwargs): ... + + + my_signal.connect(my_handler) # Running this code again is a no-op. + +Bound methods, which take a ``self`` argument, are different. Their identity +is tied to the specific instance, so connecting the same method from a new +instance registers it as an additional receiver:: + + def connect_signals(): + backend = Backend() + my_signal.connect(backend.my_handler) # A distinct receiver. + + + connect_signals() # Running this code again registers another receiver. + +When using a bound method as a receiver, multiple registrations can be +prevented by supplying a unique ``dispatch_uid``. This identifier will usually +be a string, although any hashable object will suffice. The receiver will only +be bound to the signal once for each unique ``dispatch_uid`` value:: from django.core.signals import request_finished From 9c21dd91a3bc5d4278adfff98c7a5225947cd049 Mon Sep 17 00:00:00 2001 From: jun Date: Mon, 9 Mar 2026 22:03:32 +0900 Subject: [PATCH 3/3] Fixed a typo in tests/conditional_processing/tests.py. 
--- tests/conditional_processing/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conditional_processing/tests.py b/tests/conditional_processing/tests.py index 67007b5d66ee..6b1f4b5bddab 100644 --- a/tests/conditional_processing/tests.py +++ b/tests/conditional_processing/tests.py @@ -257,7 +257,7 @@ def test_unquoted(self): self.assertEqual(response_quoted["ETag"], response_unquoted["ETag"]) # It's possible that the matching algorithm could use the wrong value even - # if the ETag header is set correctly correctly (as tested by + # if the ETag header is set correctly (as tested by # test_unquoted()), so check that the unquoted value is matched. def test_unquoted_if_none_match(self): self.client.defaults["HTTP_IF_NONE_MATCH"] = ETAG