diff --git a/django/utils/text.py b/django/utils/text.py index cfe6ceca9e4b..d1306f9c6fed 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -382,6 +382,7 @@ def compress_sequence(sequence, *, max_random_bytes=None): yield buf.read() for item in sequence: zfile.write(item) + zfile.flush() data = buf.read() if data: yield data @@ -398,6 +399,7 @@ async def acompress_sequence(sequence, *, max_random_bytes=None): yield buf.read() async for item in sequence: zfile.write(item) + zfile.flush() data = buf.read() if data: yield data diff --git a/docs/howto/outputting-csv.txt b/docs/howto/outputting-csv.txt index c5ae7094d296..10de00503eaa 100644 --- a/docs/howto/outputting-csv.txt +++ b/docs/howto/outputting-csv.txt @@ -67,9 +67,12 @@ avoid a load balancer dropping a connection that might have otherwise timed out while the server was generating the response. In this example, we make full use of Python generators to efficiently handle -the assembly and transmission of a large CSV file:: +the assembly and transmission of a large CSV file. 
Rows are batched together +to reduce HTTP overhead and improve compression efficiency when used with +:class:`~django.middleware.gzip.GZipMiddleware`:: import csv + from itertools import batched from django.http import StreamingHttpResponse @@ -92,8 +95,13 @@ the assembly and transmission of a large CSV file:: rows = (["Row {}".format(idx), str(idx)] for idx in range(65536)) pseudo_buffer = Echo() writer = csv.writer(pseudo_buffer) + + def stream_batched_rows(): + for batch in batched(rows, 100): + yield "".join(writer.writerow(row) for row in batch) + return StreamingHttpResponse( - (writer.writerow(row) for row in rows), + stream_batched_rows(), content_type="text/csv", headers={"Content-Disposition": 'attachment; filename="somefilename.csv"'}, ) diff --git a/docs/topics/signals.txt b/docs/topics/signals.txt index c14def278b47..568b259068ae 100644 --- a/docs/topics/signals.txt +++ b/docs/topics/signals.txt @@ -172,8 +172,8 @@ Now, our ``my_receiver`` function will be called each time a request finishes. The :meth:`~django.apps.AppConfig.ready` method may be executed more than once during testing, so you may want to :ref:`guard your signals from - duplication <preventing-duplicate-signals>`, especially if you're planning - to send them within tests. + duplication <preventing-duplicate-signals>` if your receiver is a bound + method on an instance that may be recreated. .. _connecting-to-specific-signals: @@ -211,20 +211,31 @@ each particular signal. Preventing duplicate signals ---------------------------- -In some circumstances, the code connecting receivers to signals may run -multiple times. This can cause your receiver function to be registered more -than once, and thus called as many times for a signal event. For example, the -:meth:`~django.apps.AppConfig.ready` method may be executed more than once -during testing. More generally, this occurs everywhere your project imports the -module where you define the signals, because signal registration runs as many -times as it is imported. 
- -If this behavior is problematic (such as when using signals to -send an email whenever a model is saved), pass a unique identifier as -the ``dispatch_uid`` argument to identify your receiver function. This -identifier will usually be a string, although any hashable object will -suffice. The end result is that your receiver function will only be -bound to the signal once for each unique ``dispatch_uid`` value:: +When ``dispatch_uid`` is not provided, Django identifies each receiver using +its Python object identity and registers it only once. For module-level +functions, static methods, and class methods, the identity is stable, so +connecting the same receiver more than once has no effect:: + + def my_handler(sender, **kwargs): ... + + + my_signal.connect(my_handler) # Running this code again is a no-op. + +Bound methods, which take a ``self`` argument, are different. Their identity +is tied to the specific instance, so connecting the same method from a new +instance registers it as an additional receiver:: + + def connect_signals(): + backend = Backend() + my_signal.connect(backend.my_handler) # A distinct receiver. + + + connect_signals() # Running this code again registers another receiver. + +When using a bound method as a receiver, multiple registrations can be +prevented by supplying a unique ``dispatch_uid``. This identifier will usually +be a string, although any hashable object will suffice. 
The receiver will only +be bound to the signal once for each unique ``dispatch_uid`` value:: from django.core.signals import request_finished diff --git a/tests/conditional_processing/tests.py b/tests/conditional_processing/tests.py index 67007b5d66ee..6b1f4b5bddab 100644 --- a/tests/conditional_processing/tests.py +++ b/tests/conditional_processing/tests.py @@ -257,7 +257,7 @@ def test_unquoted(self): self.assertEqual(response_quoted["ETag"], response_unquoted["ETag"]) # It's possible that the matching algorithm could use the wrong value even - # if the ETag header is set correctly correctly (as tested by + # if the ETag header is set correctly (as tested by # test_unquoted()), so check that the unquoted value is matched. def test_unquoted_if_none_match(self): self.client.defaults["HTTP_IF_NONE_MATCH"] = ETAG diff --git a/tests/decorators/test_gzip.py b/tests/decorators/test_gzip.py index 2d64c171f70e..8cd0869b5368 100644 --- a/tests/decorators/test_gzip.py +++ b/tests/decorators/test_gzip.py @@ -1,6 +1,6 @@ from inspect import iscoroutinefunction -from django.http import HttpRequest, HttpResponse +from django.http import HttpRequest, HttpResponse, StreamingHttpResponse from django.test import SimpleTestCase from django.views.decorators.gzip import gzip_page @@ -44,3 +44,33 @@ async def async_view(request): response = await async_view(request) self.assertEqual(response.status_code, 200) self.assertEqual(response.get("Content-Encoding"), "gzip") + + def test_streaming_response_yields_chunks_incrementally(self): + @gzip_page + def stream_view(request): + return StreamingHttpResponse(self.content.encode() for _ in range(5)) + + request = HttpRequest() + request.META["HTTP_ACCEPT_ENCODING"] = "gzip" + response = stream_view(request) + compressed_chunks = list(response) + # Each input chunk should produce compressed output, not buffer + # everything into a single chunk. 
+ self.assertGreater(len(compressed_chunks), 2) + + async def test_async_streaming_response_yields_chunks_incrementally(self): + @gzip_page + async def stream_view(request): + async def content(): + for _ in range(5): + yield self.content.encode() + + return StreamingHttpResponse(content()) + + request = HttpRequest() + request.META["HTTP_ACCEPT_ENCODING"] = "gzip" + response = await stream_view(request) + compressed_chunks = [chunk async for chunk in response] + # Each input chunk should produce compressed output, not buffer + # everything into a single chunk. + self.assertGreater(len(compressed_chunks), 2) diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py index 50e205a25449..101943957c0c 100644 --- a/tests/utils_tests/test_text.py +++ b/tests/utils_tests/test_text.py @@ -1,3 +1,4 @@ +import gzip import json import sys @@ -404,13 +405,18 @@ def test_get_valid_filename(self): text.get_valid_filename("$.$.$") def test_compress_sequence(self): - data = [{"key": i} for i in range(10)] - seq = list(json.JSONEncoder().iterencode(data)) - seq = [s.encode() for s in seq] - actual_length = len(b"".join(seq)) - out = text.compress_sequence(seq) - compressed_length = len(b"".join(out)) - self.assertLess(compressed_length, actual_length) + data = [{"key": i} for i in range(100)] + seq = [s.encode() for s in json.JSONEncoder().iterencode(data)] + original = b"".join(seq) + batch_size = 256 + batched_seq = ( + original[i : i + batch_size] for i in range(0, len(original), batch_size) + ) + compressed_chunks = list(text.compress_sequence(batched_seq)) + out = b"".join(compressed_chunks) + self.assertEqual(gzip.decompress(out), original) + self.assertLess(len(out), len(original)) + self.assertGreater(len(compressed_chunks), 2) def test_format_lazy(self): self.assertEqual("django/test", format_lazy("{}/{}", "django", lazystr("test")))