From 285d6e85b43b4a79e6ee21f280ec444e0e1cfb05 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 21 Nov 2024 09:09:11 -0600 Subject: [PATCH 01/25] Increment version to 3.11.8.dev0 (#10021) --- aiohttp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/__init__.py b/aiohttp/__init__.py index 8c5b96c99de..838c31a5fcd 100644 --- a/aiohttp/__init__.py +++ b/aiohttp/__init__.py @@ -1,4 +1,4 @@ -__version__ = "3.11.7" +__version__ = "3.11.8.dev0" from typing import TYPE_CHECKING, Tuple From c8426a8900a1ac13b7f4adbb7de7982018ea6348 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:40:44 +0000 Subject: [PATCH 02/25] [PR #10018/e79b2d5d backport][3.11] Add url dispatcher benchmark for resolving root route for github simulated routes tree (#10023) Co-authored-by: Andrew Svetlov --- tests/test_benchmarks_web_urldispatcher.py | 78 +++++++++++++++------- 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/tests/test_benchmarks_web_urldispatcher.py b/tests/test_benchmarks_web_urldispatcher.py index 5d151d984af..936ed6320ed 100644 --- a/tests/test_benchmarks_web_urldispatcher.py +++ b/tests/test_benchmarks_web_urldispatcher.py @@ -6,9 +6,10 @@ import random import string from pathlib import Path -from typing import NoReturn, Optional +from typing import NoReturn, Optional, cast from unittest import mock +import pytest from multidict import CIMultiDict, CIMultiDictProxy from pytest_codspeed import BenchmarkFixture from yarl import URL @@ -18,6 +19,20 @@ from aiohttp.http import HttpVersion, RawRequestMessage +@pytest.fixture +def github_urls() -> list[str]: + """GitHub api urls.""" + # The fixture provides OpenAPI generated info for github. + # To update the local data file please run the following command: + # $ curl https://raw.githubusercontent.com/github/rest-api-description/refs/heads/main/descriptions/api.github.com/api.github.com.json | jq ".paths | keys" > github-urls.json + + here = Path(__file__).parent + with (here / "github-urls.json").open() as f: + urls = json.load(f) + + return cast(list[str], urls) + + def _mock_request(method: str, path: str) -> web.Request: message = RawRequestMessage( method, @@ -366,23 +381,15 @@ def _run() -> None: def test_resolve_gitapi( loop: asyncio.AbstractEventLoop, benchmark: BenchmarkFixture, + github_urls: list[str], ) -> None: - """Resolve DynamicResource for simulated github API. - - The benchmark uses OpenAPI generated info for github. - To update the local data file please run the following command: - $ curl https://raw.githubusercontent.com/github/rest-api-description/refs/heads/main/descriptions/api.github.com/api.github.com.json | jq ".paths | keys" > github-urls.json - """ + """Resolve DynamicResource for simulated github API.""" async def handler(request: web.Request) -> NoReturn: assert False - here = Path(__file__).parent - with (here / "github-urls.json").open() as f: - urls = json.load(f) - app = web.Application() - for url in urls: + for url in github_urls: app.router.add_get(url, handler) app.freeze() router = app.router @@ -425,21 +432,13 @@ def _run() -> None: def test_resolve_gitapi_subapps( loop: asyncio.AbstractEventLoop, benchmark: BenchmarkFixture, + github_urls: list[str], ) -> None: - """Resolve DynamicResource for simulated github API, grouped in subapps. - - The benchmark uses OpenAPI generated info for github. - To update the local data file please run the following command: - $ curl https://raw.githubusercontent.com/github/rest-api-description/refs/heads/main/descriptions/api.github.com/api.github.com.json | jq ".paths | keys" > github-urls.json - """ + """Resolve DynamicResource for simulated github API, grouped in subapps.""" async def handler(request: web.Request) -> NoReturn: assert False - here = Path(__file__).parent - with (here / "github-urls.json").open() as f: - urls = json.load(f) - subapps = { "gists": web.Application(), "orgs": web.Application(), @@ -451,7 +450,7 @@ async def handler(request: web.Request) -> NoReturn: } app = web.Application() - for url in urls: + for url in github_urls: parts = url.split("/") subapp = subapps.get(parts[1]) if subapp is not None: @@ -501,6 +500,39 @@ def _run() -> None: loop.run_until_complete(run_url_dispatcher_benchmark()) +def test_resolve_gitapi_root( + loop: asyncio.AbstractEventLoop, + benchmark: BenchmarkFixture, + github_urls: list[str], +) -> None: + """Resolve the plain root for simulated github API.""" + + async def handler(request: web.Request) -> NoReturn: + assert False + + app = web.Application() + for url in github_urls: + app.router.add_get(url, handler) + app.freeze() + router = app.router + + request = _mock_request(method="GET", path="/") + + async def run_url_dispatcher_benchmark() -> Optional[web.UrlMappingMatchInfo]: + ret = None + for i in range(250): + ret = await router.resolve(request) + return ret + + ret = loop.run_until_complete(run_url_dispatcher_benchmark()) + assert ret is not None + assert ret.get_info()["path"] == "/", ret.get_info() + + @benchmark + def _run() -> None: + loop.run_until_complete(run_url_dispatcher_benchmark()) + + def test_resolve_prefix_resources_many_prefix_many_plain( loop: asyncio.AbstractEventLoop, benchmark: BenchmarkFixture, From 53992e924bf805677f559cb736d6aed58210ed4c Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:54:07 +0000 Subject: [PATCH 03/25] [PR #10018/e79b2d5d backport][3.12] Add url dispatcher benchmark for resolving root route for github simulated routes tree (#10024) Co-authored-by: Andrew Svetlov --- tests/test_benchmarks_web_urldispatcher.py | 78 +++++++++++++++------- 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/tests/test_benchmarks_web_urldispatcher.py b/tests/test_benchmarks_web_urldispatcher.py index 5d151d984af..936ed6320ed 100644 --- a/tests/test_benchmarks_web_urldispatcher.py +++ b/tests/test_benchmarks_web_urldispatcher.py @@ -6,9 +6,10 @@ import random import string from pathlib import Path -from typing import NoReturn, Optional +from typing import NoReturn, Optional, cast from unittest import mock +import pytest from multidict import CIMultiDict, CIMultiDictProxy from pytest_codspeed import BenchmarkFixture from yarl import URL @@ -18,6 +19,20 @@ from aiohttp.http import HttpVersion, RawRequestMessage +@pytest.fixture +def github_urls() -> list[str]: + """GitHub api urls.""" + # The fixture provides OpenAPI generated info for github. + # To update the local data file please run the following command: + # $ curl https://raw.githubusercontent.com/github/rest-api-description/refs/heads/main/descriptions/api.github.com/api.github.com.json | jq ".paths | keys" > github-urls.json + + here = Path(__file__).parent + with (here / "github-urls.json").open() as f: + urls = json.load(f) + + return cast(list[str], urls) + + def _mock_request(method: str, path: str) -> web.Request: message = RawRequestMessage( method, @@ -366,23 +381,15 @@ def _run() -> None: def test_resolve_gitapi( loop: asyncio.AbstractEventLoop, benchmark: BenchmarkFixture, + github_urls: list[str], ) -> None: - """Resolve DynamicResource for simulated github API. - - The benchmark uses OpenAPI generated info for github. - To update the local data file please run the following command: - $ curl https://raw.githubusercontent.com/github/rest-api-description/refs/heads/main/descriptions/api.github.com/api.github.com.json | jq ".paths | keys" > github-urls.json - """ + """Resolve DynamicResource for simulated github API.""" async def handler(request: web.Request) -> NoReturn: assert False - here = Path(__file__).parent - with (here / "github-urls.json").open() as f: - urls = json.load(f) - app = web.Application() - for url in urls: + for url in github_urls: app.router.add_get(url, handler) app.freeze() router = app.router @@ -425,21 +432,13 @@ def _run() -> None: def test_resolve_gitapi_subapps( loop: asyncio.AbstractEventLoop, benchmark: BenchmarkFixture, + github_urls: list[str], ) -> None: - """Resolve DynamicResource for simulated github API, grouped in subapps. - - The benchmark uses OpenAPI generated info for github. - To update the local data file please run the following command: - $ curl https://raw.githubusercontent.com/github/rest-api-description/refs/heads/main/descriptions/api.github.com/api.github.com.json | jq ".paths | keys" > github-urls.json - """ + """Resolve DynamicResource for simulated github API, grouped in subapps.""" async def handler(request: web.Request) -> NoReturn: assert False - here = Path(__file__).parent - with (here / "github-urls.json").open() as f: - urls = json.load(f) - subapps = { "gists": web.Application(), "orgs": web.Application(), @@ -451,7 +450,7 @@ async def handler(request: web.Request) -> NoReturn: } app = web.Application() - for url in urls: + for url in github_urls: parts = url.split("/") subapp = subapps.get(parts[1]) if subapp is not None: @@ -501,6 +500,39 @@ def _run() -> None: loop.run_until_complete(run_url_dispatcher_benchmark()) +def test_resolve_gitapi_root( + loop: asyncio.AbstractEventLoop, + benchmark: BenchmarkFixture, + github_urls: list[str], +) -> None: + """Resolve the plain root for simulated github API.""" + + async def handler(request: web.Request) -> NoReturn: + assert False + + app = web.Application() + for url in github_urls: + app.router.add_get(url, handler) + app.freeze() + router = app.router + + request = _mock_request(method="GET", path="/") + + async def run_url_dispatcher_benchmark() -> Optional[web.UrlMappingMatchInfo]: + ret = None + for i in range(250): + ret = await router.resolve(request) + return ret + + ret = loop.run_until_complete(run_url_dispatcher_benchmark()) + assert ret is not None + assert ret.get_info()["path"] == "/", ret.get_info() + + @benchmark + def _run() -> None: + loop.run_until_complete(run_url_dispatcher_benchmark()) + + def test_resolve_prefix_resources_many_prefix_many_plain( loop: asyncio.AbstractEventLoop, benchmark: BenchmarkFixture, From dc61477e3a85345c1fac8cfb9d3619c11ef65ce3 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 24 Nov 2024 10:33:32 -0800 Subject: [PATCH 04/25] [PR #10030/cdc01cd backport][3.11] Reduce initialization overhead in `ClientResponse` (#10031) --- CHANGES/10030.misc.rst | 1 + aiohttp/client_reqrep.py | 34 ++++++++++++++++++---------------- tests/test_client_response.py | 18 ++++++++++++++++++ 3 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 CHANGES/10030.misc.rst diff --git a/CHANGES/10030.misc.rst b/CHANGES/10030.misc.rst new file mode 100644 index 00000000000..68ed7d058d6 --- /dev/null +++ b/CHANGES/10030.misc.rst @@ -0,0 +1 @@ +Improved performance of creating :class:`aiohttp.ClientResponse` objects -- by :user:`bdraco`. diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index a0fa093d92e..ef9c95f0f59 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -819,18 +819,25 @@ class ClientResponse(HeadersMixin): status: int = None # type: ignore[assignment] # Status-Code reason: Optional[str] = None # Reason-Phrase - content: StreamReader = None # type: ignore[assignment] # Payload stream + content: StreamReader = None # type: ignore[assignment] # Payload stream + _body: Optional[bytes] = None _headers: CIMultiDictProxy[str] = None # type: ignore[assignment] + _history: Tuple["ClientResponse", ...] = () _raw_headers: RawHeaders = None # type: ignore[assignment] - _connection = None # current connection + _connection: Optional["Connection"] = None # current connection + _continue: Optional["asyncio.Future[bool]"] = None _source_traceback: Optional[traceback.StackSummary] = None + _session: Optional["ClientSession"] = None # set up by ClientRequest after ClientResponse object creation # post-init stage allows to not change ctor signature _closed = True # to allow __del__ for non-initialized properly response _released = False _in_context = False - __writer = None + + _resolve_charset: Callable[["ClientResponse", bytes], str] = lambda *_: "utf-8" + + __writer: Optional["asyncio.Task[None]"] = None def __init__( self, @@ -845,34 +852,29 @@ def __init__( loop: asyncio.AbstractEventLoop, session: "ClientSession", ) -> None: - assert isinstance(url, URL) + # URL forbids subclasses, so a simple type check is enough. + assert type(url) is URL self.method = method self.cookies = SimpleCookie() self._real_url = url self._url = url.with_fragment(None) if url.raw_fragment else url - self._body: Optional[bytes] = None if writer is not None: self._writer = writer - self._continue = continue100 # None by default - self._closed = True - self._history: Tuple[ClientResponse, ...] = () + if continue100 is not None: + self._continue = continue100 self._request_info = request_info self._timer = timer if timer is not None else TimerNoop() self._cache: Dict[str, Any] = {} self._traces = traces self._loop = loop - # store a reference to session #1985 - self._session: Optional[ClientSession] = session # Save reference to _resolve_charset, so that get_encoding() will still # work after the response has finished reading the body. - if session is None: - # TODO: Fix session=None in tests (see ClientRequest.__init__). - self._resolve_charset: Callable[["ClientResponse", bytes], str] = ( - lambda *_: "utf-8" - ) - else: + # TODO: Fix session=None in tests (see ClientRequest.__init__). + if session is not None: + # store a reference to session #1985 + self._session = session self._resolve_charset = session._resolve_charset if loop.get_debug(): self._source_traceback = traceback.extract_stack(sys._getframe(1)) diff --git a/tests/test_client_response.py b/tests/test_client_response.py index be25a87e425..24b3c667326 100644 --- a/tests/test_client_response.py +++ b/tests/test_client_response.py @@ -981,6 +981,24 @@ def test_content_disposition_no_header() -> None: assert response.content_disposition is None +def test_default_encoding_is_utf8() -> None: + response = ClientResponse( + "get", + URL("http://def-cl-resp.org"), + request_info=mock.Mock(), + writer=WriterMock(), + continue100=None, + timer=TimerNoop(), + traces=[], + loop=mock.Mock(), + session=None, # type: ignore[arg-type] + ) + response._headers = CIMultiDictProxy(CIMultiDict({})) + response._body = b"" + + assert response.get_encoding() == "utf-8" + + def test_response_request_info() -> None: url = "http://def-cl-resp.org" headers = {"Content-Type": "application/json;charset=cp1251"} From 426c6cc68025847a6ee324bfee1d4cf69c43566e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 24 Nov 2024 10:34:51 -0800 Subject: [PATCH 05/25] [PR #10030/cdc01cd backport][3.12] Reduce initialization overhead in `ClientResponse` (#10032) --- CHANGES/10030.misc.rst | 1 + aiohttp/client_reqrep.py | 34 ++++++++++++++++++---------------- tests/test_client_response.py | 18 ++++++++++++++++++ 3 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 CHANGES/10030.misc.rst diff --git a/CHANGES/10030.misc.rst b/CHANGES/10030.misc.rst new file mode 100644 index 00000000000..68ed7d058d6 --- /dev/null +++ b/CHANGES/10030.misc.rst @@ -0,0 +1 @@ +Improved performance of creating :class:`aiohttp.ClientResponse` objects -- by :user:`bdraco`. diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index a0fa093d92e..ef9c95f0f59 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -819,18 +819,25 @@ class ClientResponse(HeadersMixin): status: int = None # type: ignore[assignment] # Status-Code reason: Optional[str] = None # Reason-Phrase - content: StreamReader = None # type: ignore[assignment] # Payload stream + content: StreamReader = None # type: ignore[assignment] # Payload stream + _body: Optional[bytes] = None _headers: CIMultiDictProxy[str] = None # type: ignore[assignment] + _history: Tuple["ClientResponse", ...] = () _raw_headers: RawHeaders = None # type: ignore[assignment] - _connection = None # current connection + _connection: Optional["Connection"] = None # current connection + _continue: Optional["asyncio.Future[bool]"] = None _source_traceback: Optional[traceback.StackSummary] = None + _session: Optional["ClientSession"] = None # set up by ClientRequest after ClientResponse object creation # post-init stage allows to not change ctor signature _closed = True # to allow __del__ for non-initialized properly response _released = False _in_context = False - __writer = None + + _resolve_charset: Callable[["ClientResponse", bytes], str] = lambda *_: "utf-8" + + __writer: Optional["asyncio.Task[None]"] = None def __init__( self, @@ -845,34 +852,29 @@ def __init__( loop: asyncio.AbstractEventLoop, session: "ClientSession", ) -> None: - assert isinstance(url, URL) + # URL forbids subclasses, so a simple type check is enough. + assert type(url) is URL self.method = method self.cookies = SimpleCookie() self._real_url = url self._url = url.with_fragment(None) if url.raw_fragment else url - self._body: Optional[bytes] = None if writer is not None: self._writer = writer - self._continue = continue100 # None by default - self._closed = True - self._history: Tuple[ClientResponse, ...] = () + if continue100 is not None: + self._continue = continue100 self._request_info = request_info self._timer = timer if timer is not None else TimerNoop() self._cache: Dict[str, Any] = {} self._traces = traces self._loop = loop - # store a reference to session #1985 - self._session: Optional[ClientSession] = session # Save reference to _resolve_charset, so that get_encoding() will still # work after the response has finished reading the body. - if session is None: - # TODO: Fix session=None in tests (see ClientRequest.__init__). - self._resolve_charset: Callable[["ClientResponse", bytes], str] = ( - lambda *_: "utf-8" - ) - else: + # TODO: Fix session=None in tests (see ClientRequest.__init__). + if session is not None: + # store a reference to session #1985 + self._session = session self._resolve_charset = session._resolve_charset if loop.get_debug(): self._source_traceback = traceback.extract_stack(sys._getframe(1)) diff --git a/tests/test_client_response.py b/tests/test_client_response.py index be25a87e425..24b3c667326 100644 --- a/tests/test_client_response.py +++ b/tests/test_client_response.py @@ -981,6 +981,24 @@ def test_content_disposition_no_header() -> None: assert response.content_disposition is None +def test_default_encoding_is_utf8() -> None: + response = ClientResponse( + "get", + URL("http://def-cl-resp.org"), + request_info=mock.Mock(), + writer=WriterMock(), + continue100=None, + timer=TimerNoop(), + traces=[], + loop=mock.Mock(), + session=None, # type: ignore[arg-type] + ) + response._headers = CIMultiDictProxy(CIMultiDict({})) + response._body = b"" + + assert response.get_encoding() == "utf-8" + + def test_response_request_info() -> None: url = "http://def-cl-resp.org" headers = {"Content-Type": "application/json;charset=cp1251"} From 3d2b8290287c6d5eee2df3da77209dc7d41106c4 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 24 Nov 2024 11:16:40 -0800 Subject: [PATCH 06/25] [PR #10029/5f5729d backport][3.11] Defer creation of cookies for client responses until needed (#10033) --- CHANGES/10029.misc.rst | 1 + aiohttp/client.py | 2 +- aiohttp/client_reqrep.py | 25 +++++++++++++++++++------ tests/test_client_response.py | 26 ++++++++++++++++++++++++-- 4 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 CHANGES/10029.misc.rst diff --git a/CHANGES/10029.misc.rst b/CHANGES/10029.misc.rst new file mode 100644 index 00000000000..d98729ecac8 --- /dev/null +++ b/CHANGES/10029.misc.rst @@ -0,0 +1 @@ +Improved performance of creating :class:`aiohttp.ClientResponse` objects when there are no cookies -- by :user:`bdraco`. diff --git a/aiohttp/client.py b/aiohttp/client.py index 56fd3925fa1..e04a6ff989a 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -744,7 +744,7 @@ async def _request( raise raise ClientOSError(*exc.args) from exc - if cookies := resp.cookies: + if cookies := resp._cookies: self._cookie_jar.update_cookies(cookies, resp.url) # redirects diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index ef9c95f0f59..a072ad37d76 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -826,6 +826,7 @@ class ClientResponse(HeadersMixin): _raw_headers: RawHeaders = None # type: ignore[assignment] _connection: Optional["Connection"] = None # current connection + _cookies: Optional[SimpleCookie] = None _continue: Optional["asyncio.Future[bool]"] = None _source_traceback: Optional[traceback.StackSummary] = None _session: Optional["ClientSession"] = None @@ -856,7 +857,6 @@ def __init__( assert type(url) is URL self.method = method - self.cookies = SimpleCookie() self._real_url = url self._url = url.with_fragment(None) if url.raw_fragment else url @@ -905,6 +905,16 @@ def _writer(self, writer: Optional["asyncio.Task[None]"]) -> None: else: writer.add_done_callback(self.__reset_writer) + @property + def cookies(self) -> SimpleCookie: + if self._cookies is None: + self._cookies = SimpleCookie() + return self._cookies + + @cookies.setter + def cookies(self, cookies: SimpleCookie) -> None: + self._cookies = cookies + @reify def url(self) -> URL: return self._url @@ -1068,11 +1078,14 @@ async def start(self, connection: "Connection") -> "ClientResponse": self.content = payload # cookies - for hdr in self.headers.getall(hdrs.SET_COOKIE, ()): - try: - self.cookies.load(hdr) - except CookieError as exc: - client_logger.warning("Can not load response cookies: %s", exc) + if cookie_hdrs := self.headers.getall(hdrs.SET_COOKIE, ()): + cookies = SimpleCookie() + for hdr in cookie_hdrs: + try: + cookies.load(hdr) + except CookieError as exc: + client_logger.warning("Can not load response cookies: %s", exc) + self._cookies = cookies return self def _response_eof(self) -> None: diff --git a/tests/test_client_response.py b/tests/test_client_response.py index 24b3c667326..18ba6c5149d 100644 --- a/tests/test_client_response.py +++ b/tests/test_client_response.py @@ -1,5 +1,6 @@ # Tests for aiohttp/client.py +import asyncio import gc import sys from typing import Callable @@ -10,7 +11,7 @@ from yarl import URL import aiohttp -from aiohttp import http +from aiohttp import ClientSession, http from aiohttp.client_reqrep import ClientResponse, RequestInfo from aiohttp.helpers import TimerNoop from aiohttp.test_utils import make_mocked_coro @@ -1139,7 +1140,28 @@ def side_effect(*args, **kwargs): ) -def test_response_real_url(loop, session) -> None: +def test_response_cookies( + loop: asyncio.AbstractEventLoop, session: ClientSession +) -> None: + response = ClientResponse( + "get", + URL("http://python.org"), + request_info=mock.Mock(), + writer=WriterMock(), + continue100=None, + timer=TimerNoop(), + traces=[], + loop=loop, + session=session, + ) + cookies = response.cookies + # Ensure the same cookies object is returned each time + assert response.cookies is cookies + + +def test_response_real_url( + loop: asyncio.AbstractEventLoop, session: ClientSession +) -> None: url = URL("http://def-cl-resp.org/#urlfragment") response = ClientResponse( "get", From 24eb11deed7f7fdcac3d47b4e3c299c8cac580ac Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 24 Nov 2024 11:21:25 -0800 Subject: [PATCH 07/25] [PR #10029/5f5729d backport][3.12] Defer creation of cookies for client responses until needed (#10034) --- CHANGES/10029.misc.rst | 1 + aiohttp/client.py | 2 +- aiohttp/client_reqrep.py | 25 +++++++++++++++++++------ tests/test_client_response.py | 26 ++++++++++++++++++++++++-- 4 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 CHANGES/10029.misc.rst diff --git a/CHANGES/10029.misc.rst b/CHANGES/10029.misc.rst new file mode 100644 index 00000000000..d98729ecac8 --- /dev/null +++ b/CHANGES/10029.misc.rst @@ -0,0 +1 @@ +Improved performance of creating :class:`aiohttp.ClientResponse` objects when there are no cookies -- by :user:`bdraco`. diff --git a/aiohttp/client.py b/aiohttp/client.py index 56fd3925fa1..e04a6ff989a 100644 --- a/aiohttp/client.py +++ b/aiohttp/client.py @@ -744,7 +744,7 @@ async def _request( raise raise ClientOSError(*exc.args) from exc - if cookies := resp.cookies: + if cookies := resp._cookies: self._cookie_jar.update_cookies(cookies, resp.url) # redirects diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index ef9c95f0f59..a072ad37d76 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -826,6 +826,7 @@ class ClientResponse(HeadersMixin): _raw_headers: RawHeaders = None # type: ignore[assignment] _connection: Optional["Connection"] = None # current connection + _cookies: Optional[SimpleCookie] = None _continue: Optional["asyncio.Future[bool]"] = None _source_traceback: Optional[traceback.StackSummary] = None _session: Optional["ClientSession"] = None @@ -856,7 +857,6 @@ def __init__( assert type(url) is URL self.method = method - self.cookies = SimpleCookie() self._real_url = url self._url = url.with_fragment(None) if url.raw_fragment else url @@ -905,6 +905,16 @@ def _writer(self, writer: Optional["asyncio.Task[None]"]) -> None: else: writer.add_done_callback(self.__reset_writer) + @property + def cookies(self) -> SimpleCookie: + if self._cookies is None: + self._cookies = SimpleCookie() + return self._cookies + + @cookies.setter + def cookies(self, cookies: SimpleCookie) -> None: + self._cookies = cookies + @reify def url(self) -> URL: return self._url @@ -1068,11 +1078,14 @@ async def start(self, connection: "Connection") -> "ClientResponse": self.content = payload # cookies - for hdr in self.headers.getall(hdrs.SET_COOKIE, ()): - try: - self.cookies.load(hdr) - except CookieError as exc: - client_logger.warning("Can not load response cookies: %s", exc) + if cookie_hdrs := self.headers.getall(hdrs.SET_COOKIE, ()): + cookies = SimpleCookie() + for hdr in cookie_hdrs: + try: + cookies.load(hdr) + except CookieError as exc: + client_logger.warning("Can not load response cookies: %s", exc) + self._cookies = cookies return self def _response_eof(self) -> None: diff --git a/tests/test_client_response.py b/tests/test_client_response.py index 24b3c667326..18ba6c5149d 100644 --- a/tests/test_client_response.py +++ b/tests/test_client_response.py @@ -1,5 +1,6 @@ # Tests for aiohttp/client.py +import asyncio import gc import sys from typing import Callable @@ -10,7 +11,7 @@ from yarl import URL import aiohttp -from aiohttp import http +from aiohttp import ClientSession, http from aiohttp.client_reqrep import ClientResponse, RequestInfo from aiohttp.helpers import TimerNoop from aiohttp.test_utils import make_mocked_coro @@ -1139,7 +1140,28 @@ def side_effect(*args, **kwargs): ) -def test_response_real_url(loop, session) -> None: +def test_response_cookies( + loop: asyncio.AbstractEventLoop, session: ClientSession +) -> None: + response = ClientResponse( + "get", + URL("http://python.org"), + request_info=mock.Mock(), + writer=WriterMock(), + continue100=None, + timer=TimerNoop(), + traces=[], + loop=loop, + session=session, + ) + cookies = response.cookies + # Ensure the same cookies object is returned each time + assert response.cookies is cookies + + +def test_response_real_url( + loop: asyncio.AbstractEventLoop, session: ClientSession +) -> None: url = URL("http://def-cl-resp.org/#urlfragment") response = ClientResponse( "get", From 5e4ad95e394305fc29aff21b1470cd94e6e2e237 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 00:18:39 +0000 Subject: [PATCH 08/25] [PR #10038/6f4e9615 backport][3.12] Small speed up to `StreamWriter.__init__` (#10040) Co-authored-by: J. Nick Koston --- aiohttp/abc.py | 4 ++-- aiohttp/http_writer.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/aiohttp/abc.py b/aiohttp/abc.py index 868f0e94898..d6f9f782b0f 100644 --- a/aiohttp/abc.py +++ b/aiohttp/abc.py @@ -195,8 +195,8 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": class AbstractStreamWriter(ABC): """Abstract stream writer.""" - buffer_size = 0 - output_size = 0 + buffer_size: int = 0 + output_size: int = 0 length: Optional[int] = 0 @abstractmethod diff --git a/aiohttp/http_writer.py b/aiohttp/http_writer.py index c6c80edc3c4..c66fda3d8d0 100644 --- a/aiohttp/http_writer.py +++ b/aiohttp/http_writer.py @@ -38,6 +38,12 @@ class HttpVersion(NamedTuple): class StreamWriter(AbstractStreamWriter): + + length: Optional[int] = None + chunked: bool = False + _eof: bool = False + _compress: Optional[ZLibCompressor] = None + def __init__( self, protocol: BaseProtocol, @@ -46,17 +52,7 @@ def __init__( on_headers_sent: _T_OnHeadersSent = None, ) -> None: self._protocol = protocol - self.loop = loop - self.length = None - self.chunked = False - self.buffer_size = 0 - self.output_size = 0 - - self._eof = False - self._compress: Optional[ZLibCompressor] = None - self._drain_waiter = None - self._on_chunk_sent: _T_OnChunkSent = on_chunk_sent self._on_headers_sent: _T_OnHeadersSent = on_headers_sent From 65dab0ee40109b97fca2b938ba76c8f68968ce49 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 00:30:10 +0000 Subject: [PATCH 09/25] [PR #10038/6f4e9615 backport][3.11] Small speed up to `StreamWriter.__init__` (#10039) Co-authored-by: J. Nick Koston --- aiohttp/abc.py | 4 ++-- aiohttp/http_writer.py | 16 ++++++---------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/aiohttp/abc.py b/aiohttp/abc.py index 868f0e94898..d6f9f782b0f 100644 --- a/aiohttp/abc.py +++ b/aiohttp/abc.py @@ -195,8 +195,8 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": class AbstractStreamWriter(ABC): """Abstract stream writer.""" - buffer_size = 0 - output_size = 0 + buffer_size: int = 0 + output_size: int = 0 length: Optional[int] = 0 @abstractmethod diff --git a/aiohttp/http_writer.py b/aiohttp/http_writer.py index c6c80edc3c4..c66fda3d8d0 100644 --- a/aiohttp/http_writer.py +++ b/aiohttp/http_writer.py @@ -38,6 +38,12 @@ class HttpVersion(NamedTuple): class StreamWriter(AbstractStreamWriter): + + length: Optional[int] = None + chunked: bool = False + _eof: bool = False + _compress: Optional[ZLibCompressor] = None + def __init__( self, protocol: BaseProtocol, @@ -46,17 +52,7 @@ def __init__( on_headers_sent: _T_OnHeadersSent = None, ) -> None: self._protocol = protocol - self.loop = loop - self.length = None - self.chunked = False - self.buffer_size = 0 - self.output_size = 0 - - self._eof = False - self._compress: Optional[ZLibCompressor] = None - self._drain_waiter = None - self._on_chunk_sent: _T_OnChunkSent = on_chunk_sent self._on_headers_sent: _T_OnHeadersSent = on_headers_sent From 3dfd7ae86413e164d3c924cac925980fa9ceddff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:23:19 +0000 Subject: [PATCH 10/25] Bump pypa/cibuildwheel from 2.21.3 to 2.22.0 (#10042) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.21.3 to 2.22.0.
Release notes

Sourced from pypa/cibuildwheel's releases.

Version 2.22.0

  • 🌟 Added a new CIBW_ENABLE/enable feature that replaces CIBW_FREETHREADED_SUPPORT/free-threaded-support and CIBW_PRERELEASE_PYTHONS with a system that supports both. In cibuildwheel 3, this will also include a PyPy setting and the deprecated options will be removed. (#2048)
  • 🌟 Dependency groups are now supported for tests. Use CIBW_TEST_GROUPS/test-groups to specify groups in [dependency-groups] for testing. (#2063)
  • 🌟 Support for the experimental Ubuntu-based ARMv7l manylinux image (#2052)
  • ✨ Show a warning when cibuildwheel is run from Python 3.10 or older; cibuildwheel 3.0 will require Python 3.11 or newer as host (#2050)
  • 🐛 Fix issue with stderr interfering with checking the docker version (#2074)
  • 🛠 Python 3.9 is now used in CIBW_BEFORE_ALL/before-all on linux, replacing 3.8, which is now EoL (#2043)
  • 🛠 Error messages for producing a pure-Python wheel are slightly more informative (#2044)
  • 🛠 Better error when uname -m fails on ARM (#2049)
  • 🛠 Better error when repair fails and docs for abi3audit on Windows (#2058)
  • 🛠 Better error when manylinux-interpreters ensure fails (#2066)
  • 🛠 Update Pyodide to 0.26.4, and adapt to the unbundled pyodide-build (now 0.29) (#2090)
  • 🛠 Now cibuildwheel uses dependency-groups for development dependencies (#2064, #2085)
  • 📚 Docs updates and tidy ups (#2061, #2067, #2072)
Changelog

Sourced from pypa/cibuildwheel's changelog.

v2.22.0

23 November 2024

  • 🌟 Added a new CIBW_ENABLE/enable feature that replaces CIBW_FREETHREADED_SUPPORT/free-threaded-support and CIBW_PRERELEASE_PYTHONS with a system that supports both. In cibuildwheel 3, this will also include a PyPy setting and the deprecated options will be removed. (#2048)
  • 🌟 Dependency groups are now supported for tests. Use CIBW_TEST_GROUPS/test-groups to specify groups in [dependency-groups] for testing. (#2063)
  • 🌟 Support for the experimental Ubuntu-based ARMv7l manylinux image (#2052)
  • ✨ Show a warning when cibuildwheel is run from Python 3.10 or older; cibuildwheel 3.0 will require Python 3.11 or newer as host (#2050)
  • 🐛 Fix issue with stderr interfering with checking the docker version (#2074)
  • 🛠 Python 3.9 is now used in CIBW_BEFORE_ALL/before-all on linux, replacing 3.8, which is now EoL (#2043)
  • 🛠 Error messages for producing a pure-Python wheel are slightly more informative (#2044)
  • 🛠 Better error when uname -m fails on ARM (#2049)
  • 🛠 Better error when repair fails and docs for abi3audit on Windows (#2058)
  • 🛠 Better error when manylinux-interpreters ensure fails (#2066)
  • 🛠 Update Pyodide to 0.26.4, and adapt to the unbundled pyodide-build (now 0.29) (#2090)
  • 🛠 Now cibuildwheel uses dependency-groups for development dependencies (#2064, #2085)
  • 📚 Docs updates and tidy ups (#2061, #2067, #2072)
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=pypa/cibuildwheel&package-manager=github_actions&previous-version=2.21.3&new-version=2.22.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci-cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index b468a3f3f4c..765047b933f 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -396,7 +396,7 @@ jobs: run: | make cythonize - name: Build wheels - uses: pypa/cibuildwheel@v2.21.3 + uses: pypa/cibuildwheel@v2.22.0 env: CIBW_ARCHS_MACOS: x86_64 arm64 universal2 - uses: actions/upload-artifact@v3 From d411bc511d4348e543a5690786c7100dc47d8eb9 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 25 Nov 2024 14:29:45 -0800 Subject: [PATCH 11/25] [PR #10043/5255cec backport][3.11] Avoid constructing headers mulitidict twice for ``web.Response`` (#10045) --- CHANGES/10043.misc.rst | 1 + aiohttp/web_response.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 CHANGES/10043.misc.rst diff --git a/CHANGES/10043.misc.rst b/CHANGES/10043.misc.rst new file mode 100644 index 00000000000..cfd4e88ee24 --- /dev/null +++ b/CHANGES/10043.misc.rst @@ -0,0 +1 @@ +Improved performance of constructing :class:`aiohttp.web.Response` with headers -- by :user:`bdraco`. diff --git a/aiohttp/web_response.py b/aiohttp/web_response.py index 59c9b54784a..d3d8afe5433 100644 --- a/aiohttp/web_response.py +++ b/aiohttp/web_response.py @@ -86,7 +86,15 @@ def __init__( status: int = 200, reason: Optional[str] = None, headers: Optional[LooseHeaders] = None, + _real_headers: Optional[CIMultiDict[str]] = None, ) -> None: + """Initialize a new stream response object. + + _real_headers is an internal parameter used to pass a pre-populated + headers object. It is used by the `Response` class to avoid copying + the headers when creating a new response object. It is not intended + to be used by external code. + """ self._body = None self._keep_alive: Optional[bool] = None self._chunked = False @@ -102,7 +110,9 @@ def __init__( self._body_length = 0 self._state: Dict[str, Any] = {} - if headers is not None: + if _real_headers is not None: + self._headers = _real_headers + elif headers is not None: self._headers: CIMultiDict[str] = CIMultiDict(headers) else: self._headers = CIMultiDict() @@ -660,7 +670,7 @@ def __init__( content_type += "; charset=" + charset real_headers[hdrs.CONTENT_TYPE] = content_type - super().__init__(status=status, reason=reason, headers=real_headers) + super().__init__(status=status, reason=reason, _real_headers=real_headers) if text is not None: self.text = text From d8e9a6b784e248da823db3f1ed57fbb21212ee9b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 25 Nov 2024 14:42:56 -0800 Subject: [PATCH 12/25] [PR #10043/5255cec backport][3.12] Avoid constructing headers mulitidict twice for ``web.Response`` (#10046) --- CHANGES/10043.misc.rst | 1 + aiohttp/web_response.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 CHANGES/10043.misc.rst diff --git a/CHANGES/10043.misc.rst b/CHANGES/10043.misc.rst new file mode 100644 index 00000000000..cfd4e88ee24 --- /dev/null +++ b/CHANGES/10043.misc.rst @@ -0,0 +1 @@ +Improved performance of constructing :class:`aiohttp.web.Response` with headers -- by :user:`bdraco`. diff --git a/aiohttp/web_response.py b/aiohttp/web_response.py index 59c9b54784a..d3d8afe5433 100644 --- a/aiohttp/web_response.py +++ b/aiohttp/web_response.py @@ -86,7 +86,15 @@ def __init__( status: int = 200, reason: Optional[str] = None, headers: Optional[LooseHeaders] = None, + _real_headers: Optional[CIMultiDict[str]] = None, ) -> None: + """Initialize a new stream response object. + + _real_headers is an internal parameter used to pass a pre-populated + headers object. It is used by the `Response` class to avoid copying + the headers when creating a new response object. It is not intended + to be used by external code. + """ self._body = None self._keep_alive: Optional[bool] = None self._chunked = False @@ -102,7 +110,9 @@ def __init__( self._body_length = 0 self._state: Dict[str, Any] = {} - if headers is not None: + if _real_headers is not None: + self._headers = _real_headers + elif headers is not None: self._headers: CIMultiDict[str] = CIMultiDict(headers) else: self._headers = CIMultiDict() @@ -660,7 +670,7 @@ def __init__( content_type += "; charset=" + charset real_headers[hdrs.CONTENT_TYPE] = content_type - super().__init__(status=status, reason=reason, headers=real_headers) + super().__init__(status=status, reason=reason, _real_headers=real_headers) if text is not None: self.text = text From 653302e225428a07e15e758b28422262bd98d5de Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Tue, 26 Nov 2024 17:07:38 -0600 Subject: [PATCH 13/25] [PR #10049/006fbc37 backport][3.11] Improve client performance when there are no auto headers to skip (#10051) Co-authored-by: J. Nick Koston --- CHANGES/10049.misc.rst | 1 + aiohttp/client_reqrep.py | 37 +++++++++++++++++++++--------------- tests/test_client_request.py | 1 + 3 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 CHANGES/10049.misc.rst diff --git a/CHANGES/10049.misc.rst b/CHANGES/10049.misc.rst new file mode 100644 index 00000000000..58f61d48420 --- /dev/null +++ b/CHANGES/10049.misc.rst @@ -0,0 +1 @@ +Improved performance of making requests when there are no auto headers to skip -- by :user:`bdraco`. diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index a072ad37d76..e97c40ce0e5 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -271,6 +271,8 @@ class ClientRequest: __writer = None # async task for streaming data _continue = None # waiter future for '100 Continue' response + _skip_auto_headers: Optional["CIMultiDict[None]"] = None + # N.B. # Adding __del__ method with self._writer closing doesn't make sense # because _writer is instance method, thus it keeps a reference to self. @@ -358,6 +360,10 @@ def __init__( def __reset_writer(self, _: object = None) -> None: self.__writer = None + @property + def skip_auto_headers(self) -> CIMultiDict[None]: + return self._skip_auto_headers or CIMultiDict() + @property def _writer(self) -> Optional["asyncio.Task[None]"]: return self.__writer @@ -469,20 +475,19 @@ def update_headers(self, headers: Optional[LooseHeaders]) -> None: def update_auto_headers(self, skip_auto_headers: Optional[Iterable[str]]) -> None: if skip_auto_headers is not None: - self.skip_auto_headers = CIMultiDict( + self._skip_auto_headers = CIMultiDict( (hdr, None) for hdr in sorted(skip_auto_headers) ) used_headers = self.headers.copy() - used_headers.extend(self.skip_auto_headers) # type: ignore[arg-type] + used_headers.extend(self._skip_auto_headers) # type: ignore[arg-type] else: # Fast path when there are no headers to skip # which is the most common case. - self.skip_auto_headers = CIMultiDict() used_headers = self.headers for hdr, val in self.DEFAULT_HEADERS.items(): if hdr not in used_headers: - self.headers.add(hdr, val) + self.headers[hdr] = val if hdrs.USER_AGENT not in used_headers: self.headers[hdrs.USER_AGENT] = SERVER_SOFTWARE @@ -584,21 +589,20 @@ def update_body_from_data(self, body: Any) -> None: self.body = body # enable chunked encoding if needed - if not self.chunked: - if hdrs.CONTENT_LENGTH not in self.headers: - size = body.size - if size is None: - self.chunked = True - else: - if hdrs.CONTENT_LENGTH not in self.headers: - self.headers[hdrs.CONTENT_LENGTH] = str(size) + if not self.chunked and hdrs.CONTENT_LENGTH not in self.headers: + if (size := body.size) is not None: + self.headers[hdrs.CONTENT_LENGTH] = str(size) + else: + self.chunked = True # copy payload headers assert body.headers + headers = self.headers + skip_headers = self._skip_auto_headers for key, value in body.headers.items(): - if key in self.headers or key in self.skip_auto_headers: + if key in headers or (skip_headers is not None and key in skip_headers): continue - self.headers[key] = value + headers[key] = value def update_expect_continue(self, expect: bool = False) -> None: if expect: @@ -723,7 +727,10 @@ async def send(self, conn: "Connection") -> "ClientResponse": # set default content-type if ( self.method in self.POST_METHODS - and hdrs.CONTENT_TYPE not in self.skip_auto_headers + and ( + self._skip_auto_headers is None + or hdrs.CONTENT_TYPE not in self._skip_auto_headers + ) and hdrs.CONTENT_TYPE not in self.headers ): self.headers[hdrs.CONTENT_TYPE] = "application/octet-stream" diff --git a/tests/test_client_request.py b/tests/test_client_request.py index 324eddf7f6e..f86ff5d7587 100644 --- a/tests/test_client_request.py +++ b/tests/test_client_request.py @@ -687,6 +687,7 @@ async def test_content_type_skip_auto_header_bytes(loop, conn) -> None: skip_auto_headers={"Content-Type"}, loop=loop, ) + assert req.skip_auto_headers == CIMultiDict({"CONTENT-TYPE": None}) resp = await req.send(conn) assert "CONTENT-TYPE" not in req.headers resp.close() From c9eb8e7b080999249651380c99f9f404c7ca25b6 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Tue, 26 Nov 2024 23:14:29 +0000 Subject: [PATCH 14/25] [PR #10049/006fbc37 backport][3.12] Improve client performance when there are no auto headers to skip (#10052) Co-authored-by: J. Nick Koston --- CHANGES/10049.misc.rst | 1 + aiohttp/client_reqrep.py | 37 +++++++++++++++++++++--------------- tests/test_client_request.py | 1 + 3 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 CHANGES/10049.misc.rst diff --git a/CHANGES/10049.misc.rst b/CHANGES/10049.misc.rst new file mode 100644 index 00000000000..58f61d48420 --- /dev/null +++ b/CHANGES/10049.misc.rst @@ -0,0 +1 @@ +Improved performance of making requests when there are no auto headers to skip -- by :user:`bdraco`. diff --git a/aiohttp/client_reqrep.py b/aiohttp/client_reqrep.py index a072ad37d76..e97c40ce0e5 100644 --- a/aiohttp/client_reqrep.py +++ b/aiohttp/client_reqrep.py @@ -271,6 +271,8 @@ class ClientRequest: __writer = None # async task for streaming data _continue = None # waiter future for '100 Continue' response + _skip_auto_headers: Optional["CIMultiDict[None]"] = None + # N.B. # Adding __del__ method with self._writer closing doesn't make sense # because _writer is instance method, thus it keeps a reference to self. @@ -358,6 +360,10 @@ def __init__( def __reset_writer(self, _: object = None) -> None: self.__writer = None + @property + def skip_auto_headers(self) -> CIMultiDict[None]: + return self._skip_auto_headers or CIMultiDict() + @property def _writer(self) -> Optional["asyncio.Task[None]"]: return self.__writer @@ -469,20 +475,19 @@ def update_headers(self, headers: Optional[LooseHeaders]) -> None: def update_auto_headers(self, skip_auto_headers: Optional[Iterable[str]]) -> None: if skip_auto_headers is not None: - self.skip_auto_headers = CIMultiDict( + self._skip_auto_headers = CIMultiDict( (hdr, None) for hdr in sorted(skip_auto_headers) ) used_headers = self.headers.copy() - used_headers.extend(self.skip_auto_headers) # type: ignore[arg-type] + used_headers.extend(self._skip_auto_headers) # type: ignore[arg-type] else: # Fast path when there are no headers to skip # which is the most common case. - self.skip_auto_headers = CIMultiDict() used_headers = self.headers for hdr, val in self.DEFAULT_HEADERS.items(): if hdr not in used_headers: - self.headers.add(hdr, val) + self.headers[hdr] = val if hdrs.USER_AGENT not in used_headers: self.headers[hdrs.USER_AGENT] = SERVER_SOFTWARE @@ -584,21 +589,20 @@ def update_body_from_data(self, body: Any) -> None: self.body = body # enable chunked encoding if needed - if not self.chunked: - if hdrs.CONTENT_LENGTH not in self.headers: - size = body.size - if size is None: - self.chunked = True - else: - if hdrs.CONTENT_LENGTH not in self.headers: - self.headers[hdrs.CONTENT_LENGTH] = str(size) + if not self.chunked and hdrs.CONTENT_LENGTH not in self.headers: + if (size := body.size) is not None: + self.headers[hdrs.CONTENT_LENGTH] = str(size) + else: + self.chunked = True # copy payload headers assert body.headers + headers = self.headers + skip_headers = self._skip_auto_headers for key, value in body.headers.items(): - if key in self.headers or key in self.skip_auto_headers: + if key in headers or (skip_headers is not None and key in skip_headers): continue - self.headers[key] = value + headers[key] = value def update_expect_continue(self, expect: bool = False) -> None: if expect: @@ -723,7 +727,10 @@ async def send(self, conn: "Connection") -> "ClientResponse": # set default content-type if ( self.method in self.POST_METHODS - and hdrs.CONTENT_TYPE not in self.skip_auto_headers + and ( + self._skip_auto_headers is None + or hdrs.CONTENT_TYPE not in self._skip_auto_headers + ) and hdrs.CONTENT_TYPE not in self.headers ): self.headers[hdrs.CONTENT_TYPE] = "application/octet-stream" diff --git a/tests/test_client_request.py b/tests/test_client_request.py index 324eddf7f6e..f86ff5d7587 100644 --- a/tests/test_client_request.py +++ b/tests/test_client_request.py @@ -687,6 +687,7 @@ async def test_content_type_skip_auto_header_bytes(loop, conn) -> None: skip_auto_headers={"Content-Type"}, loop=loop, ) + assert req.skip_auto_headers == CIMultiDict({"CONTENT-TYPE": None}) resp = await req.send(conn) assert "CONTENT-TYPE" not in req.headers resp.close() From 1a6fafe8f05acb705029e16c9ba43df7aaba2473 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 26 Nov 2024 16:00:37 -0800 Subject: [PATCH 15/25] [PR #10037/2e369db backport][3.11] Refactor requests and responses to use classvar defaults to avoid multiple `__init__`s (#10053) --- CHANGES/10037.misc.rst | 1 + aiohttp/helpers.py | 3 ++- aiohttp/web_app.py | 2 ++ aiohttp/web_request.py | 21 ++++++--------------- aiohttp/web_response.py | 32 ++++++++++++++++---------------- aiohttp/web_ws.py | 32 ++++++++++++++++---------------- 6 files changed, 43 insertions(+), 48 deletions(-) create mode 100644 CHANGES/10037.misc.rst diff --git a/CHANGES/10037.misc.rst b/CHANGES/10037.misc.rst new file mode 100644 index 00000000000..655c804c995 --- /dev/null +++ b/CHANGES/10037.misc.rst @@ -0,0 +1 @@ +Improved performances of creating objects during the HTTP request lifecycle -- by :user:`bdraco`. diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 522cce2972b..8038931ebec 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -694,10 +694,11 @@ def ceil_timeout( class HeadersMixin: + """Mixin for handling headers.""" + ATTRS = frozenset(["_content_type", "_content_dict", "_stored_content_type"]) _headers: MultiMapping[str] - _content_type: Optional[str] = None _content_dict: Optional[Dict[str, str]] = None _stored_content_type: Union[str, None, _SENTINEL] = sentinel diff --git a/aiohttp/web_app.py b/aiohttp/web_app.py index 5d542ab9222..4bdc54034de 100644 --- a/aiohttp/web_app.py +++ b/aiohttp/web_app.py @@ -498,6 +498,8 @@ def _make_request( task: "asyncio.Task[None]", _cls: Type[Request] = Request, ) -> Request: + if TYPE_CHECKING: + assert self._loop is not None return _cls( message, payload, diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 502a93d247a..f11d49020a0 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -146,6 +146,8 @@ class BaseRequest(MutableMapping[str, Any], HeadersMixin): "_transport_peername", ] ) + _post: Optional[MultiDictProxy[Union[str, bytes, FileField]]] = None + _read_bytes: Optional[bytes] = None def __init__( self, @@ -162,8 +164,6 @@ def __init__( host: Optional[str] = None, remote: Optional[str] = None, ) -> None: - if state is None: - state = {} self._message = message self._protocol = protocol self._payload_writer = payload_writer @@ -187,20 +187,18 @@ def __init__( self._cache["scheme"] = url.scheme self._rel_url = url.relative() else: - self._rel_url = message.url + self._rel_url = url if scheme is not None: self._cache["scheme"] = scheme if host is not None: self._cache["host"] = host - self._post: Optional[MultiDictProxy[Union[str, bytes, FileField]]] = None - self._read_bytes: Optional[bytes] = None - self._state = state + self._state = {} if state is None else state self._task = task self._client_max_size = client_max_size self._loop = loop - transport = self._protocol.transport + transport = protocol.transport assert transport is not None self._transport_sslcontext = transport.get_extra_info("sslcontext") self._transport_peername = transport.get_extra_info("peername") @@ -847,14 +845,7 @@ class Request(BaseRequest): ATTRS = BaseRequest.ATTRS | frozenset(["_match_info"]) - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - - # matchdict, route_name, handler - # or information about traversal lookup - - # initialized after route resolving - self._match_info: Optional[UrlMappingMatchInfo] = None + _match_info: Optional["UrlMappingMatchInfo"] = None if DEBUG: diff --git a/aiohttp/web_response.py b/aiohttp/web_response.py index d3d8afe5433..e05799ca7b7 100644 --- a/aiohttp/web_response.py +++ b/aiohttp/web_response.py @@ -76,9 +76,20 @@ class ContentCoding(enum.Enum): class StreamResponse(BaseClass, HeadersMixin): - _length_check = True - _body: Union[None, bytes, bytearray, Payload] + _length_check = True + _body = None + _keep_alive: Optional[bool] = None + _chunked: bool = False + _compression: bool = False + _compression_strategy: int = zlib.Z_DEFAULT_STRATEGY + _compression_force: Optional[ContentCoding] = None + _req: Optional["BaseRequest"] = None + _payload_writer: Optional[AbstractStreamWriter] = None + _eof_sent: bool = False + _must_be_empty_body: Optional[bool] = None + _body_length = 0 + _cookies: Optional[SimpleCookie] = None def __init__( self, @@ -95,19 +106,6 @@ def __init__( the headers when creating a new response object. It is not intended to be used by external code. """ - self._body = None - self._keep_alive: Optional[bool] = None - self._chunked = False - self._compression = False - self._compression_strategy: int = zlib.Z_DEFAULT_STRATEGY - self._compression_force: Optional[ContentCoding] = None - self._cookies: Optional[SimpleCookie] = None - - self._req: Optional[BaseRequest] = None - self._payload_writer: Optional[AbstractStreamWriter] = None - self._eof_sent = False - self._must_be_empty_body: Optional[bool] = None - self._body_length = 0 self._state: Dict[str, Any] = {} if _real_headers is not None: @@ -613,6 +611,9 @@ def __eq__(self, other: object) -> bool: class Response(StreamResponse): + + _compressed_body: Optional[bytes] = None + def __init__( self, *, @@ -677,7 +678,6 @@ def __init__( else: self.body = body - self._compressed_body: Optional[bytes] = None self._zlib_executor_size = zlib_executor_size self._zlib_executor = zlib_executor diff --git a/aiohttp/web_ws.py b/aiohttp/web_ws.py index c18f88eaf00..0fb1549a3aa 100644 --- a/aiohttp/web_ws.py +++ b/aiohttp/web_ws.py @@ -61,7 +61,22 @@ def __bool__(self) -> bool: class WebSocketResponse(StreamResponse): - _length_check = False + _length_check: bool = False + _ws_protocol: Optional[str] = None + _writer: Optional[WebSocketWriter] = None + _reader: Optional[WebSocketDataQueue] = None + _closed: bool = False + _closing: bool = False + _conn_lost: int = 0 + _close_code: Optional[int] = None + _loop: Optional[asyncio.AbstractEventLoop] = None + _waiting: bool = False + _close_wait: Optional[asyncio.Future[None]] = None + _exception: Optional[BaseException] = None + _heartbeat_when: float = 0.0 + _heartbeat_cb: Optional[asyncio.TimerHandle] = None + _pong_response_cb: Optional[asyncio.TimerHandle] = None + _ping_task: Optional[asyncio.Task[None]] = None def __init__( self, @@ -78,30 +93,15 @@ def __init__( ) -> None: super().__init__(status=101) self._protocols = protocols - self._ws_protocol: Optional[str] = None - self._writer: Optional[WebSocketWriter] = None - self._reader: Optional[WebSocketDataQueue] = None - self._closed = False - self._closing = False - self._conn_lost = 0 - self._close_code: Optional[int] = None - self._loop: Optional[asyncio.AbstractEventLoop] = None - self._waiting: bool = False - self._close_wait: Optional[asyncio.Future[None]] = None - self._exception: Optional[BaseException] = None self._timeout = timeout self._receive_timeout = receive_timeout self._autoclose = autoclose self._autoping = autoping self._heartbeat = heartbeat - self._heartbeat_when = 0.0 - self._heartbeat_cb: Optional[asyncio.TimerHandle] = None if heartbeat is not None: self._pong_heartbeat = heartbeat / 2.0 - self._pong_response_cb: Optional[asyncio.TimerHandle] = None self._compress: Union[bool, int] = compress self._max_msg_size = max_msg_size - self._ping_task: Optional[asyncio.Task[None]] = None self._writer_limit = writer_limit def _cancel_heartbeat(self) -> None: From e5dd82a16b413b0cebeb97fad3be563935313f11 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 26 Nov 2024 16:04:27 -0800 Subject: [PATCH 16/25] [PR #10037/2e369db backport][3.12] Refactor requests and responses to use classvar defaults to avoid multiple `__init__`s (#10054) --- CHANGES/10037.misc.rst | 1 + aiohttp/helpers.py | 3 ++- aiohttp/web_app.py | 2 ++ aiohttp/web_request.py | 21 ++++++--------------- aiohttp/web_response.py | 32 ++++++++++++++++---------------- aiohttp/web_ws.py | 32 ++++++++++++++++---------------- 6 files changed, 43 insertions(+), 48 deletions(-) create mode 100644 CHANGES/10037.misc.rst diff --git a/CHANGES/10037.misc.rst b/CHANGES/10037.misc.rst new file mode 100644 index 00000000000..655c804c995 --- /dev/null +++ b/CHANGES/10037.misc.rst @@ -0,0 +1 @@ +Improved performances of creating objects during the HTTP request lifecycle -- by :user:`bdraco`. diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py index 522cce2972b..8038931ebec 100644 --- a/aiohttp/helpers.py +++ b/aiohttp/helpers.py @@ -694,10 +694,11 @@ def ceil_timeout( class HeadersMixin: + """Mixin for handling headers.""" + ATTRS = frozenset(["_content_type", "_content_dict", "_stored_content_type"]) _headers: MultiMapping[str] - _content_type: Optional[str] = None _content_dict: Optional[Dict[str, str]] = None _stored_content_type: Union[str, None, _SENTINEL] = sentinel diff --git a/aiohttp/web_app.py b/aiohttp/web_app.py index 5d542ab9222..4bdc54034de 100644 --- a/aiohttp/web_app.py +++ b/aiohttp/web_app.py @@ -498,6 +498,8 @@ def _make_request( task: "asyncio.Task[None]", _cls: Type[Request] = Request, ) -> Request: + if TYPE_CHECKING: + assert self._loop is not None return _cls( message, payload, diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 502a93d247a..f11d49020a0 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -146,6 +146,8 @@ class BaseRequest(MutableMapping[str, Any], HeadersMixin): "_transport_peername", ] ) + _post: Optional[MultiDictProxy[Union[str, bytes, FileField]]] = None + _read_bytes: Optional[bytes] = None def __init__( self, @@ -162,8 +164,6 @@ def __init__( host: Optional[str] = None, remote: Optional[str] = None, ) -> None: - if state is None: - state = {} self._message = message self._protocol = protocol self._payload_writer = payload_writer @@ -187,20 +187,18 @@ def __init__( self._cache["scheme"] = url.scheme self._rel_url = url.relative() else: - self._rel_url = message.url + self._rel_url = url if scheme is not None: self._cache["scheme"] = scheme if host is not None: self._cache["host"] = host - self._post: Optional[MultiDictProxy[Union[str, bytes, FileField]]] = None - self._read_bytes: Optional[bytes] = None - self._state = state + self._state = {} if state is None else state self._task = task self._client_max_size = client_max_size self._loop = loop - transport = self._protocol.transport + transport = protocol.transport assert transport is not None self._transport_sslcontext = transport.get_extra_info("sslcontext") self._transport_peername = transport.get_extra_info("peername") @@ -847,14 +845,7 @@ class Request(BaseRequest): ATTRS = BaseRequest.ATTRS | frozenset(["_match_info"]) - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - - # matchdict, route_name, handler - # or information about traversal lookup - - # initialized after route resolving - self._match_info: Optional[UrlMappingMatchInfo] = None + _match_info: Optional["UrlMappingMatchInfo"] = None if DEBUG: diff --git a/aiohttp/web_response.py b/aiohttp/web_response.py index d3d8afe5433..e05799ca7b7 100644 --- a/aiohttp/web_response.py +++ b/aiohttp/web_response.py @@ -76,9 +76,20 @@ class ContentCoding(enum.Enum): class StreamResponse(BaseClass, HeadersMixin): - _length_check = True - _body: Union[None, bytes, bytearray, Payload] + _length_check = True + _body = None + _keep_alive: Optional[bool] = None + _chunked: bool = False + _compression: bool = False + _compression_strategy: int = zlib.Z_DEFAULT_STRATEGY + _compression_force: Optional[ContentCoding] = None + _req: Optional["BaseRequest"] = None + _payload_writer: Optional[AbstractStreamWriter] = None + _eof_sent: bool = False + _must_be_empty_body: Optional[bool] = None + _body_length = 0 + _cookies: Optional[SimpleCookie] = None def __init__( self, @@ -95,19 +106,6 @@ def __init__( the headers when creating a new response object. It is not intended to be used by external code. """ - self._body = None - self._keep_alive: Optional[bool] = None - self._chunked = False - self._compression = False - self._compression_strategy: int = zlib.Z_DEFAULT_STRATEGY - self._compression_force: Optional[ContentCoding] = None - self._cookies: Optional[SimpleCookie] = None - - self._req: Optional[BaseRequest] = None - self._payload_writer: Optional[AbstractStreamWriter] = None - self._eof_sent = False - self._must_be_empty_body: Optional[bool] = None - self._body_length = 0 self._state: Dict[str, Any] = {} if _real_headers is not None: @@ -613,6 +611,9 @@ def __eq__(self, other: object) -> bool: class Response(StreamResponse): + + _compressed_body: Optional[bytes] = None + def __init__( self, *, @@ -677,7 +678,6 @@ def __init__( else: self.body = body - self._compressed_body: Optional[bytes] = None self._zlib_executor_size = zlib_executor_size self._zlib_executor = zlib_executor diff --git a/aiohttp/web_ws.py b/aiohttp/web_ws.py index c18f88eaf00..0fb1549a3aa 100644 --- a/aiohttp/web_ws.py +++ b/aiohttp/web_ws.py @@ -61,7 +61,22 @@ def __bool__(self) -> bool: class WebSocketResponse(StreamResponse): - _length_check = False + _length_check: bool = False + _ws_protocol: Optional[str] = None + _writer: Optional[WebSocketWriter] = None + _reader: Optional[WebSocketDataQueue] = None + _closed: bool = False + _closing: bool = False + _conn_lost: int = 0 + _close_code: Optional[int] = None + _loop: Optional[asyncio.AbstractEventLoop] = None + _waiting: bool = False + _close_wait: Optional[asyncio.Future[None]] = None + _exception: Optional[BaseException] = None + _heartbeat_when: float = 0.0 + _heartbeat_cb: Optional[asyncio.TimerHandle] = None + _pong_response_cb: Optional[asyncio.TimerHandle] = None + _ping_task: Optional[asyncio.Task[None]] = None def __init__( self, @@ -78,30 +93,15 @@ def __init__( ) -> None: super().__init__(status=101) self._protocols = protocols - self._ws_protocol: Optional[str] = None - self._writer: Optional[WebSocketWriter] = None - self._reader: Optional[WebSocketDataQueue] = None - self._closed = False - self._closing = False - self._conn_lost = 0 - self._close_code: Optional[int] = None - self._loop: Optional[asyncio.AbstractEventLoop] = None - self._waiting: bool = False - self._close_wait: Optional[asyncio.Future[None]] = None - self._exception: Optional[BaseException] = None self._timeout = timeout self._receive_timeout = receive_timeout self._autoclose = autoclose self._autoping = autoping self._heartbeat = heartbeat - self._heartbeat_when = 0.0 - self._heartbeat_cb: Optional[asyncio.TimerHandle] = None if heartbeat is not None: self._pong_heartbeat = heartbeat / 2.0 - self._pong_response_cb: Optional[asyncio.TimerHandle] = None self._compress: Union[bool, int] = compress self._max_msg_size = max_msg_size - self._ping_task: Optional[asyncio.Task[None]] = None self._writer_limit = writer_limit def _cancel_heartbeat(self) -> None: From 7e628f4edaa6a529381d0e2d39b97c756dae5341 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 26 Nov 2024 20:12:46 -0800 Subject: [PATCH 17/25] [PR #8699/11f0e7f backport][3.11] Reduce code indent in ResponseHandler.data_received (#10056) --- aiohttp/client_proto.py | 105 ++++++++++++++++++------------------- tests/test_client_proto.py | 84 ++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 54 deletions(-) diff --git a/aiohttp/client_proto.py b/aiohttp/client_proto.py index b899908d786..79f033e3e12 100644 --- a/aiohttp/client_proto.py +++ b/aiohttp/client_proto.py @@ -242,7 +242,7 @@ def data_received(self, data: bytes) -> None: if not data: return - # custom payload parser + # custom payload parser - currently always WebSocketReader if self._payload_parser is not None: eof, tail = self._payload_parser.feed_data(data) if eof: @@ -252,57 +252,56 @@ def data_received(self, data: bytes) -> None: if tail: self.data_received(tail) return - else: - if self._upgraded or self._parser is None: - # i.e. websocket connection, websocket parser is not set yet - self._tail += data + + if self._upgraded or self._parser is None: + # i.e. websocket connection, websocket parser is not set yet + self._tail += data + return + + # parse http messages + try: + messages, upgraded, tail = self._parser.feed_data(data) + except BaseException as underlying_exc: + if self.transport is not None: + # connection.release() could be called BEFORE + # data_received(), the transport is already + # closed in this case + self.transport.close() + # should_close is True after the call + if isinstance(underlying_exc, HttpProcessingError): + exc = HttpProcessingError( + code=underlying_exc.code, + message=underlying_exc.message, + headers=underlying_exc.headers, + ) else: - # parse http messages - try: - messages, upgraded, tail = self._parser.feed_data(data) - except BaseException as underlying_exc: - if self.transport is not None: - # connection.release() could be called BEFORE - # data_received(), the transport is already - # closed in this case - self.transport.close() - # should_close is True after the call - if isinstance(underlying_exc, HttpProcessingError): - exc = HttpProcessingError( - code=underlying_exc.code, - message=underlying_exc.message, - headers=underlying_exc.headers, - ) - else: - exc = HttpProcessingError() - self.set_exception(exc, underlying_exc) - return - - self._upgraded = upgraded - - payload: Optional[StreamReader] = None - for message, payload in messages: - if message.should_close: - self._should_close = True - - self._payload = payload - - if self._skip_payload or message.code in EMPTY_BODY_STATUS_CODES: - self.feed_data((message, EMPTY_PAYLOAD), 0) - else: - self.feed_data((message, payload), 0) - if payload is not None: - # new message(s) was processed - # register timeout handler unsubscribing - # either on end-of-stream or immediately for - # EMPTY_PAYLOAD - if payload is not EMPTY_PAYLOAD: - payload.on_eof(self._drop_timeout) - else: - self._drop_timeout() + exc = HttpProcessingError() + self.set_exception(exc, underlying_exc) + return - if tail: - if upgraded: - self.data_received(tail) - else: - self._tail = tail + self._upgraded = upgraded + + payload: Optional[StreamReader] = None + for message, payload in messages: + if message.should_close: + self._should_close = True + + self._payload = payload + + if self._skip_payload or message.code in EMPTY_BODY_STATUS_CODES: + self.feed_data((message, EMPTY_PAYLOAD), 0) + else: + self.feed_data((message, payload), 0) + + if payload is not None: + # new message(s) was processed + # register timeout handler unsubscribing + # either on end-of-stream or immediately for + # EMPTY_PAYLOAD + if payload is not EMPTY_PAYLOAD: + payload.on_eof(self._drop_timeout) + else: + self._drop_timeout() + + if upgraded and tail: + self.data_received(tail) diff --git a/tests/test_client_proto.py b/tests/test_client_proto.py index a70dc62e135..af1286dc310 100644 --- a/tests/test_client_proto.py +++ b/tests/test_client_proto.py @@ -72,7 +72,89 @@ async def test_uncompleted_message(loop) -> None: assert dict(exc.message.headers) == {"Location": "http://python.org/"} -async def test_client_protocol_readuntil_eof(loop) -> None: +async def test_data_received_after_close(loop: asyncio.AbstractEventLoop) -> None: + proto = ResponseHandler(loop=loop) + transport = mock.Mock() + proto.connection_made(transport) + proto.set_response_params(read_until_eof=True) + proto.close() + assert transport.close.called + transport.close.reset_mock() + proto.data_received(b"HTTP\r\n\r\n") + assert proto.should_close + assert not transport.close.called + assert isinstance(proto.exception(), http.HttpProcessingError) + + +async def test_multiple_responses_one_byte_at_a_time( + loop: asyncio.AbstractEventLoop, +) -> None: + proto = ResponseHandler(loop=loop) + proto.connection_made(mock.Mock()) + conn = mock.Mock(protocol=proto) + proto.set_response_params(read_until_eof=True) + + for _ in range(2): + messages = ( + b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nab" + b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\ncd" + b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nef" + ) + for i in range(len(messages)): + proto.data_received(messages[i : i + 1]) + + expected = [b"ab", b"cd", b"ef"] + for payload in expected: + response = ClientResponse( + "get", + URL("http://def-cl-resp.org"), + writer=mock.Mock(), + continue100=None, + timer=TimerNoop(), + request_info=mock.Mock(), + traces=[], + loop=loop, + session=mock.Mock(), + ) + await response.start(conn) + await response.read() == payload + + +async def test_unexpected_exception_during_data_received( + loop: asyncio.AbstractEventLoop, +) -> None: + proto = ResponseHandler(loop=loop) + + class PatchableHttpResponseParser(http.HttpResponseParser): + """Subclass of HttpResponseParser to make it patchable.""" + + with mock.patch( + "aiohttp.client_proto.HttpResponseParser", PatchableHttpResponseParser + ): + proto.connection_made(mock.Mock()) + conn = mock.Mock(protocol=proto) + proto.set_response_params(read_until_eof=True) + proto.data_received(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nab") + response = ClientResponse( + "get", + URL("http://def-cl-resp.org"), + writer=mock.Mock(), + continue100=None, + timer=TimerNoop(), + request_info=mock.Mock(), + traces=[], + loop=loop, + session=mock.Mock(), + ) + await response.start(conn) + await response.read() == b"ab" + with mock.patch.object(proto._parser, "feed_data", side_effect=ValueError): + proto.data_received(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\ncd") + + assert isinstance(proto.exception(), http.HttpProcessingError) + + +async def test_client_protocol_readuntil_eof(loop: asyncio.AbstractEventLoop) -> None: proto = ResponseHandler(loop=loop) transport = mock.Mock() proto.connection_made(transport) From 4409466813839221d6953e6886d09c569ad52b27 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 26 Nov 2024 20:17:28 -0800 Subject: [PATCH 18/25] [PR #8699/11f0e7f backport][3.12] Reduce code indent in ResponseHandler.data_received (#10057) --- aiohttp/client_proto.py | 105 ++++++++++++++++++------------------- tests/test_client_proto.py | 84 ++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 54 deletions(-) diff --git a/aiohttp/client_proto.py b/aiohttp/client_proto.py index b899908d786..79f033e3e12 100644 --- a/aiohttp/client_proto.py +++ b/aiohttp/client_proto.py @@ -242,7 +242,7 @@ def data_received(self, data: bytes) -> None: if not data: return - # custom payload parser + # custom payload parser - currently always WebSocketReader if self._payload_parser is not None: eof, tail = self._payload_parser.feed_data(data) if eof: @@ -252,57 +252,56 @@ def data_received(self, data: bytes) -> None: if tail: self.data_received(tail) return - else: - if self._upgraded or self._parser is None: - # i.e. websocket connection, websocket parser is not set yet - self._tail += data + + if self._upgraded or self._parser is None: + # i.e. websocket connection, websocket parser is not set yet + self._tail += data + return + + # parse http messages + try: + messages, upgraded, tail = self._parser.feed_data(data) + except BaseException as underlying_exc: + if self.transport is not None: + # connection.release() could be called BEFORE + # data_received(), the transport is already + # closed in this case + self.transport.close() + # should_close is True after the call + if isinstance(underlying_exc, HttpProcessingError): + exc = HttpProcessingError( + code=underlying_exc.code, + message=underlying_exc.message, + headers=underlying_exc.headers, + ) else: - # parse http messages - try: - messages, upgraded, tail = self._parser.feed_data(data) - except BaseException as underlying_exc: - if self.transport is not None: - # connection.release() could be called BEFORE - # data_received(), the transport is already - # closed in this case - self.transport.close() - # should_close is True after the call - if isinstance(underlying_exc, HttpProcessingError): - exc = HttpProcessingError( - code=underlying_exc.code, - message=underlying_exc.message, - headers=underlying_exc.headers, - ) - else: - exc = HttpProcessingError() - self.set_exception(exc, underlying_exc) - return - - self._upgraded = upgraded - - payload: Optional[StreamReader] = None - for message, payload in messages: - if message.should_close: - self._should_close = True - - self._payload = payload - - if self._skip_payload or message.code in EMPTY_BODY_STATUS_CODES: - self.feed_data((message, EMPTY_PAYLOAD), 0) - else: - self.feed_data((message, payload), 0) - if payload is not None: - # new message(s) was processed - # register timeout handler unsubscribing - # either on end-of-stream or immediately for - # EMPTY_PAYLOAD - if payload is not EMPTY_PAYLOAD: - payload.on_eof(self._drop_timeout) - else: - self._drop_timeout() + exc = HttpProcessingError() + self.set_exception(exc, underlying_exc) + return - if tail: - if upgraded: - self.data_received(tail) - else: - self._tail = tail + self._upgraded = upgraded + + payload: Optional[StreamReader] = None + for message, payload in messages: + if message.should_close: + self._should_close = True + + self._payload = payload + + if self._skip_payload or message.code in EMPTY_BODY_STATUS_CODES: + self.feed_data((message, EMPTY_PAYLOAD), 0) + else: + self.feed_data((message, payload), 0) + + if payload is not None: + # new message(s) was processed + # register timeout handler unsubscribing + # either on end-of-stream or immediately for + # EMPTY_PAYLOAD + if payload is not EMPTY_PAYLOAD: + payload.on_eof(self._drop_timeout) + else: + self._drop_timeout() + + if upgraded and tail: + self.data_received(tail) diff --git a/tests/test_client_proto.py b/tests/test_client_proto.py index a70dc62e135..af1286dc310 100644 --- a/tests/test_client_proto.py +++ b/tests/test_client_proto.py @@ -72,7 +72,89 @@ async def test_uncompleted_message(loop) -> None: assert dict(exc.message.headers) == {"Location": "http://python.org/"} -async def test_client_protocol_readuntil_eof(loop) -> None: +async def test_data_received_after_close(loop: asyncio.AbstractEventLoop) -> None: + proto = ResponseHandler(loop=loop) + transport = mock.Mock() + proto.connection_made(transport) + proto.set_response_params(read_until_eof=True) + proto.close() + assert transport.close.called + transport.close.reset_mock() + proto.data_received(b"HTTP\r\n\r\n") + assert proto.should_close + assert not transport.close.called + assert isinstance(proto.exception(), http.HttpProcessingError) + + +async def test_multiple_responses_one_byte_at_a_time( + loop: asyncio.AbstractEventLoop, +) -> None: + proto = ResponseHandler(loop=loop) + proto.connection_made(mock.Mock()) + conn = mock.Mock(protocol=proto) + proto.set_response_params(read_until_eof=True) + + for _ in range(2): + messages = ( + b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nab" + b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\ncd" + b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nef" + ) + for i in range(len(messages)): + proto.data_received(messages[i : i + 1]) + + expected = [b"ab", b"cd", b"ef"] + for payload in expected: + response = ClientResponse( + "get", + URL("http://def-cl-resp.org"), + writer=mock.Mock(), + continue100=None, + timer=TimerNoop(), + request_info=mock.Mock(), + traces=[], + loop=loop, + session=mock.Mock(), + ) + await response.start(conn) + await response.read() == payload + + +async def test_unexpected_exception_during_data_received( + loop: asyncio.AbstractEventLoop, +) -> None: + proto = ResponseHandler(loop=loop) + + class PatchableHttpResponseParser(http.HttpResponseParser): + """Subclass of HttpResponseParser to make it patchable.""" + + with mock.patch( + "aiohttp.client_proto.HttpResponseParser", PatchableHttpResponseParser + ): + proto.connection_made(mock.Mock()) + conn = mock.Mock(protocol=proto) + proto.set_response_params(read_until_eof=True) + proto.data_received(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nab") + response = ClientResponse( + "get", + URL("http://def-cl-resp.org"), + writer=mock.Mock(), + continue100=None, + timer=TimerNoop(), + request_info=mock.Mock(), + traces=[], + loop=loop, + session=mock.Mock(), + ) + await response.start(conn) + await response.read() == b"ab" + with mock.patch.object(proto._parser, "feed_data", side_effect=ValueError): + proto.data_received(b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\ncd") + + assert isinstance(proto.exception(), http.HttpProcessingError) + + +async def test_client_protocol_readuntil_eof(loop: asyncio.AbstractEventLoop) -> None: proto = ResponseHandler(loop=loop) transport = mock.Mock() proto.connection_made(transport) From a5a6981fe96790d5924df72db1f42bb021ed5265 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 26 Nov 2024 21:12:22 -0800 Subject: [PATCH 19/25] [PR #10058/12372d7 backport][3.11] Remove unreachable content length check for chunked encoding (#10060) --- aiohttp/web_response.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/aiohttp/web_response.py b/aiohttp/web_response.py index e05799ca7b7..cd2be24f1a3 100644 --- a/aiohttp/web_response.py +++ b/aiohttp/web_response.py @@ -181,14 +181,13 @@ def output_length(self) -> int: def enable_chunked_encoding(self, chunk_size: Optional[int] = None) -> None: """Enables automatic chunked transfer encoding.""" - self._chunked = True - if hdrs.CONTENT_LENGTH in self._headers: raise RuntimeError( "You can't enable chunked encoding when a content length is set" ) if chunk_size is not None: warnings.warn("Chunk size is deprecated #1615", DeprecationWarning) + self._chunked = True def enable_compression( self, @@ -493,8 +492,6 @@ async def _prepare_headers(self) -> None: if not self._must_be_empty_body: writer.enable_chunking() headers[hdrs.TRANSFER_ENCODING] = "chunked" - if hdrs.CONTENT_LENGTH in headers: - del headers[hdrs.CONTENT_LENGTH] elif self._length_check: # Disabled for WebSockets writer.length = self.content_length if writer.length is None: From d88c30c00a87425277dc26ca00e4746fd5b1d6cb Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 26 Nov 2024 21:12:28 -0800 Subject: [PATCH 20/25] [PR #10058/12372d7 backport][3.12] Remove unreachable content length check for chunked encoding (#10061) --- aiohttp/web_response.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/aiohttp/web_response.py b/aiohttp/web_response.py index e05799ca7b7..cd2be24f1a3 100644 --- a/aiohttp/web_response.py +++ b/aiohttp/web_response.py @@ -181,14 +181,13 @@ def output_length(self) -> int: def enable_chunked_encoding(self, chunk_size: Optional[int] = None) -> None: """Enables automatic chunked transfer encoding.""" - self._chunked = True - if hdrs.CONTENT_LENGTH in self._headers: raise RuntimeError( "You can't enable chunked encoding when a content length is set" ) if chunk_size is not None: warnings.warn("Chunk size is deprecated #1615", DeprecationWarning) + self._chunked = True def enable_compression( self, @@ -493,8 +492,6 @@ async def _prepare_headers(self) -> None: if not self._must_be_empty_body: writer.enable_chunking() headers[hdrs.TRANSFER_ENCODING] = "chunked" - if hdrs.CONTENT_LENGTH in headers: - del headers[hdrs.CONTENT_LENGTH] elif self._length_check: # Disabled for WebSockets writer.length = self.content_length if writer.length is None: From 1b78cae44d97e3eb5ea15a25319d5a5a3daf8c9f Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 05:16:47 +0000 Subject: [PATCH 21/25] [PR #10059/aac6f741 backport][3.11] Combine executor jobs in FileResponse sendfile_fallback (#10062) Co-authored-by: J. Nick Koston --- aiohttp/web_fileresponse.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index e7951acea16..3b2bc2caf12 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -88,6 +88,10 @@ def __init__( self._path = pathlib.Path(path) self._chunk_size = chunk_size + def _seek_and_read(self, fobj: IO[Any], offset: int, chunk_size: int) -> bytes: + fobj.seek(offset) + return fobj.read(chunk_size) # type: ignore[no-any-return] + async def _sendfile_fallback( self, writer: AbstractStreamWriter, fobj: IO[Any], offset: int, count: int ) -> AbstractStreamWriter: @@ -96,10 +100,9 @@ async def _sendfile_fallback( chunk_size = self._chunk_size loop = asyncio.get_event_loop() - - await loop.run_in_executor(None, fobj.seek, offset) - - chunk = await loop.run_in_executor(None, fobj.read, chunk_size) + chunk = await loop.run_in_executor( + None, self._seek_and_read, fobj, offset, chunk_size + ) while chunk: await writer.write(chunk) count = count - chunk_size From 6ee57820b26db57f58720a7afc41854393b9f359 Mon Sep 17 00:00:00 2001 From: "patchback[bot]" <45432694+patchback[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 05:20:37 +0000 Subject: [PATCH 22/25] [PR #10059/aac6f741 backport][3.12] Combine executor jobs in FileResponse sendfile_fallback (#10063) Co-authored-by: J. Nick Koston --- aiohttp/web_fileresponse.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index e7951acea16..3b2bc2caf12 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -88,6 +88,10 @@ def __init__( self._path = pathlib.Path(path) self._chunk_size = chunk_size + def _seek_and_read(self, fobj: IO[Any], offset: int, chunk_size: int) -> bytes: + fobj.seek(offset) + return fobj.read(chunk_size) # type: ignore[no-any-return] + async def _sendfile_fallback( self, writer: AbstractStreamWriter, fobj: IO[Any], offset: int, count: int ) -> AbstractStreamWriter: @@ -96,10 +100,9 @@ async def _sendfile_fallback( chunk_size = self._chunk_size loop = asyncio.get_event_loop() - - await loop.run_in_executor(None, fobj.seek, offset) - - chunk = await loop.run_in_executor(None, fobj.read, chunk_size) + chunk = await loop.run_in_executor( + None, self._seek_and_read, fobj, offset, chunk_size + ) while chunk: await writer.write(chunk) count = count - chunk_size From 1f8ade0748521753e2206aad2ff6af6f11c438e6 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 27 Nov 2024 09:13:24 -0800 Subject: [PATCH 23/25] [PR #10055/c11fe96 backport][3.12] Downgrade logging of invalid HTTP methods on first request to debug level (#10065) --- CHANGES/10055.misc.rst | 3 + aiohttp/_http_parser.pyx | 4 +- aiohttp/http_exceptions.py | 7 +++ aiohttp/http_parser.py | 5 +- aiohttp/web_protocol.py | 14 ++++- tests/test_http_parser.py | 2 +- tests/test_web_server.py | 123 ++++++++++++++++++++++++++++++++++++- 7 files changed, 152 insertions(+), 6 deletions(-) create mode 100644 CHANGES/10055.misc.rst diff --git a/CHANGES/10055.misc.rst b/CHANGES/10055.misc.rst new file mode 100644 index 00000000000..3a5fa074f77 --- /dev/null +++ b/CHANGES/10055.misc.rst @@ -0,0 +1,3 @@ +Downgraded logging of invalid HTTP method exceptions on the first request to debug level -- by :user:`bdraco`. + +HTTP requests starting with an invalid method are relatively common, especially when connected to the public internet, because browsers or other clients may try to speak SSL to a plain-text server or vice-versa. These exceptions can quickly fill the log with noise when nothing is wrong. diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index dd317edaf79..988e4247f93 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -23,6 +23,7 @@ from aiohttp.helpers import DEBUG, set_exception from .http_exceptions import ( BadHttpMessage, + BadHttpMethod, BadStatusLine, ContentLengthError, InvalidHeader, @@ -831,8 +832,9 @@ cdef parser_error_from_errno(cparser.llhttp_t* parser, data, pointer): cparser.HPE_INVALID_EOF_STATE, cparser.HPE_INVALID_TRANSFER_ENCODING}: return BadHttpMessage(err_msg) + elif errno == cparser.HPE_INVALID_METHOD: + return BadHttpMethod(error=err_msg) elif errno in {cparser.HPE_INVALID_STATUS, - cparser.HPE_INVALID_METHOD, cparser.HPE_INVALID_VERSION}: return BadStatusLine(error=err_msg) elif errno == cparser.HPE_INVALID_URL: diff --git a/aiohttp/http_exceptions.py b/aiohttp/http_exceptions.py index c43ee0d9659..b8dda999acf 100644 --- a/aiohttp/http_exceptions.py +++ b/aiohttp/http_exceptions.py @@ -101,5 +101,12 @@ def __init__(self, line: str = "", error: Optional[str] = None) -> None: self.line = line +class BadHttpMethod(BadStatusLine): + """Invalid HTTP method in status line.""" + + def __init__(self, line: str = "", error: Optional[str] = None) -> None: + super().__init__(line, error or f"Bad HTTP method in status line {line!r}") + + class InvalidURLError(BadHttpMessage): pass diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 148a30b2ca1..1b8b5b4d49e 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -38,6 +38,7 @@ ) from .http_exceptions import ( BadHttpMessage, + BadHttpMethod, BadStatusLine, ContentEncodingError, ContentLengthError, @@ -576,7 +577,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: try: method, path, version = line.split(" ", maxsplit=2) except ValueError: - raise BadStatusLine(line) from None + raise BadHttpMethod(line) from None if len(path) > self.max_line_size: raise LineTooLong( @@ -585,7 +586,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: # method if not TOKENRE.fullmatch(method): - raise BadStatusLine(method) + raise BadHttpMethod(method) # version match = VERSRE.fullmatch(version) diff --git a/aiohttp/web_protocol.py b/aiohttp/web_protocol.py index 2201eef30ad..fe2ae8a1269 100644 --- a/aiohttp/web_protocol.py +++ b/aiohttp/web_protocol.py @@ -35,6 +35,7 @@ RawRequestMessage, StreamWriter, ) +from .http_exceptions import BadHttpMethod from .log import access_logger, server_logger from .streams import EMPTY_PAYLOAD, StreamReader from .tcp_helpers import tcp_keepalive @@ -687,7 +688,18 @@ def handle_error( Returns HTTP response with specific status code. Logs additional information. It always closes current connection. """ - self.log_exception("Error handling request", exc_info=exc) + if ( + self._manager + and self._manager.requests_count == 1 + and isinstance(exc, BadHttpMethod) + ): + # BadHttpMethod is common when a client sends non-HTTP + # or encrypted traffic to an HTTP port. This is expected + # to happen when connected to the public internet so we log + # it at the debug level as to not fill logs with noise. + self.logger.debug("Error handling request", exc_info=exc) + else: + self.log_exception("Error handling request", exc_info=exc) # some data already got sent, connection is broken if request.writer.output_size > 0: diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index a8305da84f7..58fef625f82 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -965,7 +965,7 @@ def test_http_request_parser_two_slashes(parser) -> None: def test_http_request_parser_bad_method( parser, rfc9110_5_6_2_token_delim: bytes ) -> None: - with pytest.raises(http_exceptions.BadStatusLine): + with pytest.raises(http_exceptions.BadHttpMethod): parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n') diff --git a/tests/test_web_server.py b/tests/test_web_server.py index 9e2d078c1a0..e77df623020 100644 --- a/tests/test_web_server.py +++ b/tests/test_web_server.py @@ -1,11 +1,14 @@ import asyncio import socket from contextlib import suppress +from typing import NoReturn from unittest import mock import pytest from aiohttp import client, web +from aiohttp.http_exceptions import BadHttpMethod, BadStatusLine +from aiohttp.pytest_plugin import AiohttpClient, AiohttpRawServer async def test_simple_server(aiohttp_raw_server, aiohttp_client) -> None: @@ -56,7 +59,125 @@ async def handler(request): logger.exception.assert_called_with("Error handling request", exc_info=exc) -async def test_raw_server_handler_timeout(aiohttp_raw_server, aiohttp_client) -> None: +async def test_raw_server_logs_invalid_method_with_loop_debug( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error") + + async def handler(request: web.BaseRequest) -> NoReturn: + raise exc + + loop = asyncio.get_event_loop() + loop.set_debug(True) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + + txt = await resp.text() + assert "Traceback (most recent call last):\n" in txt + + # BadHttpMethod should be logged as debug + # on the first request since the client may + # be probing for TLS/SSL support which is + # expected to fail + logger.debug.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_logs_invalid_method_without_loop_debug( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error") + + async def handler(request: web.BaseRequest) -> NoReturn: + raise exc + + loop = asyncio.get_event_loop() + loop.set_debug(False) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger, debug=False) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + + txt = await resp.text() + assert "Traceback (most recent call last):\n" not in txt + + # BadHttpMethod should be logged as debug + # on the first request since the client may + # be probing for TLS/SSL support which is + # expected to fail + logger.debug.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_logs_invalid_method_second_request( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error") + request_count = 0 + + async def handler(request: web.BaseRequest) -> web.Response: + nonlocal request_count + request_count += 1 + if request_count == 2: + raise exc + return web.Response() + + loop = asyncio.get_event_loop() + loop.set_debug(False) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 200 + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + # BadHttpMethod should be logged as an exception + # if its not the first request since we know + # that the client already was speaking HTTP + logger.exception.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_logs_bad_status_line_as_exception( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadStatusLine(b"\x16\x03\x03\x01F\x01".decode(), "error") + + async def handler(request: web.BaseRequest) -> NoReturn: + raise exc + + loop = asyncio.get_event_loop() + loop.set_debug(False) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger, debug=False) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + + txt = await resp.text() + assert "Traceback (most recent call last):\n" not in txt + + logger.exception.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_handler_timeout( + aiohttp_raw_server: AiohttpRawServer, aiohttp_client: AiohttpClient +) -> None: + loop = asyncio.get_event_loop() + loop.set_debug(True) exc = asyncio.TimeoutError("error") async def handler(request): From 13152c39f9360cef7ba1d78252afdc91cf220441 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 27 Nov 2024 09:17:40 -0800 Subject: [PATCH 24/25] [PR #10055/c11fe96 backport][3.11] Downgrade logging of invalid HTTP methods on first request to debug level (#10064) --- CHANGES/10055.misc.rst | 3 + aiohttp/_http_parser.pyx | 4 +- aiohttp/http_exceptions.py | 7 +++ aiohttp/http_parser.py | 5 +- aiohttp/web_protocol.py | 14 ++++- tests/test_http_parser.py | 2 +- tests/test_web_server.py | 123 ++++++++++++++++++++++++++++++++++++- 7 files changed, 152 insertions(+), 6 deletions(-) create mode 100644 CHANGES/10055.misc.rst diff --git a/CHANGES/10055.misc.rst b/CHANGES/10055.misc.rst new file mode 100644 index 00000000000..3a5fa074f77 --- /dev/null +++ b/CHANGES/10055.misc.rst @@ -0,0 +1,3 @@ +Downgraded logging of invalid HTTP method exceptions on the first request to debug level -- by :user:`bdraco`. + +HTTP requests starting with an invalid method are relatively common, especially when connected to the public internet, because browsers or other clients may try to speak SSL to a plain-text server or vice-versa. These exceptions can quickly fill the log with noise when nothing is wrong. diff --git a/aiohttp/_http_parser.pyx b/aiohttp/_http_parser.pyx index dd317edaf79..988e4247f93 100644 --- a/aiohttp/_http_parser.pyx +++ b/aiohttp/_http_parser.pyx @@ -23,6 +23,7 @@ from aiohttp.helpers import DEBUG, set_exception from .http_exceptions import ( BadHttpMessage, + BadHttpMethod, BadStatusLine, ContentLengthError, InvalidHeader, @@ -831,8 +832,9 @@ cdef parser_error_from_errno(cparser.llhttp_t* parser, data, pointer): cparser.HPE_INVALID_EOF_STATE, cparser.HPE_INVALID_TRANSFER_ENCODING}: return BadHttpMessage(err_msg) + elif errno == cparser.HPE_INVALID_METHOD: + return BadHttpMethod(error=err_msg) elif errno in {cparser.HPE_INVALID_STATUS, - cparser.HPE_INVALID_METHOD, cparser.HPE_INVALID_VERSION}: return BadStatusLine(error=err_msg) elif errno == cparser.HPE_INVALID_URL: diff --git a/aiohttp/http_exceptions.py b/aiohttp/http_exceptions.py index c43ee0d9659..b8dda999acf 100644 --- a/aiohttp/http_exceptions.py +++ b/aiohttp/http_exceptions.py @@ -101,5 +101,12 @@ def __init__(self, line: str = "", error: Optional[str] = None) -> None: self.line = line +class BadHttpMethod(BadStatusLine): + """Invalid HTTP method in status line.""" + + def __init__(self, line: str = "", error: Optional[str] = None) -> None: + super().__init__(line, error or f"Bad HTTP method in status line {line!r}") + + class InvalidURLError(BadHttpMessage): pass diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 148a30b2ca1..1b8b5b4d49e 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -38,6 +38,7 @@ ) from .http_exceptions import ( BadHttpMessage, + BadHttpMethod, BadStatusLine, ContentEncodingError, ContentLengthError, @@ -576,7 +577,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: try: method, path, version = line.split(" ", maxsplit=2) except ValueError: - raise BadStatusLine(line) from None + raise BadHttpMethod(line) from None if len(path) > self.max_line_size: raise LineTooLong( @@ -585,7 +586,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage: # method if not TOKENRE.fullmatch(method): - raise BadStatusLine(method) + raise BadHttpMethod(method) # version match = VERSRE.fullmatch(version) diff --git a/aiohttp/web_protocol.py b/aiohttp/web_protocol.py index 2201eef30ad..fe2ae8a1269 100644 --- a/aiohttp/web_protocol.py +++ b/aiohttp/web_protocol.py @@ -35,6 +35,7 @@ RawRequestMessage, StreamWriter, ) +from .http_exceptions import BadHttpMethod from .log import access_logger, server_logger from .streams import EMPTY_PAYLOAD, StreamReader from .tcp_helpers import tcp_keepalive @@ -687,7 +688,18 @@ def handle_error( Returns HTTP response with specific status code. Logs additional information. It always closes current connection. """ - self.log_exception("Error handling request", exc_info=exc) + if ( + self._manager + and self._manager.requests_count == 1 + and isinstance(exc, BadHttpMethod) + ): + # BadHttpMethod is common when a client sends non-HTTP + # or encrypted traffic to an HTTP port. This is expected + # to happen when connected to the public internet so we log + # it at the debug level as to not fill logs with noise. + self.logger.debug("Error handling request", exc_info=exc) + else: + self.log_exception("Error handling request", exc_info=exc) # some data already got sent, connection is broken if request.writer.output_size > 0: diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index a8305da84f7..58fef625f82 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -965,7 +965,7 @@ def test_http_request_parser_two_slashes(parser) -> None: def test_http_request_parser_bad_method( parser, rfc9110_5_6_2_token_delim: bytes ) -> None: - with pytest.raises(http_exceptions.BadStatusLine): + with pytest.raises(http_exceptions.BadHttpMethod): parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n') diff --git a/tests/test_web_server.py b/tests/test_web_server.py index 9e2d078c1a0..e77df623020 100644 --- a/tests/test_web_server.py +++ b/tests/test_web_server.py @@ -1,11 +1,14 @@ import asyncio import socket from contextlib import suppress +from typing import NoReturn from unittest import mock import pytest from aiohttp import client, web +from aiohttp.http_exceptions import BadHttpMethod, BadStatusLine +from aiohttp.pytest_plugin import AiohttpClient, AiohttpRawServer async def test_simple_server(aiohttp_raw_server, aiohttp_client) -> None: @@ -56,7 +59,125 @@ async def handler(request): logger.exception.assert_called_with("Error handling request", exc_info=exc) -async def test_raw_server_handler_timeout(aiohttp_raw_server, aiohttp_client) -> None: +async def test_raw_server_logs_invalid_method_with_loop_debug( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error") + + async def handler(request: web.BaseRequest) -> NoReturn: + raise exc + + loop = asyncio.get_event_loop() + loop.set_debug(True) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + + txt = await resp.text() + assert "Traceback (most recent call last):\n" in txt + + # BadHttpMethod should be logged as debug + # on the first request since the client may + # be probing for TLS/SSL support which is + # expected to fail + logger.debug.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_logs_invalid_method_without_loop_debug( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error") + + async def handler(request: web.BaseRequest) -> NoReturn: + raise exc + + loop = asyncio.get_event_loop() + loop.set_debug(False) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger, debug=False) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + + txt = await resp.text() + assert "Traceback (most recent call last):\n" not in txt + + # BadHttpMethod should be logged as debug + # on the first request since the client may + # be probing for TLS/SSL support which is + # expected to fail + logger.debug.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_logs_invalid_method_second_request( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error") + request_count = 0 + + async def handler(request: web.BaseRequest) -> web.Response: + nonlocal request_count + request_count += 1 + if request_count == 2: + raise exc + return web.Response() + + loop = asyncio.get_event_loop() + loop.set_debug(False) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 200 + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + # BadHttpMethod should be logged as an exception + # if its not the first request since we know + # that the client already was speaking HTTP + logger.exception.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_logs_bad_status_line_as_exception( + aiohttp_raw_server: AiohttpRawServer, + aiohttp_client: AiohttpClient, + loop: asyncio.AbstractEventLoop, +) -> None: + exc = BadStatusLine(b"\x16\x03\x03\x01F\x01".decode(), "error") + + async def handler(request: web.BaseRequest) -> NoReturn: + raise exc + + loop = asyncio.get_event_loop() + loop.set_debug(False) + logger = mock.Mock() + server = await aiohttp_raw_server(handler, logger=logger, debug=False) + cli = await aiohttp_client(server) + resp = await cli.get("/path/to") + assert resp.status == 500 + assert resp.headers["Content-Type"].startswith("text/plain") + + txt = await resp.text() + assert "Traceback (most recent call last):\n" not in txt + + logger.exception.assert_called_with("Error handling request", exc_info=exc) + + +async def test_raw_server_handler_timeout( + aiohttp_raw_server: AiohttpRawServer, aiohttp_client: AiohttpClient +) -> None: + loop = asyncio.get_event_loop() + loop.set_debug(True) exc = asyncio.TimeoutError("error") async def handler(request): From 5ddf7208524f9ba5957d9a84d3e205f4f2692e28 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 27 Nov 2024 09:34:23 -0800 Subject: [PATCH 25/25] Release 3.11.8 (#10066) --- CHANGES.rst | 60 ++++++++++++++++++++++++++++++++++++++++++ CHANGES/10029.misc.rst | 1 - CHANGES/10030.misc.rst | 1 - CHANGES/10037.misc.rst | 1 - CHANGES/10043.misc.rst | 1 - CHANGES/10049.misc.rst | 1 - CHANGES/10055.misc.rst | 3 --- aiohttp/__init__.py | 2 +- 8 files changed, 61 insertions(+), 9 deletions(-) delete mode 100644 CHANGES/10029.misc.rst delete mode 100644 CHANGES/10030.misc.rst delete mode 100644 CHANGES/10037.misc.rst delete mode 100644 CHANGES/10043.misc.rst delete mode 100644 CHANGES/10049.misc.rst delete mode 100644 CHANGES/10055.misc.rst diff --git a/CHANGES.rst b/CHANGES.rst index e204f07b370..8a003a78c45 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,66 @@ .. towncrier release notes start +3.11.8 (2024-11-27) +=================== + +Miscellaneous internal changes +------------------------------ + +- Improved performance of creating :class:`aiohttp.ClientResponse` objects when there are no cookies -- by :user:`bdraco`. + + + *Related issues and pull requests on GitHub:* + :issue:`10029`. + + + +- Improved performance of creating :class:`aiohttp.ClientResponse` objects -- by :user:`bdraco`. + + + *Related issues and pull requests on GitHub:* + :issue:`10030`. + + + +- Improved performances of creating objects during the HTTP request lifecycle -- by :user:`bdraco`. + + + *Related issues and pull requests on GitHub:* + :issue:`10037`. + + + +- Improved performance of constructing :class:`aiohttp.web.Response` with headers -- by :user:`bdraco`. + + + *Related issues and pull requests on GitHub:* + :issue:`10043`. + + + +- Improved performance of making requests when there are no auto headers to skip -- by :user:`bdraco`. + + + *Related issues and pull requests on GitHub:* + :issue:`10049`. + + + +- Downgraded logging of invalid HTTP method exceptions on the first request to debug level -- by :user:`bdraco`. + + HTTP requests starting with an invalid method are relatively common, especially when connected to the public internet, because browsers or other clients may try to speak SSL to a plain-text server or vice-versa. These exceptions can quickly fill the log with noise when nothing is wrong. + + + *Related issues and pull requests on GitHub:* + :issue:`10055`. + + + + +---- + + 3.11.7 (2024-11-21) =================== diff --git a/CHANGES/10029.misc.rst b/CHANGES/10029.misc.rst deleted file mode 100644 index d98729ecac8..00000000000 --- a/CHANGES/10029.misc.rst +++ /dev/null @@ -1 +0,0 @@ -Improved performance of creating :class:`aiohttp.ClientResponse` objects when there are no cookies -- by :user:`bdraco`. diff --git a/CHANGES/10030.misc.rst b/CHANGES/10030.misc.rst deleted file mode 100644 index 68ed7d058d6..00000000000 --- a/CHANGES/10030.misc.rst +++ /dev/null @@ -1 +0,0 @@ -Improved performance of creating :class:`aiohttp.ClientResponse` objects -- by :user:`bdraco`. diff --git a/CHANGES/10037.misc.rst b/CHANGES/10037.misc.rst deleted file mode 100644 index 655c804c995..00000000000 --- a/CHANGES/10037.misc.rst +++ /dev/null @@ -1 +0,0 @@ -Improved performances of creating objects during the HTTP request lifecycle -- by :user:`bdraco`. diff --git a/CHANGES/10043.misc.rst b/CHANGES/10043.misc.rst deleted file mode 100644 index cfd4e88ee24..00000000000 --- a/CHANGES/10043.misc.rst +++ /dev/null @@ -1 +0,0 @@ -Improved performance of constructing :class:`aiohttp.web.Response` with headers -- by :user:`bdraco`. diff --git a/CHANGES/10049.misc.rst b/CHANGES/10049.misc.rst deleted file mode 100644 index 58f61d48420..00000000000 --- a/CHANGES/10049.misc.rst +++ /dev/null @@ -1 +0,0 @@ -Improved performance of making requests when there are no auto headers to skip -- by :user:`bdraco`. diff --git a/CHANGES/10055.misc.rst b/CHANGES/10055.misc.rst deleted file mode 100644 index 3a5fa074f77..00000000000 --- a/CHANGES/10055.misc.rst +++ /dev/null @@ -1,3 +0,0 @@ -Downgraded logging of invalid HTTP method exceptions on the first request to debug level -- by :user:`bdraco`. - -HTTP requests starting with an invalid method are relatively common, especially when connected to the public internet, because browsers or other clients may try to speak SSL to a plain-text server or vice-versa. These exceptions can quickly fill the log with noise when nothing is wrong. diff --git a/aiohttp/__init__.py b/aiohttp/__init__.py index 838c31a5fcd..32273ac23b0 100644 --- a/aiohttp/__init__.py +++ b/aiohttp/__init__.py @@ -1,4 +1,4 @@ -__version__ = "3.11.8.dev0" +__version__ = "3.11.8" from typing import TYPE_CHECKING, Tuple