From 7557b03da53a51a21dbd66ee8206755fdcd0d728 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 13:20:52 -0600 Subject: [PATCH 1/3] Fix deprecated calls to `guess_type` for paths with Python 3.13 (#10102) --- CHANGES/10102.bugfix.rst | 1 + aiohttp/payload.py | 7 ++++++- aiohttp/web_fileresponse.py | 9 ++++++--- 3 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 CHANGES/10102.bugfix.rst diff --git a/CHANGES/10102.bugfix.rst b/CHANGES/10102.bugfix.rst new file mode 100644 index 00000000000..86dda8684dd --- /dev/null +++ b/CHANGES/10102.bugfix.rst @@ -0,0 +1 @@ +Replaced deprecated call to :func:`mimetypes.guess_type` with :func:`mimetypes.guess_file_type` when using Python 3.13+ -- by :user:`bdraco`. diff --git a/aiohttp/payload.py b/aiohttp/payload.py index 1bc7f27de8b..55a7a677f49 100644 --- a/aiohttp/payload.py +++ b/aiohttp/payload.py @@ -4,6 +4,7 @@ import json import mimetypes import os +import sys import warnings from abc import ABC, abstractmethod from itertools import chain @@ -169,7 +170,11 @@ def __init__( assert isinstance(content_type, str) self._headers[hdrs.CONTENT_TYPE] = content_type elif self._filename is not None: - content_type = mimetypes.guess_type(self._filename)[0] + if sys.version_info >= (3, 13): + guesser = mimetypes.guess_file_type + else: + guesser = mimetypes.guess_type + content_type = guesser(self._filename)[0] if content_type is None: content_type = self._default_content_type self._headers[hdrs.CONTENT_TYPE] = content_type diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index daadda9a207..dacbb2b5892 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -1,6 +1,7 @@ import asyncio import os import pathlib +import sys from contextlib import suppress from mimetypes import MimeTypes from stat import S_ISREG @@ -314,9 +315,11 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # extension of the request path. The encoding returned by guess_type # can be ignored since the map was cleared above. if hdrs.CONTENT_TYPE not in self.headers: - self.content_type = ( - CONTENT_TYPES.guess_type(self._path)[0] or FALLBACK_CONTENT_TYPE - ) + if sys.version_info >= (3, 13): + guesser = CONTENT_TYPES.guess_file_type + else: + guesser = CONTENT_TYPES.guess_type + self.content_type = guesser(self._path)[0] or FALLBACK_CONTENT_TYPE if file_encoding: self.headers[hdrs.CONTENT_ENCODING] = file_encoding From 678993a4bd071200fbcc14fa121c6673221aa540 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 14:13:05 -0600 Subject: [PATCH 2/3] Fix race in `FileResponse` if file is replaced during `prepare` (#10101) --- CHANGES/10101.bugfix.rst | 1 + aiohttp/web_fileresponse.py | 79 ++++++++++++++++++++++++--------- tests/test_web_urldispatcher.py | 7 +-- 3 files changed, 63 insertions(+), 24 deletions(-) create mode 100644 CHANGES/10101.bugfix.rst diff --git a/CHANGES/10101.bugfix.rst b/CHANGES/10101.bugfix.rst new file mode 100644 index 00000000000..e06195ac028 --- /dev/null +++ b/CHANGES/10101.bugfix.rst @@ -0,0 +1 @@ +Fixed race condition in :class:`aiohttp.web.FileResponse` that could have resulted in an incorrect response if the file was replaced on the file system during ``prepare`` -- by :user:`bdraco`. diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index dacbb2b5892..eafc3b051cd 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -1,4 +1,5 @@ import asyncio +import io import os import pathlib import sys @@ -14,6 +15,7 @@ Callable, Final, Optional, + Set, Tuple, cast, ) @@ -70,6 +72,9 @@ CONTENT_TYPES.add_type(content_type, extension) +_CLOSE_FUTURES: Set[asyncio.Future[None]] = set() + + class FileResponse(StreamResponse): """A response object can be used to send files.""" @@ -158,10 +163,10 @@ async def _precondition_failed( self.content_length = 0 return await super().prepare(request) - def _get_file_path_stat_encoding( + def _open_file_path_stat_encoding( self, accept_encoding: str - ) -> Tuple[pathlib.Path, os.stat_result, Optional[str]]: - """Return the file path, stat result, and encoding. + ) -> Tuple[Optional[io.BufferedReader], os.stat_result, Optional[str]]: + """Return the io object, stat result, and encoding. If an uncompressed file is returned, the encoding is set to :py:data:`None`. @@ -179,10 +184,27 @@ def _get_file_path_stat_encoding( # Do not follow symlinks and ignore any non-regular files. st = compressed_path.lstat() if S_ISREG(st.st_mode): - return compressed_path, st, file_encoding + fobj = compressed_path.open("rb") + with suppress(OSError): + # fstat() may not be available on all platforms + # Once we open the file, we want the fstat() to ensure + # the file has not changed between the first stat() + # and the open(). + st = os.stat(fobj.fileno()) + return fobj, st, file_encoding # Fallback to the uncompressed file - return file_path, file_path.stat(), None + st = file_path.stat() + if not S_ISREG(st.st_mode): + return None, st, None + fobj = file_path.open("rb") + with suppress(OSError): + # fstat() may not be available on all platforms + # Once we open the file, we want the fstat() to ensure + # the file has not changed between the first stat() + # and the open(). + st = os.stat(fobj.fileno()) + return fobj, st, None async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter]: loop = asyncio.get_running_loop() @@ -190,20 +212,44 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # https://www.rfc-editor.org/rfc/rfc9110#section-8.4.1 accept_encoding = request.headers.get(hdrs.ACCEPT_ENCODING, "").lower() try: - file_path, st, file_encoding = await loop.run_in_executor( - None, self._get_file_path_stat_encoding, accept_encoding + fobj, st, file_encoding = await loop.run_in_executor( + None, self._open_file_path_stat_encoding, accept_encoding ) + except PermissionError: + self.set_status(HTTPForbidden.status_code) + return await super().prepare(request) except OSError: # Most likely to be FileNotFoundError or OSError for circular # symlinks in python >= 3.13, so respond with 404. self.set_status(HTTPNotFound.status_code) return await super().prepare(request) - # Forbid special files like sockets, pipes, devices, etc. - if not S_ISREG(st.st_mode): - self.set_status(HTTPForbidden.status_code) - return await super().prepare(request) + try: + # Forbid special files like sockets, pipes, devices, etc. + if not fobj or not S_ISREG(st.st_mode): + self.set_status(HTTPForbidden.status_code) + return await super().prepare(request) + return await self._prepare_open_file(request, fobj, st, file_encoding) + finally: + if fobj: + # We do not await here because we do not want to wait + # for the executor to finish before returning the response + # so the connection can begin servicing another request + # as soon as possible. + close_future = loop.run_in_executor(None, fobj.close) + # Hold a strong reference to the future to prevent it from being + # garbage collected before it completes. + _CLOSE_FUTURES.add(close_future) + close_future.add_done_callback(_CLOSE_FUTURES.remove) + + async def _prepare_open_file( + self, + request: "BaseRequest", + fobj: io.BufferedReader, + st: os.stat_result, + file_encoding: Optional[str], + ) -> Optional[AbstractStreamWriter]: etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" last_modified = st.st_mtime @@ -346,18 +392,9 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter if count == 0 or must_be_empty_body(request.method, self.status): return await super().prepare(request) - try: - fobj = await loop.run_in_executor(None, file_path.open, "rb") - except PermissionError: - self.set_status(HTTPForbidden.status_code) - return await super().prepare(request) - if start: # be aware that start could be None or int=0 here. offset = start else: offset = 0 - try: - return await self._sendfile(request, fobj, offset, count) - finally: - await asyncio.shield(loop.run_in_executor(None, fobj.close)) + return await self._sendfile(request, fobj, offset, count) diff --git a/tests/test_web_urldispatcher.py b/tests/test_web_urldispatcher.py index 5cd4aebdc55..d21ecaa101a 100644 --- a/tests/test_web_urldispatcher.py +++ b/tests/test_web_urldispatcher.py @@ -579,16 +579,17 @@ async def test_access_mock_special_resource( my_special.touch() real_result = my_special.stat() - real_stat = pathlib.Path.stat + real_stat = os.stat - def mock_stat(self: pathlib.Path, **kwargs: Any) -> os.stat_result: - s = real_stat(self, **kwargs) + def mock_stat(path: Any, **kwargs: Any) -> os.stat_result: + s = real_stat(path, **kwargs) if os.path.samestat(s, real_result): mock_mode = S_IFIFO | S_IMODE(s.st_mode) s = os.stat_result([mock_mode] + list(s)[1:]) return s monkeypatch.setattr("pathlib.Path.stat", mock_stat) + monkeypatch.setattr("os.stat", mock_stat) app = web.Application() app.router.add_static("/", str(tmp_path)) From 84bb77d14484c04561fabb7ad628aee903531955 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 4 Dec 2024 14:51:33 -0600 Subject: [PATCH 3/3] Use internal `self._headers` var in `FileResponse` (#10107) --- aiohttp/web_fileresponse.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index eafc3b051cd..1177eaf7af8 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -312,7 +312,7 @@ async def _prepare_open_file( # # Will do the same below. Many servers ignore this and do not # send a Content-Range header with HTTP 416 - self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" + self._headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" self.set_status(HTTPRequestRangeNotSatisfiable.status_code) return await super().prepare(request) @@ -348,7 +348,7 @@ async def _prepare_open_file( # suffix-byte-range-spec with a non-zero suffix-length, # then the byte-range-set is satisfiable. Otherwise, the # byte-range-set is unsatisfiable. - self.headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" + self._headers[hdrs.CONTENT_RANGE] = f"bytes */{file_size}" self.set_status(HTTPRequestRangeNotSatisfiable.status_code) return await super().prepare(request) @@ -360,7 +360,7 @@ async def _prepare_open_file( # If the Content-Type header is not already set, guess it based on the # extension of the request path. The encoding returned by guess_type # can be ignored since the map was cleared above. - if hdrs.CONTENT_TYPE not in self.headers: + if hdrs.CONTENT_TYPE not in self._headers: if sys.version_info >= (3, 13): guesser = CONTENT_TYPES.guess_file_type else: @@ -368,8 +368,8 @@ async def _prepare_open_file( self.content_type = guesser(self._path)[0] or FALLBACK_CONTENT_TYPE if file_encoding: - self.headers[hdrs.CONTENT_ENCODING] = file_encoding - self.headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING + self._headers[hdrs.CONTENT_ENCODING] = file_encoding + self._headers[hdrs.VARY] = hdrs.ACCEPT_ENCODING # Disable compression if we are already sending # a compressed file since we don't want to double # compress. @@ -379,12 +379,12 @@ async def _prepare_open_file( self.last_modified = st.st_mtime # type: ignore[assignment] self.content_length = count - self.headers[hdrs.ACCEPT_RANGES] = "bytes" + self._headers[hdrs.ACCEPT_RANGES] = "bytes" real_start = cast(int, start) if status == HTTPPartialContent.status_code: - self.headers[hdrs.CONTENT_RANGE] = "bytes {}-{}/{}".format( + self._headers[hdrs.CONTENT_RANGE] = "bytes {}-{}/{}".format( real_start, real_start + count - 1, file_size )