diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
index 16913289a02f59..2c83101b6b26fe 100644
--- a/Include/internal/pycore_ceval.h
+++ b/Include/internal/pycore_ceval.h
@@ -249,16 +249,7 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) {
 
 PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);
 
-static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) {
-    uintptr_t here_addr = _Py_get_machine_stack_pointer();
-    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
-    assert(_tstate->c_stack_hard_limit != 0);
-#if _Py_STACK_GROWS_DOWN
-    return here_addr <= _tstate->c_stack_soft_limit;
-#else
-    return here_addr >= _tstate->c_stack_soft_limit;
-#endif
-}
+PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate);
 
 // Export for test_peg_generator
 PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index a66543cf1eb164..189a8dde9f09ed 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -312,7 +312,18 @@ static uintptr_t return_pointer_as_int(char* p) {
 }
 #endif
 
-PyAPI_DATA(uintptr_t) _Py_get_machine_stack_pointer(void);
+static inline uintptr_t
+_Py_get_machine_stack_pointer(void) {
+#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
+    return (uintptr_t)__builtin_frame_address(0);
+#elif defined(_MSC_VER)
+    return (uintptr_t)_AddressOfReturnAddress();
+#else
+    char here;
+    /* Avoid compiler warning about returning stack address */
+    return return_pointer_as_int(&here);
+#endif
+}
 
 static inline intptr_t
 _Py_RecursionLimit_GetMargin(PyThreadState *tstate)
diff --git a/Python/ceval.c b/Python/ceval.c
index b4c57b65d13d18..f95900ae01a6af 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -1201,6 +1201,19 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from
     return PyStackRef_FromPyObjectSteal(iter_o);
 }
 
+Py_NO_INLINE int
+_Py_ReachedRecursionLimit(PyThreadState *tstate) {
+    uintptr_t here_addr = _Py_get_machine_stack_pointer();
+    _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
+    assert(_tstate->c_stack_hard_limit != 0);
+#if _Py_STACK_GROWS_DOWN
+    return here_addr <= _tstate->c_stack_soft_limit;
+#else
+    return here_addr >= _tstate->c_stack_soft_limit;
+#endif
+}
+
+
 #if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
 /*
  * gh-129987: The SLP autovectorizer can cause poor code generation for
diff --git a/Python/pystate.c b/Python/pystate.c
index f974c82c391f6a..143175da0f45c7 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -3286,16 +3286,3 @@ _Py_GetMainConfig(void)
     }
     return _PyInterpreterState_GetConfig(interp);
 }
-
-uintptr_t
-_Py_get_machine_stack_pointer(void) {
-#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
-    return (uintptr_t)__builtin_frame_address(0);
-#elif defined(_MSC_VER)
-    return (uintptr_t)_AddressOfReturnAddress();
-#else
-    char here;
-    /* Avoid compiler warning about returning stack address */
-    return return_pointer_as_int(&here);
-#endif
-}
diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py
index 83c878d8fe205b..ef28e0c0ddeac8 100644
--- a/Tools/jit/_optimizers.py
+++ b/Tools/jit/_optimizers.py
@@ -162,6 +162,7 @@ class Optimizer:
     label_prefix: str
     symbol_prefix: str
     re_global: re.Pattern[str]
+    frame_pointers: bool
     # The first block in the linked list:
     _root: _Block = dataclasses.field(init=False, default_factory=_Block)
     _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
@@ -193,6 +194,7 @@ class Optimizer:
     _re_small_const_1 = _RE_NEVER_MATCH
     _re_small_const_2 = _RE_NEVER_MATCH
     const_reloc = ""
+    _frame_pointer_modify: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH
 
     def __post_init__(self) -> None:
         # Split the code into a linked list of basic blocks. A basic block is an
@@ -553,6 +555,16 @@ def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
     def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
         raise NotImplementedError()
 
+    def _validate(self) -> None:
+        for block in self._blocks():
+            if not block.instructions:
+                continue
+            for inst in block.instructions:
+                if self.frame_pointers:
+                    assert (
+                        self._frame_pointer_modify.match(inst.text) is None
+                    ), "Frame pointer should not be modified"
+
     def run(self) -> None:
         """Run this optimizer."""
         self._insert_continue_label()
@@ -565,6 +577,7 @@ def run(self) -> None:
         self._remove_unreachable()
         self._fixup_external_labels()
         self._fixup_constants()
+        self._validate()
         self.path.write_text(self._body())
 
 
@@ -595,6 +608,7 @@ class OptimizerAArch64(Optimizer):  # pylint: disable = too-few-public-methods
         r"\s*(?P<instruction>ldr)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
     )
     const_reloc = "CUSTOM_AARCH64_CONST"
+    _frame_pointer_modify = re.compile(r"\s*stp\s+x29.*")
 
     def _get_reg(self, inst: Instruction) -> str:
         _, rest = inst.text.split(inst.name)
@@ -649,4 +663,5 @@ class OptimizerX86(Optimizer):  # pylint: disable = too-few-public-methods
     # https://www.felixcloutier.com/x86/jmp
     _re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)")
     # https://www.felixcloutier.com/x86/ret
-    _re_return = re.compile(r"\s*ret\b")
+    _re_return = re.compile(r"\s*retq?\b")
+    _frame_pointer_modify = re.compile(r"\s*movq?\s+%(\w+),\s+%rbp.*")
diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index ad2d5b3c780d54..787fcf53260f3d 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -176,8 +176,9 @@ async def _compile(
             f"{s}",
             f"{c}",
         ]
+        is_shim = opname == "shim"
         if self.frame_pointers:
-            frame_pointer = "all" if opname == "shim" else "reserved"
+            frame_pointer = "all" if is_shim else "reserved"
             args_s += ["-Xclang", f"-mframe-pointer={frame_pointer}"]
         args_s += self.args
         # Allow user-provided CFLAGS to override any defaults
@@ -185,12 +186,14 @@ async def _compile(
         await _llvm.run(
             "clang", args_s, echo=self.verbose, llvm_version=self.llvm_version
         )
-        self.optimizer(
-            s,
-            label_prefix=self.label_prefix,
-            symbol_prefix=self.symbol_prefix,
-            re_global=self.re_global,
-        ).run()
+        if not is_shim:
+            self.optimizer(
+                s,
+                label_prefix=self.label_prefix,
+                symbol_prefix=self.symbol_prefix,
+                re_global=self.re_global,
+                frame_pointers=self.frame_pointers,
+            ).run()
         args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
         await _llvm.run(
             "clang", args_o, echo=self.verbose, llvm_version=self.llvm_version