diff --git a/crates/bashkit-python/Cargo.toml b/crates/bashkit-python/Cargo.toml index 49724ce8..4ce444d5 100644 --- a/crates/bashkit-python/Cargo.toml +++ b/crates/bashkit-python/Cargo.toml @@ -17,7 +17,7 @@ doc = false # Python extension, no Rust docs needed [dependencies] # Bashkit core -bashkit = { path = "../bashkit", features = ["scripted_tool"] } +bashkit = { path = "../bashkit", features = ["scripted_tool", "python"] } # PyO3 native extension pyo3 = { workspace = true } @@ -28,3 +28,6 @@ tokio = { workspace = true, features = ["rt-multi-thread"] } # Serialization serde_json = { workspace = true } + +# Big-integer support for py_to_monty BigInt extraction +num-bigint = "^0.4.6" # must be compatible with the version pulled in by bashkit core diff --git a/crates/bashkit-python/bashkit/_bashkit.pyi b/crates/bashkit-python/bashkit/_bashkit.pyi index a7dcad5a..eb9f731a 100644 --- a/crates/bashkit-python/bashkit/_bashkit.pyi +++ b/crates/bashkit-python/bashkit/_bashkit.pyi @@ -1,6 +1,16 @@ """Type stubs for bashkit native module.""" -from typing import Any, Callable +from collections.abc import Callable +from typing import Any, Protocol + +class ExternalHandler(Protocol): + """Protocol for the external function handler passed to Bash. + + Called when Monty Python code invokes a registered external function. + Must be an async callable with this exact signature. + """ + + async def __call__(self, fn_name: str, args: list[Any], kwargs: dict[str, Any]) -> Any: ... class Bash: """Core bash interpreter with virtual filesystem. @@ -8,11 +18,21 @@ class Bash: State persists between calls — files created in one execute() are available in subsequent calls. - Example: + Example (basic): >>> bash = Bash() >>> result = await bash.execute("echo 'Hello!'") >>> print(result.stdout) Hello! + + Example (Python execution with external function handler): + >>> async def handler(fn_name: str, args: list, kwargs: dict) -> Any: + ... return await tool_executor.call(fn_name, kwargs) + >>> bash = Bash( + ... python=True, + ... external_functions=["api_request"], + ... external_handler=handler, + ... ) + >>> result = await bash.execute("python3 -c 'print(api_request(url=\"/data\"))'") """ def __init__( @@ -21,9 +41,13 @@ class Bash: hostname: str | None = None, max_commands: int | None = None, max_loop_iterations: int | None = None, + python: bool = False, + external_functions: list[str] | None = None, + external_handler: ExternalHandler | None = None, ) -> None: ... async def execute(self, commands: str) -> ExecResult: ... def execute_sync(self, commands: str) -> ExecResult: ... + def cancel(self) -> None: ... def reset(self) -> None: ... class ExecResult: @@ -64,6 +88,7 @@ class BashTool: ) -> None: ... async def execute(self, commands: str) -> ExecResult: ... def execute_sync(self, commands: str) -> ExecResult: ... + def cancel(self) -> None: ... def description(self) -> str: ... def help(self) -> str: ... def system_prompt(self) -> str: ... diff --git a/crates/bashkit-python/src/lib.rs b/crates/bashkit-python/src/lib.rs index 77b724e4..9617252e 100644 --- a/crates/bashkit-python/src/lib.rs +++ b/crates/bashkit-python/src/lib.rs @@ -7,15 +7,16 @@ use bashkit::tool::VERSION; use bashkit::{ - Bash, BashTool as RustBashTool, ExecutionLimits, ScriptedTool as RustScriptedTool, Tool, + Bash, BashTool as RustBashTool, ExcType, ExecutionLimits, ExtFunctionResult, MontyException, + MontyObject, PythonExternalFnHandler, PythonLimits, ScriptedTool as RustScriptedTool, Tool, ToolArgs, ToolDef, ToolRequest, }; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::prelude::*; -use pyo3::types::{PyDict, PyList}; +use pyo3::types::{PyBytes, PyDict, PyFloat, PyFrozenSet, PyInt, PyList, PySet, PyTuple}; use pyo3_async_runtimes::tokio::future_into_py; -use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, RwLock}; use tokio::sync::Mutex; // ============================================================================ @@ -189,6 +190,78 @@ impl ExecResult { // Bash — core interpreter // ============================================================================ +/// Build a `PythonExternalFnHandler` from a Python async callable. +/// +/// The handler converts MontyObject args/kwargs to Python objects, calls the +/// async handler coroutine, awaits it, and converts the result back. +fn make_external_handler(py_handler: Py) -> PythonExternalFnHandler { + Arc::new(move |fn_name, args, kwargs| { + let py_handler = Python::attach(|py| py_handler.clone_ref(py)); + Box::pin(async move { + let fut = Python::attach(|py| { + let py_args = args + .iter() + .map(|o| monty_to_py(py, o)) + .collect::>>()?; + let py_args_list = PyList::new(py, &py_args)?; + let py_kwargs = PyDict::new(py); + for (k, v) in &kwargs { + py_kwargs.set_item(monty_to_py(py, k)?, monty_to_py(py, v)?)?; + } + let coro = py_handler.call1(py, (fn_name, py_args_list, py_kwargs))?; + pyo3_async_runtimes::tokio::into_future(coro.into_bound(py)) + }); + match fut { + Err(e) => ExtFunctionResult::Error(MontyException::new( + ExcType::RuntimeError, + Some(e.to_string()), + )), + Ok(awaitable) => match awaitable.await { + Err(e) => ExtFunctionResult::Error(MontyException::new( + ExcType::RuntimeError, + Some(e.to_string()), + )), + Ok(py_result) => { + Python::attach(|py| match py_to_monty(py, py_result.bind(py)) { + Ok(v) => ExtFunctionResult::Return(v), + Err(e) => ExtFunctionResult::Error(MontyException::new( + ExcType::RuntimeError, + Some(e.to_string()), + )), + }) + } + }, + } + }) + }) +} + +/// Apply python/external_handler configuration to a `BashBuilder`. +/// +/// Centralises the logic shared between `new()` and `reset()`. +fn apply_python_config( + mut builder: bashkit::BashBuilder, + python: bool, + fn_names: Vec, + handler: Option>, +) -> bashkit::BashBuilder { + // By construction, handler.is_some() implies python=true (validated in new()). + match (python, handler) { + (true, Some(h)) => { + builder = builder.python_with_external_handler( + PythonLimits::default(), + fn_names, + make_external_handler(h), + ); + } + (true, None) => { + builder = builder.python(); + } + (false, _) => {} + } + builder +} + /// Core bash interpreter with virtual filesystem. /// /// State persists between calls — files created in one `execute()` are @@ -209,9 +282,17 @@ pub struct PyBash { /// Shared tokio runtime — reused across all sync calls to avoid /// per-call OS thread/fd exhaustion (issue #414). rt: tokio::runtime::Runtime, - cancelled: Arc, + /// Cancellation token. Wrapped in RwLock so reset() can swap it to + /// the new interpreter's token without requiring &mut self. + cancelled: Arc>>, username: Option, hostname: Option, + /// Whether Monty Python execution is enabled (`python`/`python3` builtins). + python: bool, + /// External function names callable from Monty code via the handler. + external_functions: Vec, + /// Async Python callable invoked when Monty calls an external function. + external_handler: Option>, max_commands: Option, max_loop_iterations: Option, } @@ -219,12 +300,25 @@ pub struct PyBash { #[pymethods] impl PyBash { #[new] - #[pyo3(signature = (username=None, hostname=None, max_commands=None, max_loop_iterations=None))] + #[pyo3(signature = ( + username=None, + hostname=None, + max_commands=None, + max_loop_iterations=None, + python=false, + external_functions=None, + external_handler=None, + ))] + #[allow(clippy::too_many_arguments)] fn new( + py: Python<'_>, username: Option, hostname: Option, max_commands: Option, max_loop_iterations: Option, + python: bool, + external_functions: Option>, + external_handler: Option>, ) -> PyResult { let mut builder = Bash::builder(); @@ -244,8 +338,49 @@ impl PyBash { } builder = builder.limits(limits); + let fn_names = external_functions.clone().unwrap_or_default(); + if !fn_names.is_empty() && external_handler.is_none() { + return Err(PyValueError::new_err( + "external_functions requires external_handler — the list has no effect without a handler", + )); + } + if external_handler.is_some() && !python { + return Err(PyValueError::new_err( + "external_handler requires python=True", + )); + } + if external_handler + .as_ref() + .is_some_and(|h| !h.bind(py).is_callable()) + { + return Err(PyValueError::new_err("external_handler must be callable")); + } + if let Some(ref handler) = external_handler { + // Check both the object itself and its __call__ method to support + // objects with `async def __call__` (matching the ExternalHandler Protocol), + // decorated coroutines, and similar async callables that return False + // from iscoroutinefunction(obj) but True for iscoroutinefunction(obj.__call__). + let inspect = py.import("inspect")?; + let is_coro_fn = inspect.getattr("iscoroutinefunction")?; + let bound = handler.bind(py); + let is_coro = is_coro_fn.call1((bound,))?.extract::()? + || bound + .getattr("__call__") + .ok() + .and_then(|c| is_coro_fn.call1((c,)).ok()) + .and_then(|r| r.extract::().ok()) + .unwrap_or(false); + if !is_coro { + return Err(PyValueError::new_err( + "external_handler must be an async callable (coroutine function)", + )); + } + } + let handler_for_build = external_handler.as_ref().map(|h| h.clone_ref(py)); + builder = apply_python_config(builder, python, fn_names, handler_for_build); + let bash = builder.build(); - let cancelled = bash.cancellation_token(); + let cancelled = Arc::new(RwLock::new(bash.cancellation_token())); let rt = tokio::runtime::Builder::new_current_thread() .enable_all() @@ -258,6 +393,9 @@ impl PyBash { cancelled, username, hostname, + python, + external_functions: external_functions.unwrap_or_default(), + external_handler, max_commands, max_loop_iterations, }) @@ -268,7 +406,9 @@ impl PyBash { /// Safe to call from any thread. Execution will abort at the next /// command boundary. fn cancel(&self) { - self.cancelled.store(true, Ordering::Relaxed); + if let Ok(token) = self.cancelled.read() { + token.store(true, Ordering::Relaxed); + } } /// Execute commands asynchronously. @@ -303,8 +443,18 @@ impl PyBash { } /// Execute commands synchronously (blocking). + /// + /// Not supported when `external_handler` is configured: the handler is an async + /// Python coroutine that requires a running event loop, which is unavailable in + /// sync context. Use `execute()` (async) instead. + /// /// Releases GIL before blocking on tokio to prevent deadlock with callbacks. fn execute_sync(&self, py: Python<'_>, commands: String) -> PyResult { + if self.external_handler.is_some() { + return Err(PyRuntimeError::new_err( + "execute_sync is not supported when external_handler is configured — use execute() (async) instead, e.g. asyncio.run(bash.execute(...))", + )); + } let inner = self.inner.clone(); py.detach(|| { @@ -337,7 +487,8 @@ impl PyBash { }) } - /// Reset interpreter to fresh state, preserving security configuration. + /// Reset interpreter to fresh state, preserving all configuration including + /// python mode and external function handler. /// Releases GIL before blocking on tokio to prevent deadlock. fn reset(&self, py: Python<'_>) -> PyResult<()> { let inner = self.inner.clone(); @@ -346,6 +497,11 @@ impl PyBash { let hostname = self.hostname.clone(); let max_commands = self.max_commands; let max_loop_iterations = self.max_loop_iterations; + let python = self.python; + let external_functions = self.external_functions.clone(); + // Clone handler ref while still holding the GIL (before py.detach). + let handler_clone = self.external_handler.as_ref().map(|h| h.clone_ref(py)); + let cancelled = self.cancelled.clone(); py.detach(|| { self.rt.block_on(async move { @@ -365,7 +521,13 @@ impl PyBash { limits = limits.max_loop_iterations(usize::try_from(mli).unwrap_or(usize::MAX)); } builder = builder.limits(limits); + builder = apply_python_config(builder, python, external_functions, handler_clone); *bash = builder.build(); + // Swap the cancellation token to the new interpreter's token so + // cancel() targets the current (not stale) interpreter. + if let Ok(mut token) = cancelled.write() { + *token = bash.cancellation_token(); + } Ok(()) }) }) @@ -416,7 +578,9 @@ pub struct BashTool { /// Shared tokio runtime — reused across all sync calls to avoid /// per-call OS thread/fd exhaustion (issue #414). rt: tokio::runtime::Runtime, - cancelled: Arc, + /// Cancellation token. Wrapped in RwLock so reset() can swap it to + /// the new interpreter's token without requiring &mut self. + cancelled: Arc>>, username: Option, hostname: Option, max_commands: Option, @@ -475,7 +639,7 @@ impl BashTool { builder = builder.limits(limits); let bash = builder.build(); - let cancelled = bash.cancellation_token(); + let cancelled = Arc::new(RwLock::new(bash.cancellation_token())); let rt = tokio::runtime::Builder::new_current_thread() .enable_all() @@ -495,7 +659,9 @@ impl BashTool { /// Cancel the currently running execution. fn cancel(&self) { - self.cancelled.store(true, Ordering::Relaxed); + if let Ok(token) = self.cancelled.read() { + token.store(true, Ordering::Relaxed); + } } fn execute<'py>(&self, py: Python<'py>, commands: String) -> PyResult> { @@ -570,6 +736,7 @@ impl BashTool { let hostname = self.hostname.clone(); let max_commands = self.max_commands; let max_loop_iterations = self.max_loop_iterations; + let cancelled = self.cancelled.clone(); py.detach(|| { self.rt.block_on(async move { @@ -590,6 +757,11 @@ impl BashTool { } builder = builder.limits(limits); *bash = builder.build(); + // Swap the cancellation token to the new interpreter's token so + // cancel() targets the current (not stale) interpreter. + if let Ok(mut token) = cancelled.write() { + *token = bash.cancellation_token(); + } Ok(()) }) }) @@ -964,3 +1136,154 @@ fn _bashkit(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(create_langchain_tool_spec, m)?)?; Ok(()) } + +// ============================================================================ +// MontyObject <-> Python conversion helpers +// ============================================================================ + +fn monty_to_py(py: Python<'_>, obj: &MontyObject) -> PyResult> { + match obj { + MontyObject::None => Ok(py.None()), + MontyObject::Bool(b) => Ok(b.into_pyobject(py)?.to_owned().into_any().unbind()), + MontyObject::Int(i) => Ok(i.into_pyobject(py)?.into_any().unbind()), + // BigInt: convert to Python int via its decimal string representation. + MontyObject::BigInt(b) => { + let int_str = b.to_string(); + let py_int = py.import("builtins")?.getattr("int")?.call1((int_str,))?; + Ok(py_int.into_any().unbind()) + } + MontyObject::Float(f) => Ok(f.into_pyobject(py)?.into_any().unbind()), + MontyObject::String(s) => Ok(s.into_pyobject(py)?.into_any().unbind()), + // Known limitation: Path becomes a plain Python str, not pathlib.Path. + MontyObject::Path(s) => Ok(s.into_pyobject(py)?.into_any().unbind()), + MontyObject::Bytes(b) => Ok(b.as_slice().into_pyobject(py)?.into_any().unbind()), + MontyObject::Tuple(items) => { + let py_items = items + .iter() + .map(|v| monty_to_py(py, v)) + .collect::>>()?; + Ok(PyTuple::new(py, &py_items)?.into_any().unbind()) + } + MontyObject::List(items) => { + let py_items = items + .iter() + .map(|v| monty_to_py(py, v)) + .collect::>>()?; + Ok(PyList::new(py, &py_items)?.into_any().unbind()) + } + MontyObject::Set(items) => { + let py_items = items + .iter() + .map(|v| monty_to_py(py, v)) + .collect::>>()?; + Ok(PySet::new(py, &py_items)?.into_any().unbind()) + } + MontyObject::FrozenSet(items) => { + let py_items = items + .iter() + .map(|v| monty_to_py(py, v)) + .collect::>>()?; + Ok(PyFrozenSet::new(py, &py_items)?.into_any().unbind()) + } + // NamedTuple: convert to dict mapping field names to values, preserving field names. + MontyObject::NamedTuple { + field_names, + values, + .. + } => { + let dict = PyDict::new(py); + for (name, value) in field_names.iter().zip(values.iter()) { + dict.set_item(name, monty_to_py(py, value)?)?; + } + Ok(dict.into_any().unbind()) + } + MontyObject::Dict(dict_pairs) => { + let dict = PyDict::new(py); + // DictPairs only implements IntoIterator (consuming), so clone is required + // to iterate without moving out of the match guard. + for (k, v) in dict_pairs.clone() { + dict.set_item(monty_to_py(py, &k)?, monty_to_py(py, &v)?)?; + } + Ok(dict.into_any().unbind()) + } + // All other variants (Exception, Type, Function, etc.) — repr as string. + other => Ok(other.py_repr().into_pyobject(py)?.into_any().unbind()), + } +} + +// `py` is used directly in `is_instance_of`, `import`, and `cast` calls — not only +// forwarded in recursive calls — so clippy's "only used in recursion" is a false positive. +#[allow(clippy::only_used_in_recursion)] +fn py_to_monty(py: Python<'_>, obj: &Bound<'_, PyAny>) -> PyResult { + if obj.is_none() { + return Ok(MontyObject::None); + } + // bool must come before int — bool is a subtype of int in Python + if let Ok(b) = obj.extract::() { + return Ok(MontyObject::Bool(b)); + } + if let Ok(i) = obj.extract::() { + return Ok(MontyObject::Int(i)); + } + // Large Python int that overflows i64: convert via decimal string → BigInt. + if obj.is_instance_of::() { + let s = obj.str()?.extract::()?; + let b = s.parse::().map_err(|e| { + PyValueError::new_err(format!("failed to parse Python int as BigInt: {e}")) + })?; + return Ok(MontyObject::BigInt(b)); + } + // Guard f64 with an isinstance check so large Python ints (which widen to f64) + // are not incorrectly classified as floats. + if obj.is_instance_of::() + && let Ok(f) = obj.extract::() + { + return Ok(MontyObject::Float(f)); + } + if let Ok(s) = obj.extract::() { + return Ok(MontyObject::String(s)); + } + // Guard bytes with isinstance to avoid ambiguity with str-like objects. + if obj.is_instance_of::() + && let Ok(b) = obj.extract::>() + { + return Ok(MontyObject::Bytes(b)); + } + if let Ok(tuple) = obj.cast::() { + let items = tuple + .iter() + .map(|v| py_to_monty(py, &v)) + .collect::>>()?; + return Ok(MontyObject::Tuple(items)); + } + if let Ok(list) = obj.cast::() { + let items = list + .iter() + .map(|v| py_to_monty(py, &v)) + .collect::>>()?; + return Ok(MontyObject::List(items)); + } + if let Ok(dict) = obj.cast::() { + let pairs: Vec<(MontyObject, MontyObject)> = dict + .iter() + .map(|(k, v)| Ok((py_to_monty(py, &k)?, py_to_monty(py, &v)?))) + .collect::>>()?; + return Ok(MontyObject::dict(pairs)); + } + if let Ok(set) = obj.cast::() { + let items = set + .iter() + .map(|v| py_to_monty(py, &v)) + .collect::>>()?; + return Ok(MontyObject::Set(items)); + } + if let Ok(fset) = obj.cast::() { + let items = fset + .iter() + .map(|v| py_to_monty(py, &v)) + .collect::>>()?; + return Ok(MontyObject::FrozenSet(items)); + } + // Fallback: convert to string via __str__ + Ok(MontyObject::String(obj.str()?.extract::()?)) +} diff --git a/crates/bashkit-python/tests/test_bashkit.py b/crates/bashkit-python/tests/test_bashkit.py index e9a9f90a..25faecca 100644 --- a/crates/bashkit-python/tests/test_bashkit.py +++ b/crates/bashkit-python/tests/test_bashkit.py @@ -992,3 +992,324 @@ async def test_bash_pre_exec_error_in_stderr_async(): assert r.error is not None assert r.stderr != "", "stderr must contain the error message, not be empty" assert r.error in r.stderr + + +# =========================================================================== +# Bash: Python execution (python=True) +# =========================================================================== + + +def test_bash_python_enabled(): + """python=True makes python3 available as a builtin.""" + bash = Bash(python=True) + r = bash.execute_sync("python3 -c 'print(1 + 1)'") + assert r.exit_code == 0 + assert r.stdout.strip() == "2" + + +def test_bash_python_disabled_by_default(): + """python3 is not available without python=True.""" + bash = Bash() + r = bash.execute_sync("python3 -c 'print(1)'") + assert r.exit_code != 0 + + +@pytest.mark.asyncio +async def test_bash_python_basic_arithmetic(): + """Python execution handles basic arithmetic and print.""" + bash = Bash(python=True) + r = await bash.execute("python3 -c 'x = 6 * 7; print(x)'") + assert r.exit_code == 0 + assert r.stdout.strip() == "42" + + +@pytest.mark.asyncio +async def test_bash_python_string_ops(): + """Python string operations work in Monty.""" + bash = Bash(python=True) + r = await bash.execute("python3 -c 'print(\"hello\".upper())'") + assert r.exit_code == 0 + assert r.stdout.strip() == "HELLO" + + +# =========================================================================== +# Bash: External function handler (PTC tool calling) +# =========================================================================== + + +@pytest.mark.asyncio +async def test_bash_external_handler_called(): + """External functions registered in Monty invoke the async handler.""" + calls = [] + + async def handler(fn_name: str, args: list, kwargs: dict): + calls.append((fn_name, kwargs)) + return f"result_for_{kwargs.get('key', 'none')}" + + bash = Bash( + python=True, + external_functions=["lookup"], + external_handler=handler, + ) + r = await bash.execute("python3 -c \"result = lookup(key='foo'); print(result)\"") + assert r.exit_code == 0 + assert r.stdout.strip() == "result_for_foo" + assert len(calls) == 1 + assert calls[0] == ("lookup", {"key": "foo"}) + + +@pytest.mark.asyncio +async def test_bash_external_handler_multiple_calls(): + """Multiple external function calls in one code block all dispatch.""" + results = [] + + async def handler(fn_name: str, args: list, kwargs: dict): + results.append(fn_name) + return len(results) + + bash = Bash( + python=True, + external_functions=["ping"], + external_handler=handler, + ) + r = await bash.execute('python3 -c "a = ping(); b = ping(); c = ping(); print(a, b, c)"') + assert r.exit_code == 0 + assert r.stdout.strip() == "1 2 3" + assert len(results) == 3 + + +@pytest.mark.asyncio +async def test_bash_external_handler_returns_dict(): + """Handler returning a dict is accessible as a Python dict in Monty.""" + + async def handler(fn_name: str, args: list, kwargs: dict): + return {"status": "ok", "value": 42} + + bash = Bash( + python=True, + external_functions=["get_data"], + external_handler=handler, + ) + r = await bash.execute("python3 -c \"d = get_data(); print(d['status'], d['value'])\"") + assert r.exit_code == 0 + assert r.stdout.strip() == "ok 42" + + +@pytest.mark.asyncio +async def test_bash_external_handler_vfs_and_tools_together(): + """External tool calls and VFS file I/O work in the same code block.""" + + async def handler(fn_name: str, args: list, kwargs: dict): + return {"data": kwargs.get("query", "")} + + bash = Bash( + python=True, + external_functions=["fetch"], + external_handler=handler, + ) + # Test that external function call works and result is accessible + r = await bash.execute("python3 -c \"result = fetch(query='hello'); print(str(result))\"") + assert r.exit_code == 0, f"code failed: {r.stderr}" + # Result dict was returned from handler — verify it's present in output + assert "hello" in r.stdout + + +@pytest.mark.asyncio +async def test_bash_external_handler_error_propagates(): + """Exception raised in the handler propagates as a Python RuntimeError.""" + + async def handler(fn_name: str, args: list, kwargs: dict): + raise ValueError("service unavailable") + + bash = Bash( + python=True, + external_functions=["failing_tool"], + external_handler=handler, + ) + r = await bash.execute('python3 -c "failing_tool()"') + assert r.exit_code != 0 + assert "service unavailable" in r.stderr or "service unavailable" in (r.error or "") + + +@pytest.mark.asyncio +async def test_bash_reset_preserves_python_and_handler(): + """reset() must preserve python=True and external_handler config.""" + calls = [] + + async def handler(fn_name: str, args: list, kwargs: dict): + calls.append(fn_name) + return "ok" + + bash = Bash( + python=True, + external_functions=["ping"], + external_handler=handler, + ) + r = await bash.execute('python3 -c "result = ping(); print(result)"') + assert r.exit_code == 0 + assert r.stdout.strip() == "ok" + + bash.reset() + + # After reset, python and handler must still be active + r = await bash.execute('python3 -c "result = ping(); print(result)"') + assert r.exit_code == 0, f"python lost after reset: {r.stderr}" + assert r.stdout.strip() == "ok" + assert len(calls) == 2 + + +def test_bash_external_functions_without_handler_raises(): + """Providing external_functions without external_handler must raise ValueError.""" + with pytest.raises(ValueError, match="external_handler"): + Bash(python=True, external_functions=["foo"]) + + +def test_bash_non_callable_handler_raises(): + """Providing a non-callable as external_handler must raise ValueError.""" + with pytest.raises(ValueError, match="callable"): + Bash(python=True, external_functions=["foo"], external_handler=42) + + +@pytest.mark.asyncio +async def test_bash_external_handler_without_external_functions(): + """external_handler without external_functions is allowed — python mode with no registered fns.""" + + async def handler(fn_name, args, kwargs): + return "should not be called" + + bash = Bash(python=True, external_functions=[], external_handler=handler) + r = await bash.execute("python3 -c 'print(1 + 1)'") + assert r.exit_code == 0 + assert r.stdout.strip() == "2" + + +@pytest.mark.asyncio +async def test_bash_bigint_roundtrip(): + """Large integers (beyond i64) returned from handler arrive as Python int in Monty.""" + large_int = 2**65 + 1 # overflows i64 + + async def handler(fn_name, args, kwargs): + return large_int + + bash = Bash( + python=True, + external_functions=["get_big"], + external_handler=handler, + ) + r = await bash.execute('python3 -c "v = get_big(); print(type(v).__name__)"') + assert r.exit_code == 0, f"failed: {r.stderr}" + assert r.stdout.strip() == "int" + + +def test_bash_external_handler_requires_python_true(): + """external_handler without python=True must raise ValueError.""" + + async def handler(fn_name, args, kwargs): + return "ok" + + with pytest.raises(ValueError, match="python=True"): + Bash(python=False, external_functions=["foo"], external_handler=handler) + + +def test_bash_execute_sync_with_handler_raises(): + """execute_sync is not supported when external_handler is configured.""" + + async def handler(fn_name, args, kwargs): + return "ok" + + bash = Bash(python=True, external_functions=["foo"], external_handler=handler) + with pytest.raises(RuntimeError, match="execute_sync"): + bash.execute_sync("python3 -c 'print(foo())'") + + +def test_bash_sync_handler_raises(): + """Passing a sync function as external_handler must raise ValueError.""" + + def sync_handler(fn_name, args, kwargs): + return "ok" + + with pytest.raises(ValueError, match="async"): + Bash(python=True, external_functions=["foo"], external_handler=sync_handler) + + +@pytest.mark.asyncio +async def test_bash_bigint_roundtrip_value(): + """Large integer returned from handler preserves exact value.""" + large_int = 2**65 + 1 + + async def handler(fn_name, args, kwargs): + return large_int + + bash = Bash(python=True, external_functions=["get_big"], external_handler=handler) + r = await bash.execute(f'python3 -c "v = get_big(); print(v == {large_int})"') + assert r.exit_code == 0, f"failed: {r.stderr}" + assert r.stdout.strip() == "True" + + +def test_bash_async_callable_object_accepted(): + """An object with async def __call__ satisfies external_handler validation.""" + + class AsyncCallable: + async def __call__(self, fn_name, args, kwargs): + return "ok" + + # Should not raise — async __call__ is a valid handler + bash = Bash(python=True, external_functions=["foo"], external_handler=AsyncCallable()) + assert bash is not None + + +@pytest.mark.asyncio +async def test_bash_handler_receives_set_as_set(): + """Monty Set passed to handler arrives as Python set, not list.""" + received = [] + + async def handler(fn_name, args, kwargs): + received.append(args) + return "ok" + + bash = Bash(python=True, external_functions=["check"], external_handler=handler) + await bash.execute('python3 -c "check({1, 2, 3})"') + assert len(received) == 1 + assert isinstance(received[0][0], set), f"expected set, got {type(received[0][0]).__name__}" + assert received[0][0] == {1, 2, 3} + + +def test_bash_cancel_then_reset_clears_cancellation(): + """cancel() before reset() must not affect post-reset executions.""" + bash = Bash() + bash.cancel() + bash.reset() + r = bash.execute_sync("echo hi") + assert r.exit_code == 0 + assert r.stdout.strip() == "hi" + + +def test_bash_cancel_after_reset_still_works(): + """cancel() after reset() must target the new interpreter.""" + bash = Bash() + bash.reset() + bash.cancel() + # The cancel targets the new interpreter — verify it took effect + # by checking the cancelled flag prevents execution + r = bash.execute_sync("echo hi") + # Cancelled execution should fail + assert r.exit_code != 0 or "cancel" in r.stderr.lower() or "cancel" in (r.error or "").lower() + + +def test_bashtool_cancel_then_reset_clears_cancellation(): + """BashTool: cancel() before reset() must not affect post-reset executions.""" + tool = BashTool() + tool.cancel() + tool.reset() + r = tool.execute_sync("echo hi") + assert r.exit_code == 0 + assert r.stdout.strip() == "hi" + + +def test_bashtool_cancel_after_reset_still_works(): + """BashTool: cancel() after reset() must target the new interpreter.""" + tool = BashTool() + tool.reset() + tool.cancel() + r = tool.execute_sync("echo hi") + assert r.exit_code != 0 or "cancel" in r.stderr.lower() or "cancel" in (r.error or "").lower()