diff --git a/src/fetch/README.md b/src/fetch/README.md index 2c3e048927..c08942e8b8 100644 --- a/src/fetch/README.md +++ b/src/fetch/README.md @@ -26,7 +26,7 @@ The fetch tool will truncate the response, but by using the `start_index` argume ## Installation -Optionally: Install node.js, this will cause the fetch server to use a different HTML simplifier that is more robust. +The default HTML simplifier runs in Python and does not require Node.js. ### Using uv (recommended) diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index b42c7b1f6b..1b1c94b5f6 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -34,14 +34,16 @@ def extract_content_from_html(html: str) -> str: Simplified markdown version of the content """ ret = readabilipy.simple_json.simple_json_from_html_string( - html, use_readability=True + html, use_readability=False ) - if not ret["content"]: + if not ret.get("content"): return "Page failed to be simplified from HTML" content = markdownify.markdownify( ret["content"], heading_style=markdownify.ATX, ) + if not content.strip(): + return "Page failed to be simplified from HTML" return content diff --git a/src/fetch/tests/test_server.py b/src/fetch/tests/test_server.py index 96c1cb38c7..72c1519039 100644 --- a/src/fetch/tests/test_server.py +++ b/src/fetch/tests/test_server.py @@ -87,6 +87,18 @@ def test_empty_content_returns_error(self): result = extract_content_from_html(html) assert "" in result + def test_uses_python_readability_backend_by_default(self): + """Test HTML simplification does not require the optional Node.js backend.""" + html = "

Hello World

" + + with patch("readabilipy.simple_json.simple_json_from_html_string") as mock_extract: + mock_extract.return_value = {"content": "

Hello World

"} + + result = extract_content_from_html(html) + + mock_extract.assert_called_once_with(html, use_readability=False) + assert "Hello World" in result + class TestCheckMayAutonomouslyFetchUrl: """Tests for check_may_autonomously_fetch_url function."""