Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/fetch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ The fetch tool will truncate the response, but by using the `start_index` argume

## Installation

Optionally, install Node.js; this causes the fetch server to use a different HTML simplifier that is more robust.
The default HTML simplifier runs in Python and does not require Node.js.

### Using uv (recommended)

Expand Down
6 changes: 4 additions & 2 deletions src/fetch/src/mcp_server_fetch/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,16 @@ def extract_content_from_html(html: str) -> str:
Simplified markdown version of the content
"""
ret = readabilipy.simple_json.simple_json_from_html_string(
html, use_readability=True
html, use_readability=False
)
if not ret["content"]:
if not ret.get("content"):
return "<error>Page failed to be simplified from HTML</error>"
content = markdownify.markdownify(
ret["content"],
heading_style=markdownify.ATX,
)
if not content.strip():
return "<error>Page failed to be simplified from HTML</error>"
return content


Expand Down
12 changes: 12 additions & 0 deletions src/fetch/tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ def test_empty_content_returns_error(self):
result = extract_content_from_html(html)
assert "<error>" in result

def test_uses_python_readability_backend_by_default(self):
    """Check that extraction calls readabilipy with ``use_readability=False``.

    The readabilipy entry point is stubbed out, so the test only verifies
    the arguments passed to it and that the stubbed content reaches the
    returned result.
    """
    page = "<html><body><article><p>Hello World</p></article></body></html>"
    simplified = {"content": "<article><p>Hello World</p></article>"}

    # Replace the readabilipy call with a stub so its arguments can be inspected.
    with patch(
        "readabilipy.simple_json.simple_json_from_html_string",
        return_value=simplified,
    ) as fake_simplify:
        markdown = extract_content_from_html(page)

    # The mock keeps its call record after the patch context exits.
    fake_simplify.assert_called_once_with(page, use_readability=False)
    assert "Hello World" in markdown


class TestCheckMayAutonomouslyFetchUrl:
"""Tests for check_may_autonomously_fetch_url function."""
Expand Down
Loading