Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
34a470f
feat(api): add --enable_anthropic_api CLI flag
sufubao Apr 14, 2026
6125090
feat(api): add LiteLLM shim module for Anthropic adapter
sufubao Apr 14, 2026
2f5774b
fix(api): bump _MAX_TESTED_LITELLM_VERSION to 1.84.0
sufubao Apr 14, 2026
40c229a
refactor(api): remove dead stream wrapper cache, sharpen import error
sufubao Apr 14, 2026
9158066
test(api): add LiteLLM adapter round-trip characterisation test
sufubao Apr 14, 2026
3a3b0b0
feat(api): add non-streaming Anthropic Messages handler
sufubao Apr 14, 2026
455b487
feat(api): register POST /v1/messages route
sufubao Apr 15, 2026
1bb536d
feat(api): stream Anthropic Messages events by wrapping OpenAI SSE
sufubao Apr 15, 2026
fa2f2a2
fix(api): address Task 6 review feedback on streaming bridge
sufubao Apr 15, 2026
af65e9b
test(api): cover Anthropic tool-use request and response translation
sufubao Apr 15, 2026
6ecf17a
test(api): simplify tautological tool-name assertion
sufubao Apr 15, 2026
0b570c1
test(api): add manual integration test script for Anthropic SDK
sufubao Apr 15, 2026
d36e5c3
test(api): exclude manual Anthropic SDK script from pytest collection
sufubao Apr 15, 2026
9f8c9fc
build: declare litellm as anthropic_api optional extra
sufubao Apr 15, 2026
d80a5e6
docs: document the /v1/messages Anthropic compatibility endpoint
sufubao Apr 15, 2026
9600d51
test(api): cover Anthropic image content block pass-through
sufubao Apr 15, 2026
3f57999
fix(api): accept str SSE chunks in Anthropic streaming bridge
sufubao Apr 15, 2026
8b4cddd
fix(api): stream tool_use content blocks in Anthropic SSE bridge
sufubao Apr 15, 2026
04db237
fix(api): Anthropic response cosmetic cleanups and error envelope
sufubao Apr 15, 2026
b03d06c
fix(api): catch translation errors and return Anthropic error envelope
sufubao Apr 15, 2026
ca59417
test: remove all Anthropic API tests
sufubao Apr 15, 2026
1f1d300
style: apply black formatting to api_anthropic and _litellm_shim
sufubao Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/EN/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Documentation List
Multimodal Deployment <tutorial/multimodal>
Reward Model Deployment <tutorial/reward_model>
OpenAI api Usage <tutorial/openai>
Anthropic Messages API <tutorial/anthropic>
Function Calling <tutorial/function_calling>
Reasoning Parser <tutorial/reasoning_parser>
APIServer Parameters <tutorial/api_server_args_zh>
Expand Down
80 changes: 80 additions & 0 deletions docs/EN/source/tutorial/anthropic.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
.. _anthropic_api:

Anthropic Messages API (Experimental)
=====================================

LightLLM can expose a ``/v1/messages`` endpoint that speaks the Anthropic
Messages API wire protocol. This is useful if you have client code written
against the Anthropic Python/TypeScript SDK and want to point it at a locally
hosted open-source model without rewriting the client.

Enabling
--------

Install the optional dependency:

.. code-block:: bash

pip install 'lightllm[anthropic_api]'

Start the server with the flag:

.. code-block:: bash

python -m lightllm.server.api_server \
--model_dir /path/to/model \
--enable_anthropic_api \
--port 8088

Using it from the Anthropic SDK
-------------------------------

.. code-block:: python

import anthropic

client = anthropic.Anthropic(
base_url="http://localhost:8088",
api_key="dummy",
)
resp = client.messages.create(
model="any-name", # echoed back; LightLLM serves the loaded model
max_tokens=1024,
messages=[{"role": "user", "content": "hello"}],
)
print(resp.content[0].text)

Streaming works the same way the Anthropic SDK expects:

.. code-block:: python

with client.messages.stream(
model="any-name",
max_tokens=256,
messages=[{"role": "user", "content": "Count from 1 to 5."}],
) as stream:
for text in stream.text_stream:
print(text, end="", flush=True)

Supported features
------------------

- Text generation (streaming and non-streaming)
- System prompts
- Tool use / function calling
- Multi-turn conversations
- Vision (image inputs) via Anthropic content blocks

Known limitations
-----------------

- Prompt caching (``cache_control``) is accepted but ignored; ``cache_*``
fields in ``usage`` are always zero.
- Extended thinking (``thinking`` parameter) is not supported.
- The Batch API (``/v1/messages/batches``) and Files API are not implemented.
- Model name is accepted but ignored; LightLLM always serves the model
loaded via ``--model_dir`` and echoes the requested name back in the response.
- On the streaming path, ``message_start.message.usage.input_tokens`` is
always ``0`` because the upstream usage chunk arrives after all content
chunks. Clients that need an accurate prompt-token count should read
``message_delta.usage`` at the end of the stream.
104 changes: 104 additions & 0 deletions lightllm/server/_litellm_shim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""LiteLLM integration shim for the Anthropic Messages API endpoint.

LiteLLM's Anthropic<->OpenAI translation code lives under an
``experimental_pass_through`` import path. Centralising all LiteLLM imports
here means a LiteLLM upgrade that relocates those symbols requires editing
exactly one file. Callers should use the getters below; they must not
import LiteLLM symbols directly from elsewhere in the server package.
"""
from __future__ import annotations

from typing import Any

from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)

# Known-good LiteLLM versions. Bump explicitly after retesting.
_MIN_LITELLM_VERSION = "1.52.0"
_MAX_TESTED_LITELLM_VERSION = "1.84.0"

# Lazily-created singleton adapter; populated by get_anthropic_messages_adapter().
_cached_adapter: Any = None
# Set to True once the litellm import has been verified; guards _check_import_once().
_import_checked: bool = False


def _raise_missing() -> None:
    """Abort with an actionable error when the litellm package is absent.

    Raises:
        RuntimeError: always; the message includes the pip install command.
    """
    message = (
        "--enable_anthropic_api requires the 'litellm' package. Install it with:\n"
        f" pip install 'litellm>={_MIN_LITELLM_VERSION}'"
    )
    raise RuntimeError(message)


def _get_litellm_version() -> str:
"""Return the installed litellm version string, or 'unknown' if not found.

litellm >= 1.x does not expose ``__version__`` as a module attribute;
use importlib.metadata as the primary source.
"""
try:
import importlib.metadata

return importlib.metadata.version("litellm")
except Exception:
pass
# Fallback: some older builds do expose it.
try:
import litellm

return getattr(litellm, "__version__", "unknown")
except Exception:
return "unknown"


def _check_import_once() -> None:
    """Verify exactly once per process that litellm is importable.

    On failure this raises (via ``_raise_missing``); on success it logs the
    detected version against the tested range and latches ``_import_checked``
    so subsequent calls are no-ops.
    """
    global _import_checked
    if _import_checked:
        return
    try:
        import litellm  # noqa: F401
    except ImportError:
        _raise_missing()
    else:
        detected = _get_litellm_version()
        logger.info(
            "LiteLLM detected (version=%s) for Anthropic API compatibility layer. Tested range: %s..%s",
            detected,
            _MIN_LITELLM_VERSION,
            _MAX_TESTED_LITELLM_VERSION,
        )
        _import_checked = True


def get_anthropic_messages_adapter() -> Any:
    """Return the process-wide LiteLLM Anthropic<->OpenAI adapter instance.

    The returned object exposes ``translate_anthropic_to_openai`` and
    ``translate_openai_response_to_anthropic``. The first call imports and
    instantiates the adapter; every later call reuses the cached object.

    Raises:
        RuntimeError: if litellm is missing, or if the adapter class cannot be
            imported (e.g. relocated in a newer litellm release).
    """
    global _cached_adapter
    if _cached_adapter is None:
        _check_import_once()
        try:
            from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
                LiteLLMAnthropicMessagesAdapter,
            )
        except ImportError as exc:
            raise RuntimeError(
                "Failed to import LiteLLMAnthropicMessagesAdapter from LiteLLM. "
                "The experimental_pass_through module may have been relocated in a newer release. "
                f"Tested with LiteLLM {_MIN_LITELLM_VERSION}..{_MAX_TESTED_LITELLM_VERSION}. "
                f"To pin to a known-good version: pip install 'litellm<={_MAX_TESTED_LITELLM_VERSION}'. "
                f"Original error: {exc}"
            ) from exc
        _cached_adapter = LiteLLMAnthropicMessagesAdapter()
    return _cached_adapter


def ensure_available() -> None:
    """Fail fast at server startup if the LiteLLM integration is unusable.

    Importing litellm and instantiating the adapter here surfaces a
    misconfiguration immediately, instead of on the first /v1/messages
    request.
    """
    _check_import_once()
    get_anthropic_messages_adapter()
Loading
Loading