Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
34a470f
feat(api): add --enable_anthropic_api CLI flag
sufubao Apr 14, 2026
6125090
feat(api): add LiteLLM shim module for Anthropic adapter
sufubao Apr 14, 2026
2f5774b
fix(api): bump _MAX_TESTED_LITELLM_VERSION to 1.84.0
sufubao Apr 14, 2026
40c229a
refactor(api): remove dead stream wrapper cache, sharpen import error
sufubao Apr 14, 2026
9158066
test(api): add LiteLLM adapter round-trip characterisation test
sufubao Apr 14, 2026
3a3b0b0
feat(api): add non-streaming Anthropic Messages handler
sufubao Apr 14, 2026
455b487
feat(api): register POST /v1/messages route
sufubao Apr 15, 2026
1bb536d
feat(api): stream Anthropic Messages events by wrapping OpenAI SSE
sufubao Apr 15, 2026
fa2f2a2
fix(api): address Task 6 review feedback on streaming bridge
sufubao Apr 15, 2026
af65e9b
test(api): cover Anthropic tool-use request and response translation
sufubao Apr 15, 2026
6ecf17a
test(api): simplify tautological tool-name assertion
sufubao Apr 15, 2026
0b570c1
test(api): add manual integration test script for Anthropic SDK
sufubao Apr 15, 2026
d36e5c3
test(api): exclude manual Anthropic SDK script from pytest collection
sufubao Apr 15, 2026
9f8c9fc
build: declare litellm as anthropic_api optional extra
sufubao Apr 15, 2026
d80a5e6
docs: document the /v1/messages Anthropic compatibility endpoint
sufubao Apr 15, 2026
9600d51
test(api): cover Anthropic image content block pass-through
sufubao Apr 15, 2026
3f57999
fix(api): accept str SSE chunks in Anthropic streaming bridge
sufubao Apr 15, 2026
8b4cddd
fix(api): stream tool_use content blocks in Anthropic SSE bridge
sufubao Apr 15, 2026
04db237
fix(api): Anthropic response cosmetic cleanups and error envelope
sufubao Apr 15, 2026
b03d06c
fix(api): catch translation errors and return Anthropic error envelope
sufubao Apr 15, 2026
ca59417
test: remove all Anthropic API tests
sufubao Apr 15, 2026
1f1d300
style: apply black formatting to api_anthropic and _litellm_shim
sufubao Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/EN/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Documentation List
Multimodal Deployment <tutorial/multimodal>
Reward Model Deployment <tutorial/reward_model>
OpenAI api Usage <tutorial/openai>
Anthropic Messages API <tutorial/anthropic>
Function Calling <tutorial/function_calling>
Reasoning Parser <tutorial/reasoning_parser>
APIServer Parameters <tutorial/api_server_args_zh>
Expand Down
80 changes: 80 additions & 0 deletions docs/EN/source/tutorial/anthropic.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
.. _anthropic_api:

Anthropic Messages API (Experimental)
=====================================

LightLLM can expose a ``/v1/messages`` endpoint that speaks the Anthropic
Messages API wire protocol. This is useful if you have client code written
against the Anthropic Python/TypeScript SDK and want to point it at a locally
hosted open-source model without rewriting the client.

Enabling
--------

Install the optional dependency:

.. code-block:: bash

pip install 'lightllm[anthropic_api]'

Start the server with the flag:

.. code-block:: bash

python -m lightllm.server.api_server \
--model_dir /path/to/model \
--enable_anthropic_api \
--port 8088

Using it from the Anthropic SDK
-------------------------------

.. code-block:: python

import anthropic

client = anthropic.Anthropic(
base_url="http://localhost:8088",
api_key="dummy",
)
resp = client.messages.create(
model="any-name", # echoed back; LightLLM serves the loaded model
max_tokens=1024,
messages=[{"role": "user", "content": "hello"}],
)
print(resp.content[0].text)

Streaming works the same way the Anthropic SDK expects:

.. code-block:: python

with client.messages.stream(
model="any-name",
max_tokens=256,
messages=[{"role": "user", "content": "Count from 1 to 5."}],
) as stream:
for text in stream.text_stream:
print(text, end="", flush=True)

Supported features
------------------

- Text generation (streaming and non-streaming)
- System prompts
- Tool use / function calling
- Multi-turn conversations
- Vision (image inputs) via Anthropic content blocks

Known limitations
-----------------

- Prompt caching (``cache_control``) is accepted but ignored; ``cache_*``
fields in ``usage`` are always zero.
- Extended thinking (``thinking`` parameter) is not supported.
- The Batch API (``/v1/messages/batches``) and Files API are not implemented.
- Model name is accepted but ignored; LightLLM always serves the model
loaded via ``--model_dir`` and echoes the requested name back in the response.
- On the streaming path, ``message_start.message.usage.input_tokens`` is
always ``0`` because the upstream usage chunk arrives after all content
chunks. Clients that need an accurate prompt-token count should read
``message_delta.usage`` at the end of the stream.
104 changes: 104 additions & 0 deletions lightllm/server/_litellm_shim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""LiteLLM integration shim for the Anthropic Messages API endpoint.

LiteLLM's Anthropic<->OpenAI translation code lives under an
``experimental_pass_through`` import path. Centralising all LiteLLM imports
here means a LiteLLM upgrade that relocates those symbols requires editing
exactly one file. Callers should use the getters below; they must not
import LiteLLM symbols directly from elsewhere in the server package.
"""
from __future__ import annotations

from typing import Any

from lightllm.utils.log_utils import init_logger

logger = init_logger(__name__)

# Known-good LiteLLM versions. Bump explicitly after retesting.
_MIN_LITELLM_VERSION = "1.52.0"
_MAX_TESTED_LITELLM_VERSION = "1.84.0"

# Lazily-created singleton adapter; populated by get_anthropic_messages_adapter().
_cached_adapter: Any = None
# Set to True once the litellm import has been verified; guards _check_import_once().
_import_checked: bool = False


def _raise_missing() -> None:
    """Abort with an actionable error when the litellm package is absent.

    Raises:
        RuntimeError: always; the message includes the pip install command.
    """
    message = (
        "--enable_anthropic_api requires the 'litellm' package. Install it with:\n"
        f" pip install 'litellm>={_MIN_LITELLM_VERSION}'"
    )
    raise RuntimeError(message)


def _get_litellm_version() -> str:
"""Return the installed litellm version string, or 'unknown' if not found.

litellm >= 1.x does not expose ``__version__`` as a module attribute;
use importlib.metadata as the primary source.
"""
try:
import importlib.metadata

return importlib.metadata.version("litellm")
except Exception:
pass
# Fallback: some older builds do expose it.
try:
import litellm

return getattr(litellm, "__version__", "unknown")
except Exception:
return "unknown"


def _check_import_once() -> None:
    """Verify exactly once per process that litellm is importable.

    On failure this raises (via ``_raise_missing``); on success it logs the
    detected version against the tested range and latches ``_import_checked``
    so subsequent calls are no-ops.
    """
    global _import_checked
    if _import_checked:
        return
    try:
        import litellm  # noqa: F401
    except ImportError:
        _raise_missing()
    else:
        detected = _get_litellm_version()
        logger.info(
            "LiteLLM detected (version=%s) for Anthropic API compatibility layer. Tested range: %s..%s",
            detected,
            _MIN_LITELLM_VERSION,
            _MAX_TESTED_LITELLM_VERSION,
        )
        _import_checked = True


def get_anthropic_messages_adapter() -> Any:
    """Return the process-wide LiteLLM Anthropic<->OpenAI adapter instance.

    The returned object exposes ``translate_anthropic_to_openai`` and
    ``translate_openai_response_to_anthropic``. The first call imports and
    instantiates the adapter; every later call reuses the cached object.

    Raises:
        RuntimeError: if litellm is missing, or if the adapter class cannot be
            imported (e.g. relocated in a newer litellm release).
    """
    global _cached_adapter
    if _cached_adapter is None:
        _check_import_once()
        try:
            from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
                LiteLLMAnthropicMessagesAdapter,
            )
        except ImportError as exc:
            raise RuntimeError(
                "Failed to import LiteLLMAnthropicMessagesAdapter from LiteLLM. "
                "The experimental_pass_through module may have been relocated in a newer release. "
                f"Tested with LiteLLM {_MIN_LITELLM_VERSION}..{_MAX_TESTED_LITELLM_VERSION}. "
                f"To pin to a known-good version: pip install 'litellm<={_MAX_TESTED_LITELLM_VERSION}'. "
                f"Original error: {exc}"
            ) from exc
        _cached_adapter = LiteLLMAnthropicMessagesAdapter()
    return _cached_adapter


def ensure_available() -> None:
    """Fail fast at server startup if the LiteLLM integration is unusable.

    Importing litellm and instantiating the adapter here surfaces a
    misconfiguration immediately, instead of on the first /v1/messages
    request.
    """
    _check_import_once()
    get_anthropic_messages_adapter()
Loading
Loading