diff --git a/docs/EN/source/index.rst b/docs/EN/source/index.rst
index 808f432892..d4a36385cf 100755
--- a/docs/EN/source/index.rst
+++ b/docs/EN/source/index.rst
@@ -53,6 +53,7 @@ Documentation List
    Multimodal Deployment <tutorial/multimodal>
    Reward Model Deployment <tutorial/reward_model>
    OpenAI api Usage <tutorial/openai>
+   Anthropic Messages API <tutorial/anthropic>
    Function Calling <tutorial/function_calling>
    Reasoning Parser <tutorial/reasoning_parser>
    APIServer Parameters <tutorial/api_server_args_zh>
diff --git a/docs/EN/source/tutorial/anthropic.rst b/docs/EN/source/tutorial/anthropic.rst
new file mode 100644
index 0000000000..4bd9fabcba
--- /dev/null
+++ b/docs/EN/source/tutorial/anthropic.rst
@@ -0,0 +1,80 @@
+.. _anthropic_api:
+
+Anthropic Messages API (Experimental)
+=====================================
+
+LightLLM can expose a ``/v1/messages`` endpoint that speaks the Anthropic
+Messages API wire protocol. This is useful if you have client code written
+against the Anthropic Python/TypeScript SDK and want to point it at a locally
+hosted open-source model without rewriting the client.
+
+Enabling
+--------
+
+Install the optional dependency:
+
+.. code-block:: bash
+
+    pip install 'lightllm[anthropic_api]'
+
+Start the server with the ``--enable_anthropic_api`` flag:
+
+.. code-block:: bash
+
+    python -m lightllm.server.api_server \
+        --model_dir /path/to/model \
+        --enable_anthropic_api \
+        --port 8088
+
+Using it from the Anthropic SDK
+-------------------------------
+
+.. code-block:: python
+
+    import anthropic
+
+    client = anthropic.Anthropic(
+        base_url="http://localhost:8088",
+        api_key="dummy",
+    )
+    resp = client.messages.create(
+        model="any-name",  # echoed back; LightLLM serves the loaded model
+        max_tokens=1024,
+        messages=[{"role": "user", "content": "hello"}],
+    )
+    print(resp.content[0].text)
+
+Streaming works the same way the Anthropic SDK expects:
+
+.. code-block:: python
+
+    with client.messages.stream(
+        model="any-name",
+        max_tokens=256,
+        messages=[{"role": "user", "content": "Count from 1 to 5."}],
+    ) as stream:
+        for text in stream.text_stream:
+            print(text, end="", flush=True)
+
+Supported features
+------------------
+
+- Text generation (streaming and non-streaming)
+- System prompts
+- Tool use / function calling
+- Multi-turn conversations
+- Vision (image inputs) via Anthropic content blocks
+
+Known limitations
+-----------------
+
+- Prompt caching (``cache_control``) is accepted but ignored; ``cache_*``
+  fields in ``usage`` are always zero.
+- Extended thinking (``thinking`` parameter) is not supported.
+- The Batch API (``/v1/messages/batches``) and Files API are not implemented.
+- Model name is accepted but ignored; LightLLM always serves the model
+  loaded via ``--model_dir`` and echoes the requested name back in the response.
+- On the streaming path, ``message_start.message.usage.input_tokens`` is
+  always ``0`` because the upstream usage chunk arrives after all content
+  chunks. Clients that need an accurate prompt-token count should read
+  ``message_delta.usage`` at the end of the stream.
diff --git a/lightllm/server/_litellm_shim.py b/lightllm/server/_litellm_shim.py
new file mode 100644
index 0000000000..a055d9be71
--- /dev/null
+++ b/lightllm/server/_litellm_shim.py
@@ -0,0 +1,104 @@
+"""LiteLLM integration shim for the Anthropic Messages API endpoint.
+
+LiteLLM's Anthropic<->OpenAI translation code lives under an
+``experimental_pass_through`` import path. Centralising all LiteLLM imports
+here means a LiteLLM upgrade that relocates those symbols requires editing
+exactly one file. Callers should use the getters below; they must not
+import LiteLLM symbols directly from elsewhere in the server package.
+"""
+from __future__ import annotations
+
+from typing import Any
+
+from lightllm.utils.log_utils import init_logger
+
+logger = init_logger(__name__)
+
+# Known-good LiteLLM versions. Bump explicitly after retesting.
+_MIN_LITELLM_VERSION = "1.52.0"
+_MAX_TESTED_LITELLM_VERSION = "1.84.0"
+
+_cached_adapter: Any = None
+_import_checked: bool = False
+
+
+def _raise_missing() -> None:
+    """Raise the RuntimeError carrying the install hint for the missing 'litellm' package."""
+    headline = "--enable_anthropic_api requires the 'litellm' package. Install it with:\n"
+    install_cmd = f"    pip install 'litellm>={_MIN_LITELLM_VERSION}'"
+    raise RuntimeError(headline + install_cmd)
+
+
+def _get_litellm_version() -> str:
+    """Best-effort lookup of the installed litellm version string.
+
+    Package metadata is tried first (litellm >= 1.x has no ``__version__``
+    module attribute); returns ``"unknown"`` when every lookup fails.
+    """
+    try:
+        from importlib import metadata as pkg_metadata
+
+        return pkg_metadata.version("litellm")
+    except Exception:
+        # Metadata lookup failed; try the module attribute older builds expose.
+        pass
+    try:
+        import litellm as litellm_module
+
+        return getattr(litellm_module, "__version__", "unknown")
+    except Exception:
+        return "unknown"
+
+
+def _check_import_once() -> None:
+    """Verify once per process that litellm imports, logging its version."""
+    global _import_checked
+    if not _import_checked:
+        try:
+            import litellm  # noqa: F401
+        except ImportError:
+            _raise_missing()
+        else:
+            logger.info(
+                "LiteLLM detected (version=%s) for Anthropic API compatibility layer. Tested range: %s..%s",
+                _get_litellm_version(),
+                _MIN_LITELLM_VERSION,
+                _MAX_TESTED_LITELLM_VERSION,
+            )
+        # Only reached when the import succeeded; a failed import raised above.
+        _import_checked = True
+
+
+def get_anthropic_messages_adapter() -> Any:
+    """Return the process-wide LiteLLM Anthropic<->OpenAI adapter instance.
+
+    The adapter is created on first use and reused afterwards; callers rely
+    on its ``translate_anthropic_to_openai`` and
+    ``translate_openai_response_to_anthropic`` methods. Raises RuntimeError
+    when LiteLLM is missing or the adapter class cannot be imported.
+    """
+    global _cached_adapter
+    if _cached_adapter is None:
+        _check_import_once()
+        try:
+            from litellm.llms.anthropic.experimental_pass_through.adapters.transformation import (
+                LiteLLMAnthropicMessagesAdapter as adapter_cls,
+            )
+        except ImportError as exc:
+            tested = f"{_MIN_LITELLM_VERSION}..{_MAX_TESTED_LITELLM_VERSION}"
+            raise RuntimeError(
+                "Failed to import LiteLLMAnthropicMessagesAdapter from LiteLLM. "
+                "The experimental_pass_through module may have been relocated in a newer release. "
+                f"Tested with LiteLLM {tested}. "
+                f"To pin to a known-good version: pip install 'litellm<={_MAX_TESTED_LITELLM_VERSION}'. "
+                f"Original error: {exc}"
+            ) from exc
+        _cached_adapter = adapter_cls()
+    return _cached_adapter
+
+
+def ensure_available() -> None:
+    """Fail fast at server startup: confirm LiteLLM imports and its Anthropic
+    adapter can be built, so misconfiguration surfaces before the first request."""
+    _check_import_once()
+    get_anthropic_messages_adapter()
diff --git a/lightllm/server/api_anthropic.py b/lightllm/server/api_anthropic.py
new file mode 100644
index 0000000000..67cd9f2777
--- /dev/null
+++ b/lightllm/server/api_anthropic.py
@@ -0,0 +1,538 @@
+"""Anthropic Messages API compatibility layer.
+
+Translates incoming /v1/messages requests into LightLLM's internal chat
+completions pipeline by delegating the hard parts (content-block parsing,
+tool schema normalisation, stop-reason mapping) to LiteLLM's adapter.
+
+The streaming path intercepts the OpenAI-format SSE stream from
+chat_completions_impl and re-emits it as the Anthropic event sequence
+(message_start, content_block_*, message_delta, message_stop).
+"""
+from __future__ import annotations
+
+import uuid
+import ujson as json
+from http import HTTPStatus
+from typing import Any, Dict, Tuple
+
+from fastapi import Request
+from fastapi.responses import JSONResponse, Response
+
+from lightllm.utils.log_utils import init_logger
+
+from ._litellm_shim import get_anthropic_messages_adapter
+
+logger = init_logger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Request translation
+# ---------------------------------------------------------------------------
+
+
+def _anthropic_to_chat_request(anthropic_body: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, str]]:
+    """Convert an Anthropic Messages request body into the keyword dict used
+    to build a LightLLM ``ChatCompletionRequest``.
+
+    Returns ``(chat_request_dict, tool_name_mapping)``. Hand the mapping to
+    ``_chat_response_to_anthropic`` so tool names shortened by LiteLLM's
+    64-character limit can be restored in the response.
+    """
+    adapter = get_anthropic_messages_adapter()
+    translated, tool_name_mapping = adapter.translate_anthropic_to_openai(anthropic_body)
+
+    # Objects offering model_dump() are dumped without None fields; anything
+    # else is copied through dict().
+    chat_kwargs = (
+        translated.model_dump(exclude_none=True) if hasattr(translated, "model_dump") else dict(translated)
+    )
+
+    # Carry the client's max_tokens over when the translation kept no limit.
+    has_limit = "max_tokens" in chat_kwargs or "max_completion_tokens" in chat_kwargs
+    if not has_limit and "max_tokens" in anthropic_body:
+        chat_kwargs["max_tokens"] = anthropic_body["max_tokens"]
+
+    # Strip keys this module lists as unknown before the dict is returned.
+    for unknown_key in ("extra_body", "metadata", "anthropic_version", "cache_control"):
+        chat_kwargs.pop(unknown_key, None)
+
+    return chat_kwargs, tool_name_mapping
+
+
+# ---------------------------------------------------------------------------
+# Response translation
+# ---------------------------------------------------------------------------
+
+
+_FINISH_REASON_TO_STOP_REASON = {
+    "stop": "end_turn",
+    "length": "max_tokens",
+    "tool_calls": "tool_use",
+    None: "end_turn",
+}
+
+
+def _chat_response_to_anthropic(
+    chat_response: Any,
+    tool_name_mapping: Dict[str, str],
+    requested_model: str,
+) -> Dict[str, Any]:
+    """Convert a LightLLM ``ChatCompletionResponse`` into an Anthropic
+    Messages response dict.
+
+    The response is dumped to a dict, rebuilt as a ``litellm.ModelResponse``
+    and handed to LiteLLM's ``translate_openai_response_to_anthropic``. If
+    any of that raises, the text-only fallback translator is used instead.
+    """
+    adapter = get_anthropic_messages_adapter()
+    payload = (
+        chat_response.model_dump(exclude_none=True) if hasattr(chat_response, "model_dump") else dict(chat_response)
+    )
+
+    try:
+        # Imported lazily so this module stays importable when litellm is absent.
+        from litellm import ModelResponse  # type: ignore
+
+        wrapped = ModelResponse(**payload)
+        translated = adapter.translate_openai_response_to_anthropic(wrapped, tool_name_mapping)
+    except Exception as exc:
+        logger.warning("LiteLLM response translation failed (%s); using fallback", exc)
+        return _fallback_openai_to_anthropic(payload, requested_model)
+
+    # Accept either an object with model_dump() or a plain mapping.
+    if hasattr(translated, "model_dump"):
+        normalized_input = translated.model_dump(exclude_none=True)
+    else:
+        normalized_input = dict(translated)
+
+    return _normalize_anthropic_response(normalized_input, requested_model)
+
+
+def _normalize_anthropic_response(result: Dict[str, Any], requested_model: str) -> Dict[str, Any]:
+    """Tidy a non-streaming Anthropic response dict in place and return it.
+
+    Applied clean-ups:
+
+    - ``model`` is overwritten with the client-supplied name;
+    - ``id`` is replaced by a fresh ``msg_``-prefixed id unless it already
+      starts with ``msg_``;
+    - ``type`` / ``role`` / ``stop_sequence`` receive defaults when absent;
+    - text blocks whose ``text`` is empty or missing are dropped;
+    - ``provider_specific_fields`` is removed from each remaining dict
+      block; non-dict blocks are kept untouched.
+    """
+    result["model"] = requested_model
+
+    existing_id = str(result.get("id", ""))
+    if not existing_id.startswith("msg_"):
+        result["id"] = f"msg_{uuid.uuid4().hex[:24]}"
+    for field, default in (("type", "message"), ("role", "assistant"), ("stop_sequence", None)):
+        result.setdefault(field, default)
+
+    # A block is discarded only when it is a dict text block with falsy text.
+    def _is_empty_text(block: Any) -> bool:
+        return isinstance(block, dict) and block.get("type") == "text" and not block.get("text")
+
+    kept_blocks = [block for block in (result.get("content") or []) if not _is_empty_text(block)]
+    # Surviving dict blocks lose the LiteLLM-specific field; others pass through.
+    for block in kept_blocks:
+        if isinstance(block, dict):
+            block.pop("provider_specific_fields", None)
+    result["content"] = kept_blocks
+
+    return result
+
+
+def _fallback_openai_to_anthropic(openai_dict: Dict[str, Any], requested_model: str) -> Dict[str, Any]:
+    """Hand-rolled OpenAI->Anthropic conversion covering text-only responses.
+
+    Only the first choice is converted. Tool calls are rejected with a
+    RuntimeError rather than dropped, because omitting them would corrupt
+    the response.
+    """
+    first_choice = (openai_dict.get("choices") or [{}])[0]
+    message = first_choice.get("message") or {}
+    if message.get("tool_calls"):
+        raise RuntimeError("Fallback translator cannot handle tool_calls; LiteLLM adapter path is required.")
+    usage = openai_dict.get("usage") or {}
+    usage_block = {
+        "input_tokens": int(usage.get("prompt_tokens", 0)),
+        "output_tokens": int(usage.get("completion_tokens", 0)),
+        "cache_creation_input_tokens": 0,
+        "cache_read_input_tokens": 0,
+    }
+    stop_reason = _FINISH_REASON_TO_STOP_REASON.get(first_choice.get("finish_reason"), "end_turn")
+    return {
+        "id": f"msg_{uuid.uuid4().hex[:24]}",
+        "type": "message",
+        "role": "assistant",
+        "model": requested_model,
+        "content": [{"type": "text", "text": message.get("content") or ""}],
+        "stop_reason": stop_reason,
+        "stop_sequence": None,
+        "usage": usage_block,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Streaming bridge
+# ---------------------------------------------------------------------------
+
+
+def _sse_event(event_type: str, data_obj: Dict[str, Any]) -> bytes:
+    """Serialise one Anthropic-style SSE frame (``event:`` line plus ``data:`` line) as UTF-8 bytes."""
+    return "event: {}\ndata: {}\n\n".format(event_type, json.dumps(data_obj)).encode("utf-8")
+
+
+async def _openai_sse_to_anthropic_events(
+    openai_body_iterator,
+    requested_model: str,
+    message_id: str,
+):
+    """Async generator: consume OpenAI-format SSE chunks (str or bytes) and
+    yield Anthropic-format SSE event bytes.
+
+    Text deltas are emitted as text_delta content blocks; tool-call deltas
+    become tool_use content blocks whose arguments stream as
+    input_json_delta events. Only one content block is tracked as open, so
+    the current block is closed before another one is opened. Nothing is
+    emitted (not even message_start) if no chunk carries a choice; the
+    final usage and stop_reason go out in message_delta before message_stop.
+    """
+    message_started = False
+    next_content_index = 0
+
+    # Currently open content block, if any.
+    # current_open is either None or a tuple ("text"|"tool_use", anthropic_index).
+    current_open = None
+
+    text_block_index = None  # Anthropic index of the active text block.
+
+    # Per-tool-call state keyed by OpenAI streaming tool_calls[i].index.
+    # Each entry: {anthropic_index, id, name, started, buffered_args}
+    tool_state: Dict[int, Dict[str, Any]] = {}
+
+    final_stop_reason = "end_turn"
+    final_output_tokens = 0
+    final_input_tokens = 0
+
+    _OPENAI_TO_ANTHROPIC_STOP = {
+        "stop": "end_turn",
+        "length": "max_tokens",
+        "tool_calls": "tool_use",
+    }
+
+    async for raw_chunk in openai_body_iterator:
+        if not raw_chunk:
+            continue
+        # chat_completions_impl yields str ("data: {...}\n\n"); some callers or
+        # middlewares may hand us bytes. Normalise to str so the splitter below
+        # does not have to branch on type.
+        if isinstance(raw_chunk, (bytes, bytearray)):
+            raw_chunk = raw_chunk.decode("utf-8", errors="replace")
+        # A single StreamingResponse chunk may contain multiple SSE lines.
+        for line in raw_chunk.split("\n"):
+            line = line.strip()
+            if not line or not line.startswith("data: "):
+                continue
+            payload = line[len("data: ") :]
+            if payload == "[DONE]":
+                continue
+            try:
+                chunk = json.loads(payload)
+            except Exception:
+                logger.debug("Skipping non-JSON SSE payload: %r", payload)
+                continue
+
+            # final_output_tokens is sourced exclusively from the trailing usage
+            # chunk emitted by chat_completions_impl; we intentionally do not
+            # estimate it per delta because that would diverge from the
+            # tokenizer-accurate count on any upstream change.
+            usage = chunk.get("usage")
+            if usage:
+                final_input_tokens = int(usage.get("prompt_tokens", 0))
+                final_output_tokens = int(usage.get("completion_tokens", final_output_tokens))
+
+            choices = chunk.get("choices") or []
+            if not choices:
+                continue
+            choice = choices[0]
+            delta = choice.get("delta") or {}
+            finish_reason = choice.get("finish_reason")
+
+            # Emit message_start on the first chunk that has a non-empty choices list.
+            # NOTE: The upstream usage chunk arrives AFTER all content chunks, so
+            # final_input_tokens is still 0 here. message_start.message.usage.input_tokens
+            # will always be 0 on this path — Anthropic clients that care about prompt
+            # token counts should read message_delta.usage instead. Fixing this would
+            # require buffering until the usage chunk arrives, trading streaming
+            # latency for accurate prompt-token reporting at message_start time.
+            if not message_started:
+                message_started = True
+                yield _sse_event(
+                    "message_start",
+                    {
+                        "type": "message_start",
+                        "message": {
+                            "id": message_id,
+                            "type": "message",
+                            "role": "assistant",
+                            "model": requested_model,
+                            "content": [],
+                            "stop_reason": None,
+                            "stop_sequence": None,
+                            "usage": {
+                                "input_tokens": final_input_tokens,
+                                "output_tokens": 0,
+                                "cache_creation_input_tokens": 0,
+                                "cache_read_input_tokens": 0,
+                            },
+                        },
+                    },
+                )
+
+            # ---- Text delta ----
+            content_piece = delta.get("content")
+            if content_piece:
+                if current_open is None or current_open[0] != "text":
+                    if current_open is not None:
+                        yield _sse_event(
+                            "content_block_stop",
+                            {"type": "content_block_stop", "index": current_open[1]},
+                        )
+                    text_block_index = next_content_index
+                    next_content_index += 1
+                    current_open = ("text", text_block_index)
+                    yield _sse_event(
+                        "content_block_start",
+                        {
+                            "type": "content_block_start",
+                            "index": text_block_index,
+                            "content_block": {"type": "text", "text": ""},
+                        },
+                    )
+                yield _sse_event(
+                    "content_block_delta",
+                    {
+                        "type": "content_block_delta",
+                        "index": text_block_index,
+                        "delta": {"type": "text_delta", "text": content_piece},
+                    },
+                )
+
+            # ---- Tool-call deltas ----
+            for tc in delta.get("tool_calls") or []:
+                tc_idx = tc.get("index", 0)
+                fn = tc.get("function") or {}
+                state = tool_state.setdefault(
+                    tc_idx,
+                    {
+                        "anthropic_index": None,
+                        "id": None,
+                        "name": None,
+                        "started": False,
+                        "buffered_args": "",
+                    },
+                )
+                if tc.get("id"):
+                    state["id"] = tc["id"]
+                if fn.get("name"):
+                    state["name"] = fn["name"]
+                new_args = fn.get("arguments") or ""
+
+                if not state["started"]:
+                    # Buffer args until we know the tool name (required for
+                    # content_block_start).
+                    state["buffered_args"] += new_args
+                    if not state["name"]:
+                        continue
+                    # Close whatever block is currently open (text or a
+                    # previous tool_use) before opening this one.
+                    if current_open is not None:
+                        yield _sse_event(
+                            "content_block_stop",
+                            {"type": "content_block_stop", "index": current_open[1]},
+                        )
+                    state["anthropic_index"] = next_content_index
+                    next_content_index += 1
+                    current_open = ("tool_use", state["anthropic_index"])
+                    state["started"] = True
+                    yield _sse_event(
+                        "content_block_start",
+                        {
+                            "type": "content_block_start",
+                            "index": state["anthropic_index"],
+                            "content_block": {
+                                "type": "tool_use",
+                                "id": state["id"] or f"toolu_{uuid.uuid4().hex[:24]}",
+                                "name": state["name"],
+                                "input": {},
+                            },
+                        },
+                    )
+                    if state["buffered_args"]:
+                        yield _sse_event(
+                            "content_block_delta",
+                            {
+                                "type": "content_block_delta",
+                                "index": state["anthropic_index"],
+                                "delta": {
+                                    "type": "input_json_delta",
+                                    "partial_json": state["buffered_args"],
+                                },
+                            },
+                        )
+                        state["buffered_args"] = ""
+                else:
+                    # Already started: forward further argument fragments as
+                    # input_json_delta on this call's own block index. No
+                    # check is made that this block is still the open one;
+                    # that relies on OpenAI streaming tool_calls sequentially
+                    # per index, so in practice current_open is this block.
+                    if new_args:
+                        yield _sse_event(
+                            "content_block_delta",
+                            {
+                                "type": "content_block_delta",
+                                "index": state["anthropic_index"],
+                                "delta": {
+                                    "type": "input_json_delta",
+                                    "partial_json": new_args,
+                                },
+                            },
+                        )
+
+            if finish_reason:
+                final_stop_reason = _OPENAI_TO_ANTHROPIC_STOP.get(finish_reason, "end_turn")
+
+    # Close any still-open content block.
+    if current_open is not None:
+        yield _sse_event(
+            "content_block_stop",
+            {"type": "content_block_stop", "index": current_open[1]},
+        )
+
+    # message_delta carries the final stop_reason and the final token counts.
+    if message_started:
+        yield _sse_event(
+            "message_delta",
+            {
+                "type": "message_delta",
+                "delta": {"stop_reason": final_stop_reason, "stop_sequence": None},
+                "usage": {"input_tokens": final_input_tokens, "output_tokens": final_output_tokens},
+            },
+        )
+        yield _sse_event("message_stop", {"type": "message_stop"})
+
+
+# ---------------------------------------------------------------------------
+# Error response helper
+# ---------------------------------------------------------------------------
+
+
+# HTTP status → Anthropic error type. Derived from
+# https://docs.anthropic.com/en/api/errors ; values outside this map fall
+# back to "api_error".
+_STATUS_TO_ERROR_TYPE = {
+    400: "invalid_request_error",
+    401: "authentication_error",
+    403: "permission_error",
+    404: "not_found_error",
+    413: "request_too_large",
+    429: "rate_limit_error",
+    500: "api_error",
+    529: "overloaded_error",
+}
+
+
+def _anthropic_error_response(status: HTTPStatus, message: str) -> JSONResponse:
+    """Build a JSON error response in Anthropic's envelope.
+
+    The body is ``{"type": "error", "error": {"type": ..., "message": ...}}``;
+    the inner type comes from ``_STATUS_TO_ERROR_TYPE`` and defaults to
+    ``"api_error"`` for status codes that are not in that table.
+    """
+    status_code = int(status)
+    error_body = {"type": _STATUS_TO_ERROR_TYPE.get(status_code, "api_error"), "message": message}
+    # This envelope shape is what Anthropic clients parse for the message.
+    envelope = {"type": "error", "error": error_body}
+    return JSONResponse(envelope, status_code=status_code)
+
+
+def _rewrap_openai_error_as_anthropic(resp: JSONResponse) -> JSONResponse:
+    """Re-express an OpenAI-format error JSONResponse in Anthropic's error
+    envelope, keeping its status code. Best-effort: when the body cannot be
+    decoded the original response is returned unchanged."""
+    try:
+        decoded = json.loads(bytes(resp.body).decode("utf-8")) or {}
+        error_info = decoded.get("error") or {}
+        message = error_info.get("message") or "request failed"
+    except Exception:
+        return resp
+    return _anthropic_error_response(HTTPStatus(resp.status_code), message)
+
+
+# ---------------------------------------------------------------------------
+# HTTP entry point
+# ---------------------------------------------------------------------------
+
+
+async def anthropic_messages_impl(raw_request: Request) -> Response:
+    """Serve POST /v1/messages: translate the request, run chat completions, translate the reply back."""
+    # Lazy imports to avoid pulling in heavy server deps at module import time.
+    from .api_models import ChatCompletionRequest, ChatCompletionResponse
+    from .api_openai import chat_completions_impl
+
+    try:
+        raw_body = await raw_request.json()
+    except Exception as exc:
+        return _anthropic_error_response(HTTPStatus.BAD_REQUEST, f"Invalid JSON body: {exc}")
+
+    if not isinstance(raw_body, dict):
+        return _anthropic_error_response(HTTPStatus.BAD_REQUEST, "Request body must be a JSON object")
+
+    requested_model = raw_body.get("model", "default")
+    is_stream = bool(raw_body.get("stream"))
+
+    try:
+        chat_dict, tool_name_mapping = _anthropic_to_chat_request(raw_body)
+    except Exception as exc:
+        logger.exception("Failed to translate Anthropic request")
+        return _anthropic_error_response(HTTPStatus.BAD_REQUEST, f"Request translation failed: {exc}")
+
+    # Force the downstream path to stream if the client asked for stream.
+    chat_dict["stream"] = is_stream
+
+    try:
+        chat_request = ChatCompletionRequest(**chat_dict)
+    except Exception as exc:
+        logger.exception("Failed to build ChatCompletionRequest")
+        return _anthropic_error_response(HTTPStatus.BAD_REQUEST, f"Invalid request after translation: {exc}")
+
+    downstream = await chat_completions_impl(chat_request, raw_request)
+
+    if is_stream:
+        from fastapi.responses import StreamingResponse
+
+        if not isinstance(downstream, StreamingResponse):
+            # chat_completions_impl returned an OpenAI-format error — rewrap it.
+            if isinstance(downstream, JSONResponse):
+                return _rewrap_openai_error_as_anthropic(downstream)
+            return downstream
+
+        message_id = f"msg_{uuid.uuid4().hex[:24]}"
+        anthropic_stream = _openai_sse_to_anthropic_events(downstream.body_iterator, requested_model, message_id)
+        return StreamingResponse(anthropic_stream, media_type="text/event-stream")
+
+    if not isinstance(downstream, ChatCompletionResponse):
+        if isinstance(downstream, JSONResponse):
+            return _rewrap_openai_error_as_anthropic(downstream)
+        return downstream
+
+    try:
+        anthropic_dict = _chat_response_to_anthropic(downstream, tool_name_mapping, requested_model)
+    except Exception as exc:
+        logger.error("Failed to translate response to Anthropic format: %s", exc)
+        # _anthropic_error_response already returns a JSONResponse; do not wrap it again.
+        return _anthropic_error_response(HTTPStatus.INTERNAL_SERVER_ERROR, str(exc))
+    return JSONResponse(anthropic_dict)
diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py
index 7dcd7df1bb..5b456b2ce7 100644
--- a/lightllm/server/api_cli.py
+++ b/lightllm/server/api_cli.py
@@ -450,6 +450,14 @@ def make_argument_parser() -> argparse.ArgumentParser:
         center : remove some tokens in center loc to make input_token_len + max_new_tokens <= max_req_total_len""",
     )
     parser.add_argument("--use_tgi_api", action="store_true", help="use tgi input and ouput format")
+    parser.add_argument(
+        "--enable_anthropic_api",
+        action="store_true",
+        help="""Expose an Anthropic Messages API compatible endpoint (/v1/messages)
+        that translates requests into the internal chat completions path. Requires
+        the 'litellm' package to be installed. When enabled, clients written against
+        the Anthropic SDK can use this server by setting base_url to the server address.""",
+    )
     parser.add_argument(
         "--health_monitor", action="store_true", help="check the health of service and restart when error"
     )
diff --git a/lightllm/server/api_http.py b/lightllm/server/api_http.py
index 50d992bf9c..0d8fbc0db1 100755
--- a/lightllm/server/api_http.py
+++ b/lightllm/server/api_http.py
@@ -110,6 +110,16 @@ def set_args(self, args: StartArgs):
             if self.model_created is None:
                 self.model_created = int(time.time())
 
+        if getattr(args, "enable_anthropic_api", False):
+            from ._litellm_shim import ensure_available
+
+            try:
+                ensure_available()
+                logger.info("Anthropic Messages API enabled at POST /v1/messages")
+            except RuntimeError as exc:
+                logger.error("Cannot enable Anthropic API: %s", exc)
+                raise
+
 
 g_objs = G_Objs()
 
@@ -266,6 +276,22 @@ async def completions(request: CompletionRequest, raw_request: Request) -> Respo
     return resp
 
 
+@app.post("/v1/messages")
+async def anthropic_messages(raw_request: Request) -> Response:
+    """Anthropic Messages endpoint; rejections use Anthropic's error envelope."""
+    # api_anthropic only imports litellm inside functions, so this is safe when disabled.
+    from .api_anthropic import _anthropic_error_response, anthropic_messages_impl
+
+    if not getattr(g_objs.args, "enable_anthropic_api", False):
+        return _anthropic_error_response(
+            HTTPStatus.NOT_FOUND, "Anthropic API is not enabled. Start the server with --enable_anthropic_api."
+        )
+    if get_env_start_args().run_mode in ["prefill", "decode", "nixl_prefill", "nixl_decode"]:
+        message = "service in pd mode dont recv reqs from http interface"
+        return _anthropic_error_response(HTTPStatus.EXPECTATION_FAILED, message)
+    return await anthropic_messages_impl(raw_request)
+
+
 @app.get("/v1/models", response_model=ModelListResponse)
 @app.post("/v1/models", response_model=ModelListResponse)
 async def get_models(raw_request: Request):
diff --git a/setup.py b/setup.py
index 94c5b192e6..b6141741c7 100644
--- a/setup.py
+++ b/setup.py
@@ -28,5 +28,8 @@
         "triton",
         "orjson",
     ],
+    extras_require={
+        "anthropic_api": ["litellm>=1.52.0,<1.85"],
+    },
     package_data=package_data,
 )