diff --git a/docs/CN/source/tutorial/multimodal_request.rst b/docs/CN/source/tutorial/multimodal_request.rst
new file mode 100644
index 0000000000..838f6b3301
--- /dev/null
+++ b/docs/CN/source/tutorial/multimodal_request.rst
@@ -0,0 +1,413 @@
+多模态请求指南
+============================
+
+LightLLM 提供两种 API 格式来接收多模态请求：OpenAI 兼容格式（推荐）和 Legacy 格式。本文档介绍如何向 LightLLM 服务发送包含图片和音频的多模态请求。
+
+OpenAI 兼容格式（推荐）
+------------------------
+
+使用 ``POST /v1/chat/completions`` 端点，与 OpenAI API 格式完全兼容。
+
+图片输入方式
+^^^^^^^^^^^^
+
+LightLLM 支持三种图片输入方式：
+
+**1. URL 方式**
+
+.. code-block:: json
+
+    {
+        "type": "image_url",
+        "image_url": {
+            "url": "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+        }
+    }
+
+**2. Base64 编码**
+
+.. code-block:: json
+
+    {
+        "type": "image_url",
+        "image_url": {
+            "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
+        }
+    }
+
+**3. 本地文件路径**
+
+.. code-block:: json
+
+    {
+        "type": "image_url",
+        "image_url": {
+            "url": "file:///path/to/image.jpg"
+        }
+    }
+
+音频输入方式
+^^^^^^^^^^^^
+
+音频同样支持 URL 和 Base64 两种方式：
+
+.. code-block:: json
+
+    {
+        "type": "audio_url",
+        "audio_url": {
+            "url": "https://example.com/audio.wav"
+        }
+    }
+
+使用 curl 发送请求
+^^^^^^^^^^^^^^^^^^
+
+**基本图片请求**
+
+.. code-block:: bash
+
+    curl http://127.0.0.1:8080/v1/chat/completions \
+      -H "Content-Type: application/json" \
+      -d '{
+        "model": "qwen",
+        "messages": [
+          {
+            "role": "user",
+            "content": [
+              {
+                "type": "image_url",
+                "image_url": {
+                  "url": "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+                }
+              },
+              {
+                "type": "text",
+                "text": "请描述这张图片的内容。"
+              }
+            ]
+          }
+        ],
+        "max_completion_tokens": 512,
+        "temperature": 0.7
+      }'
+
+**流式输出**
+
+.. code-block:: bash
+
+    curl http://127.0.0.1:8080/v1/chat/completions \
+      -H "Content-Type: application/json" \
+      -d '{
+        "model": "qwen",
+        "messages": [
+          {
+            "role": "user",
+            "content": [
+              {
+                "type": "image_url",
+                "image_url": {
+                  "url": "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+                }
+              },
+              {
+                "type": "text",
+                "text": "请描述这张图片的内容。"
+              }
+            ]
+          }
+        ],
+        "max_completion_tokens": 512,
+        "stream": true
+      }'
+
+使用 Python requests 发送请求
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
+
+    import requests
+
+    url = "http://127.0.0.1:8080/v1/chat/completions"
+    image_url = "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+
+    payload = {
+        "model": "qwen",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": "请描述这张图片的内容。"},
+                ],
+            }
+        ],
+        "max_completion_tokens": 512,
+        "temperature": 0.7,
+    }
+
+    response = requests.post(url, json=payload)
+    print(response.json())
+
+使用 OpenAI SDK 发送请求
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
+
+    from openai import OpenAI
+
+    client = OpenAI(base_url="http://127.0.0.1:8080/v1", api_key="none")
+
+    image_url = "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+
+    # 非流式
+    response = client.chat.completions.create(
+        model="qwen",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": "请描述这张图片的内容。"},
+                ],
+            }
+        ],
+        max_completion_tokens=512,
+        temperature=0.7,
+    )
+    print(response.choices[0].message.content)
+
+    # 流式
+    stream = client.chat.completions.create(
+        model="qwen",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": "请描述这张图片的内容。"},
+                ],
+            }
+        ],
+        max_completion_tokens=512,
+        stream=True,
+    )
+    for chunk in stream:
+        if chunk.choices[0].delta.content:
+            print(chunk.choices[0].delta.content, end="", flush=True)
+
+多图片请求
+^^^^^^^^^^
+
+在 ``content`` 列表中传入多个 ``image_url`` 即可：
+
+.. code-block:: python
+
+    response = client.chat.completions.create(
+        model="qwen",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": "https://example.com/image1.jpg"}},
+                    {"type": "image_url", "image_url": {"url": "https://example.com/image2.jpg"}},
+                    {"type": "text", "text": "请比较这两张图片的区别。"},
+                ],
+            }
+        ],
+        max_completion_tokens=512,
+    )
+
+多轮对话
+^^^^^^^^
+
+多轮对话中，相同图片会自动命中 embed 缓存，无需重复推理：
+
+.. code-block:: python
+
+    image_url = "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": image_url}},
+                {"type": "text", "text": "这张图片里有什么？"},
+            ],
+        },
+    ]
+
+    # 第一轮
+    response = client.chat.completions.create(
+        model="qwen", messages=messages, max_completion_tokens=512
+    )
+    answer = response.choices[0].message.content
+    messages.append({"role": "assistant", "content": answer})
+
+    # 第二轮（同一张图片会命中缓存）
+    messages.append({
+        "role": "user",
+        "content": [
+            {"type": "image_url", "image_url": {"url": image_url}},
+            {"type": "text", "text": "请更详细地描述图片中的文字内容。"},
+        ],
+    })
+    response = client.chat.completions.create(
+        model="qwen", messages=messages, max_completion_tokens=512
+    )
+
+Legacy 格式
+------------
+
+使用 ``POST /generate`` 端点，需要手动构造 prompt 模板和 multimodal_params。
+
+.. code-block:: python
+
+    import json
+    import requests
+    import base64
+
+
+    def run(query, uris):
+        images = []
+        for uri in uris:
+            if uri.startswith("http"):
+                images.append({"type": "url", "data": uri})
+            else:
+                with open(uri, "rb") as fin:
+                    b64 = base64.b64encode(fin.read()).decode("utf-8")
+                images.append({"type": "base64", "data": b64})
+
+        data = {
+            "inputs": query,
+            "parameters": {
+                "max_new_tokens": 512,
+                "do_sample": False,
+            },
+            "multimodal_params": {
+                "images": images,
+            },
+        }
+
+        url = "http://127.0.0.1:8080/generate"
+        headers = {"Content-Type": "application/json"}
+        response = requests.post(url, headers=headers, data=json.dumps(data))
+        return response
+
+
+    response = run(
+        uris=["https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"],
+        query="<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+              "<|im_start|>user\n<img></img>\n请描述这张图片的内容。<|im_end|>\n"
+              "<|im_start|>assistant\n",
+    )
+
+    if response.status_code == 200:
+        print(f"Result: {response.json()}")
+    else:
+        print(f"Error: {response.status_code}, {response.text}")
+
+.. note:: Legacy 格式需要手动构造 chat template，推荐使用 OpenAI 兼容格式以避免模板错误。
+
+常用采样参数
+------------
+
+.. list-table::
+   :header-rows: 1
+   :widths: 25 15 15 45
+
+   * - 参数
+     - 类型
+     - 默认值
+     - 说明
+   * - max_completion_tokens
+     - int
+     - 16384
+     - 最大输出 token 数
+   * - temperature
+     - float
+     - 1.0
+     - 采样温度，越高越随机
+   * - top_p
+     - float
+     - 1.0
+     - 核采样参数
+   * - top_k
+     - int
+     - -1
+     - Top-K 采样，-1 表示禁用
+   * - do_sample
+     - bool
+     - true
+     - 是否启用采样（false 为贪心解码）
+   * - repetition_penalty
+     - float
+     - 1.0
+     - 重复惩罚系数，>1 抑制重复
+   * - stream
+     - bool
+     - false
+     - 是否流式输出
+   * - stop
+     - str/list
+     - -
+     - 停止生成的序列
+   * - seed
+     - int
+     - -1
+     - 随机种子，-1 为随机
+
+响应格式
+--------
+
+**非流式响应**
+
+.. code-block:: json
+
+    {
+        "id": "chatcmpl-xxx",
+        "object": "chat.completion",
+        "created": 1234567890,
+        "model": "qwen",
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "这张图片显示的是..."
+                },
+                "finish_reason": "stop"
+            }
+        ],
+        "usage": {
+            "prompt_tokens": 256,
+            "completion_tokens": 128,
+            "total_tokens": 384
+        }
+    }
+
+**流式响应**
+
+每个 chunk 以 ``data: `` 前缀的 SSE 格式返回：
+
+.. code-block:: json
+
+    {
+        "id": "chatcmpl-xxx",
+        "object": "chat.completion.chunk",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {
+                    "content": "这张"
+                },
+                "finish_reason": null
+            }
+        ]
+    }
+
+环境变量
+--------
+
+- **REQUEST_TIMEOUT**: 下载远程图片/音频的超时时间（秒），默认 5
+- **REQUEST_PROXY**: 下载远程资源时使用的代理地址
diff --git a/docs/EN/source/tutorial/multimodal_request.rst b/docs/EN/source/tutorial/multimodal_request.rst
new file mode 100644
index 0000000000..22ecdc9fca
--- /dev/null
+++ b/docs/EN/source/tutorial/multimodal_request.rst
@@ -0,0 +1,413 @@
+Multimodal Request Guide
+============================
+
+LightLLM provides two API formats for multimodal requests: OpenAI-compatible format (recommended) and Legacy format. This guide shows how to send requests with images and audio to a LightLLM server.
+
+OpenAI-Compatible Format (Recommended)
+---------------------------------------
+
+Use the ``POST /v1/chat/completions`` endpoint, fully compatible with the OpenAI API.
+
+Image Input Methods
+^^^^^^^^^^^^^^^^^^^
+
+LightLLM supports three image input methods:
+
+**1. URL**
+
+.. code-block:: json
+
+    {
+        "type": "image_url",
+        "image_url": {
+            "url": "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+        }
+    }
+
+**2. Base64 Encoded**
+
+.. code-block:: json
+
+    {
+        "type": "image_url",
+        "image_url": {
+            "url": "data:image/jpeg;base64,/9j/4AAQSkZJRg..."
+        }
+    }
+
+**3. Local File Path**
+
+.. code-block:: json
+
+    {
+        "type": "image_url",
+        "image_url": {
+            "url": "file:///path/to/image.jpg"
+        }
+    }
+
+Audio Input Methods
+^^^^^^^^^^^^^^^^^^^
+
+Audio supports both URL and Base64 methods:
+
+.. code-block:: json
+
+    {
+        "type": "audio_url",
+        "audio_url": {
+            "url": "https://example.com/audio.wav"
+        }
+    }
+
+Using curl
+^^^^^^^^^^
+
+**Basic Image Request**
+
+.. code-block:: bash
+
+    curl http://127.0.0.1:8080/v1/chat/completions \
+      -H "Content-Type: application/json" \
+      -d '{
+        "model": "qwen",
+        "messages": [
+          {
+            "role": "user",
+            "content": [
+              {
+                "type": "image_url",
+                "image_url": {
+                  "url": "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+                }
+              },
+              {
+                "type": "text",
+                "text": "Describe the content of this image."
+              }
+            ]
+          }
+        ],
+        "max_completion_tokens": 512,
+        "temperature": 0.7
+      }'
+
+**Streaming Output**
+
+.. code-block:: bash
+
+    curl http://127.0.0.1:8080/v1/chat/completions \
+      -H "Content-Type: application/json" \
+      -d '{
+        "model": "qwen",
+        "messages": [
+          {
+            "role": "user",
+            "content": [
+              {
+                "type": "image_url",
+                "image_url": {
+                  "url": "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+                }
+              },
+              {
+                "type": "text",
+                "text": "Describe the content of this image."
+              }
+            ]
+          }
+        ],
+        "max_completion_tokens": 512,
+        "stream": true
+      }'
+
+Using Python requests
+^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: python
+
+    import requests
+
+    url = "http://127.0.0.1:8080/v1/chat/completions"
+    image_url = "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+
+    payload = {
+        "model": "qwen",
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": "Describe the content of this image."},
+                ],
+            }
+        ],
+        "max_completion_tokens": 512,
+        "temperature": 0.7,
+    }
+
+    response = requests.post(url, json=payload)
+    print(response.json())
+
+Using OpenAI SDK
+^^^^^^^^^^^^^^^^
+
+.. code-block:: python
+
+    from openai import OpenAI
+
+    client = OpenAI(base_url="http://127.0.0.1:8080/v1", api_key="none")
+
+    image_url = "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+
+    # Non-streaming
+    response = client.chat.completions.create(
+        model="qwen",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": "Describe the content of this image."},
+                ],
+            }
+        ],
+        max_completion_tokens=512,
+        temperature=0.7,
+    )
+    print(response.choices[0].message.content)
+
+    # Streaming
+    stream = client.chat.completions.create(
+        model="qwen",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": "Describe the content of this image."},
+                ],
+            }
+        ],
+        max_completion_tokens=512,
+        stream=True,
+    )
+    for chunk in stream:
+        if chunk.choices[0].delta.content:
+            print(chunk.choices[0].delta.content, end="", flush=True)
+
+Multi-Image Request
+^^^^^^^^^^^^^^^^^^^
+
+Pass multiple ``image_url`` items in the ``content`` list:
+
+.. code-block:: python
+
+    response = client.chat.completions.create(
+        model="qwen",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": "https://example.com/image1.jpg"}},
+                    {"type": "image_url", "image_url": {"url": "https://example.com/image2.jpg"}},
+                    {"type": "text", "text": "Compare the differences between these two images."},
+                ],
+            }
+        ],
+        max_completion_tokens=512,
+    )
+
+Multi-Turn Conversation
+^^^^^^^^^^^^^^^^^^^^^^^
+
+In multi-turn conversations, identical images automatically hit the embed cache, avoiding redundant inference:
+
+.. code-block:: python
+
+    image_url = "https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": image_url}},
+                {"type": "text", "text": "What is in this image?"},
+            ],
+        },
+    ]
+
+    # First turn
+    response = client.chat.completions.create(
+        model="qwen", messages=messages, max_completion_tokens=512
+    )
+    answer = response.choices[0].message.content
+    messages.append({"role": "assistant", "content": answer})
+
+    # Second turn (same image hits cache)
+    messages.append({
+        "role": "user",
+        "content": [
+            {"type": "image_url", "image_url": {"url": image_url}},
+            {"type": "text", "text": "Describe the text content in the image in more detail."},
+        ],
+    })
+    response = client.chat.completions.create(
+        model="qwen", messages=messages, max_completion_tokens=512
+    )
+
+Legacy Format
+--------------
+
+Use the ``POST /generate`` endpoint. Requires manually constructing prompt templates and multimodal_params.
+
+.. code-block:: python
+
+    import json
+    import requests
+    import base64
+
+
+    def run(query, uris):
+        images = []
+        for uri in uris:
+            if uri.startswith("http"):
+                images.append({"type": "url", "data": uri})
+            else:
+                with open(uri, "rb") as fin:
+                    b64 = base64.b64encode(fin.read()).decode("utf-8")
+                images.append({"type": "base64", "data": b64})
+
+        data = {
+            "inputs": query,
+            "parameters": {
+                "max_new_tokens": 512,
+                "do_sample": False,
+            },
+            "multimodal_params": {
+                "images": images,
+            },
+        }
+
+        url = "http://127.0.0.1:8080/generate"
+        headers = {"Content-Type": "application/json"}
+        response = requests.post(url, headers=headers, data=json.dumps(data))
+        return response
+
+
+    response = run(
+        uris=["https://qianwen-res.oss-accelerate.aliyuncs.com/Qwen3.5/demo/CI_Demo/mathv-1327.jpg"],
+        query="<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+              "<|im_start|>user\n<img></img>\nDescribe the content of this image.<|im_end|>\n"
+              "<|im_start|>assistant\n",
+    )
+
+    if response.status_code == 200:
+        print(f"Result: {response.json()}")
+    else:
+        print(f"Error: {response.status_code}, {response.text}")
+
+.. note:: The Legacy format requires manually constructing chat templates. The OpenAI-compatible format is recommended to avoid template errors.
+
+Common Sampling Parameters
+---------------------------
+
+.. list-table::
+   :header-rows: 1
+   :widths: 25 15 15 45
+
+   * - Parameter
+     - Type
+     - Default
+     - Description
+   * - max_completion_tokens
+     - int
+     - 16384
+     - Maximum output tokens
+   * - temperature
+     - float
+     - 1.0
+     - Sampling temperature, higher is more random
+   * - top_p
+     - float
+     - 1.0
+     - Nucleus sampling parameter
+   * - top_k
+     - int
+     - -1
+     - Top-K sampling, -1 to disable
+   * - do_sample
+     - bool
+     - true
+     - Enable sampling (false for greedy decoding)
+   * - repetition_penalty
+     - float
+     - 1.0
+     - Repetition penalty, >1 suppresses repetition
+   * - stream
+     - bool
+     - false
+     - Enable streaming output
+   * - stop
+     - str/list
+     - -
+     - Stop generation sequences
+   * - seed
+     - int
+     - -1
+     - Random seed, -1 for random
+
+Response Format
+----------------
+
+**Non-Streaming Response**
+
+.. code-block:: json
+
+    {
+        "id": "chatcmpl-xxx",
+        "object": "chat.completion",
+        "created": 1234567890,
+        "model": "qwen",
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": "The image shows..."
+                },
+                "finish_reason": "stop"
+            }
+        ],
+        "usage": {
+            "prompt_tokens": 256,
+            "completion_tokens": 128,
+            "total_tokens": 384
+        }
+    }
+
+**Streaming Response**
+
+Each chunk is returned in SSE format with the ``data: `` prefix:
+
+.. code-block:: json
+
+    {
+        "id": "chatcmpl-xxx",
+        "object": "chat.completion.chunk",
+        "choices": [
+            {
+                "index": 0,
+                "delta": {
+                    "content": "The image"
+                },
+                "finish_reason": null
+            }
+        ]
+    }
+
+Environment Variables
+---------------------
+
+- **REQUEST_TIMEOUT**: Timeout (seconds) for downloading remote images/audio, default 5
+- **REQUEST_PROXY**: Proxy address for downloading remote resources