diff --git a/.github/workflows/scripts/get_test_matrix.sh b/.github/workflows/scripts/get_test_matrix.sh index 1b5b807ec1..c53549c5b1 100644 --- a/.github/workflows/scripts/get_test_matrix.sh +++ b/.github/workflows/scripts/get_test_matrix.sh @@ -26,7 +26,7 @@ for example in ${examples}; do echo -e "Test supported hardware list: \n${hardware_list}" run_hardware="" - if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '\.py|Dockerfile*|ui|docker_image_build' ) ]]; then + if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '\.py|Dockerfile*|ui|edgecraftrag|docker_image_build' ) ]]; then echo "run test on all hardware if megaservice or ui code change..." run_hardware=$hardware_list elif [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | grep 'tests'| cut -d'/' -f3 | grep -vE '^test_|^_test' ) ]]; then diff --git a/EdgeCraftRAG/assets/img/experience_access.png b/EdgeCraftRAG/assets/img/experience_access.png new file mode 100644 index 0000000000..703df92a1a Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_access.png differ diff --git a/EdgeCraftRAG/assets/img/experience_access_zh.png b/EdgeCraftRAG/assets/img/experience_access_zh.png new file mode 100644 index 0000000000..0ce7f86361 Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_access_zh.png differ diff --git a/EdgeCraftRAG/assets/img/experience_create_1.png b/EdgeCraftRAG/assets/img/experience_create_1.png new file mode 100644 index 0000000000..29127eafb1 Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_create_1.png differ diff --git a/EdgeCraftRAG/assets/img/experience_create_1_zh.png b/EdgeCraftRAG/assets/img/experience_create_1_zh.png new file mode 100644 index 0000000000..068b9dbd4a Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_create_1_zh.png differ diff --git a/EdgeCraftRAG/assets/img/experience_create_2.png b/EdgeCraftRAG/assets/img/experience_create_2.png new file mode 100644 index 0000000000..4bbe57d680 Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_create_2.png differ diff --git a/EdgeCraftRAG/assets/img/experience_create_2_zh.png b/EdgeCraftRAG/assets/img/experience_create_2_zh.png new file mode 100644 index 0000000000..2721867db3 Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_create_2_zh.png differ diff --git a/EdgeCraftRAG/assets/img/experience_example_1.png b/EdgeCraftRAG/assets/img/experience_example_1.png new file mode 100644 index 0000000000..181d9481fa Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_example_1.png differ diff --git a/EdgeCraftRAG/assets/img/experience_example_2.png b/EdgeCraftRAG/assets/img/experience_example_2.png new file mode 100644 index 0000000000..4b81c1783f Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_example_2.png differ diff --git a/EdgeCraftRAG/assets/img/experience_example_3.png b/EdgeCraftRAG/assets/img/experience_example_3.png new file mode 100644 index 0000000000..dc30b7808e Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_example_3.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_1.png b/EdgeCraftRAG/assets/img/experience_management_1.png new file mode 100644 index 0000000000..833fef9f3e Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_1.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_1_zh.png b/EdgeCraftRAG/assets/img/experience_management_1_zh.png new file mode 100644 index 0000000000..b1457d5764 
Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_1_zh.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_2.png b/EdgeCraftRAG/assets/img/experience_management_2.png new file mode 100644 index 0000000000..d16fc359a4 Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_2.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_2_zh.png b/EdgeCraftRAG/assets/img/experience_management_2_zh.png new file mode 100644 index 0000000000..1180f621c9 Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_2_zh.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_3.png b/EdgeCraftRAG/assets/img/experience_management_3.png new file mode 100644 index 0000000000..0c229eb0dd Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_3.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_3_zh.png b/EdgeCraftRAG/assets/img/experience_management_3_zh.png new file mode 100644 index 0000000000..a5e30ae0ed Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_3_zh.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_4.png b/EdgeCraftRAG/assets/img/experience_management_4.png new file mode 100644 index 0000000000..e8f13373be Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_4.png differ diff --git a/EdgeCraftRAG/assets/img/experience_management_4_zh.png b/EdgeCraftRAG/assets/img/experience_management_4_zh.png new file mode 100644 index 0000000000..40293b58b0 Binary files /dev/null and b/EdgeCraftRAG/assets/img/experience_management_4_zh.png differ diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml index 707be2328c..55f6a79b4e 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -139,7 +139,7 @@ services: ports: - ${VLLM_SERVICE_PORT_B60:-8086}:${VLLM_SERVICE_PORT_B60:-8086} volumes: - - ${MODEL_PATH}:/workspace/vllm/models + - ${MODEL_PATH-${PWD}}:/workspace/vllm/models devices: - /dev/dri:/dev/dri environment: diff --git a/EdgeCraftRAG/docs/API_Guide.md b/EdgeCraftRAG/docs/API_Guide.md index c13753596c..2e7d787849 100644 --- a/EdgeCraftRAG/docs/API_Guide.md +++ b/EdgeCraftRAG/docs/API_Guide.md @@ -60,7 +60,7 @@ Benchmarking activities may significantly reduce system performance. export ENABLE_BENCHMARK="true" # check the benchmark data for pipeline {pipeline_name} -curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{pipeline_name}/benchmark -H "Content-Type: application/json" | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{pipeline_name}/benchmarks -H "Content-Type: application/json" | jq '.' 
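# Optionally, filter the response. A minimal sketch assuming the new response
# shape {"pipeline_bench": ..., "llm_bench": ...} introduced by this change:
curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{pipeline_name}/benchmarks -H "Content-Type: application/json" | jq '.llm_bench'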
``` ## Model Management diff --git a/EdgeCraftRAG/docs/Agentic_RAG_Guide_Zh.md b/EdgeCraftRAG/docs/Agentic_RAG_Guide_Zh.md index 735bfd02b2..5862a3f448 100644 --- a/EdgeCraftRAG/docs/Agentic_RAG_Guide_Zh.md +++ b/EdgeCraftRAG/docs/Agentic_RAG_Guide_Zh.md @@ -11,7 +11,7 @@ EC-RAG目前支持两种类型的智能体: - **Simple Agent**: 不包含规划、research的简单轻量级Agent; - **Deep Search Agent**: 能够进行多步规划、deep research和深度推理的更高级版本Agent。 -## Retrieval +## 创建智能体 ### 前提条件 diff --git a/EdgeCraftRAG/docs/Experience_Guide.md b/EdgeCraftRAG/docs/Experience_Guide.md new file mode 100644 index 0000000000..4be0dcfeb8 --- /dev/null +++ b/EdgeCraftRAG/docs/Experience_Guide.md @@ -0,0 +1,55 @@ +# EC-RAG Experience Guide + +This guide explains how to enable, configure, and use Experience capabilities within EdgeCraftRAG (EC-RAG). It covers the creation and management of experiences to enhance answer quality. + +## Overview + +Experience refers to the knowledge and skills acquired through practical involvement, trial, and reflection, serving as a key foundation for solving real-world problems. In EC-RAG we now provide the ability to add such experience as a reference for your chat. + +Before using Experience, please make sure the Milvus VectorDB is enabled and the environment variable below is set: + +```bash +export METADATA_DATABASE_URL="http://${HOST_IP}:19530" +``` + +## Creating an Experience + +Upon successful deployment of EC-RAG, we may navigate to the `Knowledge Base` tab from the left panel: +![alt text](../assets/img/experience_access.png) + +Then, in the `Create Type Select` window, choose `Go Create Experience`: +![alt text](../assets/img/experience_create_1.png) + +In the next window, we can set the Experience to `Activated` or `Inactive`; this can also be changed after the Experience is created: +![alt text](../assets/img/experience_create_2.png) + +## Managing Experiences + +### Add Experiences + +After the `Experience` is successfully created, the Experience tab becomes available with a `Unique` tag: +![alt text](../assets/img/experience_management_1.png) + +To add a new experience, select `Create Experience` from the right panel: +![alt text](../assets/img/experience_management_2.png) + +In the `Create Experience` window, we can add an experience and its corresponding content as needed; creating multiple experiences is supported: +![alt text](../assets/img/experience_management_3.png) + +### View and Edit Experiences + +After experiences are added, they will be shown in a list under the `Experience` tab. We can toggle the `+` sign to show experience content details.
Use the three dots next to `Experience` to activate or deactivate an Experience Base: +![alt text](../assets/img/experience_management_4.png) + +## Examples + +Here is a comparison of results with and without an activated experience: + +Without experience: +![alt text](../assets/img/experience_example_3.png) + +Activated experience: +![alt text](../assets/img/experience_example_1.png) + +RAG Answer: +![alt text](../assets/img/experience_example_2.png) diff --git a/EdgeCraftRAG/docs/Experience_Guide_Zh.md b/EdgeCraftRAG/docs/Experience_Guide_Zh.md new file mode 100644 index 0000000000..8c39ad6380 --- /dev/null +++ b/EdgeCraftRAG/docs/Experience_Guide_Zh.md @@ -0,0 +1,54 @@ +# EC-RAG 经验注入指南 + +本指南将为您介绍如何在 EdgeCraftRAG (EC-RAG) 中启用、配置和使用经验注入(Experience)功能,涵盖 Experience 的创建和管理,旨在提升您使用EC-RAG时的回答质量。 + +## 概述 + +Experience(经验)是指个人或团队在实践过程中积累的知识和技能,通常通过实际操作、试错和反思获得,是解决实际问题的重要依据。EC-RAG 现已支持添加此类Experience,为您的对话提供更多参考。 +使用Experience之前,请确保Milvus服务已经启动,并且请指定如下的环境变量: + +```bash +export METADATA_DATABASE_URL="http://${HOST_IP}:19530" +``` + +## 创建 Experience + +当 EC-RAG 被正确部署后,您可以通过左侧面板的`知识库`(Knowledge Base)选项卡切换到Experience创建页面: +![alt text](../assets/img/experience_access_zh.png) + +然后在`创建类型选择`窗口中,选择`去创建经验注入`: +![alt text](../assets/img/experience_create_1_zh.png) + +在接下来的窗口中,您可以将 Experience 设置为`已启用`或`已停用`状态。请注意,这也可以在创建 Experience 后进行更改: +![alt text](../assets/img/experience_create_2_zh.png) + +## 管理 Experience + +成功创建`Experience`后,左侧面板会出现`Experience`选项卡,同时带有`唯一`标签: +![alt text](../assets/img/experience_management_1_zh.png) + +### 添加 Experience + +当您需要添加新 Experience 时,请从右侧面板选择`新建经验`: +![alt text](../assets/img/experience_management_2_zh.png) + +在`新建经验`窗口中,您可以根据需要添加经验及其对应的内容,EC-RAG 支持创建多个经验条目: +![alt text](../assets/img/experience_management_3_zh.png) + +### 查看与修改 + +添加经验后,它们将以列表形式显示在`经验`选项卡下。您可以点击`+`号展开查看 Experience 内容的详细信息。使用左侧面板里`Experience`旁边的三个点可以激活或停用某个 Experience 库: +![alt text](../assets/img/experience_management_4_zh.png) + +## 使用示例 + +以下是激活和未激活Experience库的对比: + +未激活Experience: +![alt text](../assets/img/experience_example_3.png) + +激活Experience: +![alt text](../assets/img/experience_example_1.png) + +RAG回答: +![alt text](../assets/img/experience_example_2.png) diff --git a/EdgeCraftRAG/docs/Query_Search_Zh.md b/EdgeCraftRAG/docs/Query_Search_Zh.md deleted file mode 100644 index c7c1e2ec06..0000000000 --- a/EdgeCraftRAG/docs/Query_Search_Zh.md +++ /dev/null @@ -1,43 +0,0 @@ -# Query Search - -Query Search帮助EC-RAG在进入retrival和reranking阶段之前,对用户query进行预处理,为了使用Query Search,您需要vllm作为后端推理引擎。 - -## 1. 子问题文件样例 - -用于保存子问题的文件需要以`.json`结尾,同时需要遵守json文件的格式:主问题作为json键,子问题作为json值,比如: - -```json -{ - "问题1": "子问题1.1?子问题1.2?", - "问题2": "子问题2.1?子问题2.2?子问题2.3?" -} -``` - -> 请您注意:1. Query Search至少需要一个子问题文件。2. 增加问题数量会增加EC-RAG的整体查询时间。 - -## 2. 子问题文件位置 - -所有子问题文件应该放在`${TMPFILE_PATH}/configs/search_dir`路径下。 - -## 3. 配置文件样例 - -配置文件里定义了Query Search需要的prompts、temperature等参数: - -`instruction`, `input_template`, `output_template`会影响最终进行Query Search的提示词; -`json_key` 和 `json_levels` 两个参数相互关联,比如,如果`json_key`设置为"similarity",`json_levels`需要列出和其匹配的选项,像"Low, Medium, High"。 - -对于DeesSeep-R1-Distill-Qwen-32B模型,一个配置样例如下: - -```yaml -query_matcher: - instructions: "Decide similarity of two queries. For exactly the same, mark as High, for totally different, mark as Low.\n" - input_template: " {} \n {} \n" - output_template: "output from {json_levels}.\n" - json_key: "similarity" - json_levels: ["Low", "Medium", "High"] - temperature: 3.7 -``` - -## 4.
配置文件位置 - -配置文件应该放在`${TMPFILE_PATH}/configs`路径下,并且被命名为`search_config.yaml`, 所以完整路径为`${TMPFILE_PATH}/configs/search_config.yaml`。 diff --git a/EdgeCraftRAG/edgecraftrag/VERSION b/EdgeCraftRAG/edgecraftrag/VERSION index 751e5f1646..0441e5d7aa 100644 --- a/EdgeCraftRAG/edgecraftrag/VERSION +++ b/EdgeCraftRAG/edgecraftrag/VERSION @@ -1 +1 @@ -26.01-Dev +26.01-Release diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/agent.py b/EdgeCraftRAG/edgecraftrag/api/v1/agent.py index 00a1b5db09..324e62ac7d 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/agent.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/agent.py @@ -5,6 +5,7 @@ import os import time +from edgecraftrag.api.v1.knowledge_base import Synchronizing_vector_data from edgecraftrag.api_schema import AgentCreateIn from edgecraftrag.base import AgentType from edgecraftrag.config_repository import MilvusConfigRepository, save_agent_configurations @@ -22,12 +23,14 @@ async def get_all_agents(): agents = ctx.get_agent_mgr().get_agents() active_id = ctx.get_agent_mgr().get_active_agent_id() for k, agent in agents.items(): + pipeline_name = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(agent.pipeline_idx).name out.append( AgentCreateIn( idx=agent.idx, name=agent.name, type=agent.comp_subtype, pipeline_idx=agent.pipeline_idx, + pipeline_name=pipeline_name, configs=agent.configs, active=True if agent.idx == active_id else False, ) @@ -40,12 +43,14 @@ async def get_all_agents(): async def get_agent(name): agent = ctx.get_agent_mgr().get_agent_by_name(name) if agent: + pipeline_name = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(agent.pipeline_idx).name isactive = True if agent.idx == ctx.get_agent_mgr().get_active_agent_id() else False return AgentCreateIn( idx=agent.idx, name=agent.name, type=agent.comp_subtype, pipeline_idx=agent.pipeline_idx, + pipeline_name=pipeline_name, configs=agent.configs, active=isactive, ) @@ -70,10 +75,15 @@ async def create_agent(request: AgentCreateIn, status_code=status.HTTP_201_CREAT async def update_agent(name, request: AgentCreateIn): try: agentmgr = ctx.get_agent_mgr() - if agentmgr.get_agent_by_name(name): + agent = agentmgr.get_agent_by_name(name) + if agent: ret = agentmgr.update_agent(name, request) if ret: await save_agent_configurations("update", ctx.get_agent_mgr().get_agents()) + # manage agent bound pipeline status, trigger kb indexing if needed + # can be removed once kb indexing is decoupled from pipeline + pl_idx = agent.pipeline_idx + await manage_agent_bound_pipeline(pl_idx, request) return ret else: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) @@ -146,3 +156,36 @@ async def load_agent(request: AgentCreateIn): agentmgr.remove_agent_by_name(request.name) raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) return agent + + +async def manage_agent_bound_pipeline(bound_pl_idx, request): + # case1: activate agent, while bound pipeline is not active -> activate it, cache previous active pipeline if exists + # case2: activate agent, while bound pipeline is already active -> still call activate, for caching current pipeline + # case3: deactivate agent, while bound pipeline **was** active -> do NOT deactivate bound pipeline, do nothing + # case4: deactivate agent, while bound pipeline **was NOT** active -> deactivate bound pipeline, activate previous active pipeline if exists + pl_manager = ctx.get_pipeline_mgr() + prev_active_pipeline = pl_manager.get_active_pipeline() + + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + kb_name = active_kb.name if active_kb 
else "default" + + if request.active: + pl_manager.activate_pipeline(bound_pl_idx, request.active, ctx.get_node_mgr(), kb_name, cache_prev=True) + else: + # at deactivate, prev_active_pl can be 1.other pl/2.None/3.current bound_pl + prev_active_pl = pl_manager.get_prev_active_pipeline_name() + if prev_active_pl and prev_active_pl != bound_pl_idx: + # 1, restore to the other pipeline activated + pl_manager.activate_pipeline(prev_active_pl, True, ctx.get_node_mgr(), kb_name) + elif not prev_active_pl: + # 2, deactivate current bound pipeline, leave no active pipeline as before + pl_manager.activate_pipeline(bound_pl_idx, False, ctx.get_node_mgr(), kb_name) + else: + # 3, do nothing + pass + # when agent is deactivated, clear cached previous active pipeline + pl_manager.clear_prev_active_pipeline_name() + + cur_active_pipeline = pl_manager.get_active_pipeline() + if prev_active_pipeline and cur_active_pipeline and prev_active_pipeline.idx != cur_active_pipeline.idx: + await Synchronizing_vector_data(prev_active_pipeline, cur_active_pipeline) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py index cb3fcdb409..3eec57efa9 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -9,6 +9,7 @@ import requests from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.api_schema import RagOut +from edgecraftrag.base import GeneratorType from edgecraftrag.context import ctx from edgecraftrag.utils import chain_async_generators, serialize_contexts, stream_generator from fastapi import Body, FastAPI, HTTPException, status @@ -52,9 +53,10 @@ async def chatqna(request: ChatCompletionRequest): if experience_kb: request.tool_choice = "auto" if experience_kb.experience_active else "none" - request.input = ctx.get_session_mgr().concat_history( - sessionid, active_pl.generator.inference_type, request.messages - ) + generator = active_pl.get_generator(GeneratorType.CHATQNA) + inference_type = generator.inference_type if generator else "local" + + request.input = ctx.get_session_mgr().concat_history(sessionid, inference_type, request.messages) # Run agent if activated, otherwise, run pipeline if ctx.get_agent_mgr().get_active_agent(): @@ -62,9 +64,10 @@ async def chatqna(request: ChatCompletionRequest): return StreamingResponse(save_session(sessionid, run_agent_gen), media_type="text/plain") else: - generator = active_pl.generator - if generator: - request.model = generator.model_id + generator = active_pl.get_generator(GeneratorType.CHATQNA) + if not generator: + raise Exception("code:0000Please make sure chatqna generator is available in pipeline.") + request.model = generator.model_id if request.stream: run_pipeline_gen, contexts = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) @@ -75,6 +78,8 @@ async def chatqna(request: ChatCompletionRequest): return str(ret) except Exception as e: + if "code:0000" in str(e): + return str(e) raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"ChatQnA Error: {e}", @@ -91,7 +96,7 @@ async def ragqna(request: ChatCompletionRequest): request.user = active_kb if active_kb else None if experience_kb: request.tool_choice = "auto" if experience_kb.experience_active else "none" - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + generator = ctx.get_pipeline_mgr().get_active_pipeline().get_generator(GeneratorType.CHATQNA) if generator: request.model = generator.model_id if request.stream: 
diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py index 78d15555a5..289797d0ee 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -48,8 +48,8 @@ # GET Pipelines @pipeline_app.get(path="/v1/settings/pipelines") -async def get_pipelines(): - return ctx.get_pipeline_mgr().get_pipelines() +async def get_pipelines(gen_type: str = None): + return ctx.get_pipeline_mgr().get_pipelines(gen_type) # GET Pipeline @@ -81,7 +81,8 @@ async def get_pipeline_benchmark(): async def get_pipeline_benchmarks(name): pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) if pl and pl.benchmark: - return pl.benchmark.benchmark_data_list + bench_res = {"pipeline_bench": pl.benchmark.benchmark_data_list, "llm_bench": pl.benchmark.llm_data_list} + return bench_res # POST Pipeline @@ -293,38 +294,42 @@ async def update_pipeline_handler(pl, req): postprocessor = MetadataReplaceProcessor(target_metadata_key="window") pl.postprocessor.append(postprocessor) - if req.generator: - gen = req.generator - if gen.model is None: - raise Exception("No ChatQnA Model") - if gen.inference_type: - model = ctx.get_model_mgr().search_model(gen.model) - if model is None: - if gen.inference_type == InferenceType.VLLM: - gen.model.model_type = ModelType.VLLM - else: - gen.model.model_type = ModelType.LLM + if req.generator is not None: + pl.generator = [] + for gen in req.generator: + if gen.model is None: + raise Exception("No ChatQnA Model") + if gen.inference_type: + model = ctx.get_model_mgr().search_model(gen.model) + if model is None: + if gen.inference_type == InferenceType.VLLM: + gen.model.model_type = ModelType.VLLM + else: + gen.model.model_type = ModelType.LLM + if pl.enable_benchmark: + model, tokenizer, bench_hook = ctx.get_model_mgr().load_model_ben(gen.model) + else: + model = ctx.get_model_mgr().load_model(gen.model) + ctx.get_model_mgr().add(model) + # Use weakref to achieve model deletion and memory release + model_ref = weakref.ref(model) + if gen.generator_type == GeneratorType.CHATQNA: + pl.generator.append( + QnAGenerator( + model_ref, gen.prompt_path, gen.inference_type, gen.vllm_endpoint, gen.prompt_content + ) + ) + elif gen.generator_type == GeneratorType.FREECHAT: + pl.generator.append(FreeChatGenerator(model_ref, gen.inference_type, gen.vllm_endpoint)) + if pl.enable_benchmark: - model, tokenizer, bench_hook = ctx.get_model_mgr().load_model_ben(gen.model) + if "tokenizer" not in locals() or tokenizer is None: + _, tokenizer, bench_hook = ctx.get_model_mgr().load_model_ben(gen.model) + pl.benchmark = Benchmark(pl.enable_benchmark, gen.inference_type, tokenizer, bench_hook) else: - model = ctx.get_model_mgr().load_model(gen.model) - ctx.get_model_mgr().add(model) - # Use weakref to achieve model deletion and memory release - model_ref = weakref.ref(model) - if gen.generator_type == GeneratorType.CHATQNA: - pl.generator = QnAGenerator( - model_ref, gen.prompt_path, gen.inference_type, gen.vllm_endpoint, gen.prompt_content - ) - elif gen.generator_type == GeneratorType.FREECHAT: - pl.generator = FreeChatGenerator(model_ref, gen.inference_type, gen.vllm_endpoint) - if pl.enable_benchmark: - if "tokenizer" not in locals() or tokenizer is None: - _, tokenizer, bench_hook = ctx.get_model_mgr().load_model_ben(gen.model) - pl.benchmark = Benchmark(pl.enable_benchmark, gen.inference_type, tokenizer, bench_hook) + pl.benchmark = Benchmark(pl.enable_benchmark, gen.inference_type) else: - pl.benchmark = 
Benchmark(pl.enable_benchmark, gen.inference_type) - else: - raise Exception("Inference Type Not Supported") + raise Exception("Inference Type Not Supported") if pl.status.active != req.active: ctx.get_pipeline_mgr().activate_pipeline(pl.name, req.active, ctx.get_node_mgr(), kb_name) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py index 51ed5be6d4..71ee0943e7 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py @@ -3,6 +3,7 @@ from edgecraftrag.api.v1.pipeline import save_pipeline_configurations from edgecraftrag.api_schema import PromptIn +from edgecraftrag.base import GeneratorType from edgecraftrag.context import ctx from edgecraftrag.utils import DEFAULT_TEMPLATE from fastapi import FastAPI, File, HTTPException, UploadFile, status @@ -15,7 +16,7 @@ async def load_prompt_file(file: UploadFile = File(...)): try: pl = ctx.get_pipeline_mgr().get_active_pipeline() - generator = pl.generator + generator = pl.get_generator(GeneratorType.CHATQNA) if generator: content = await file.read() prompt_str = content.decode("utf-8") @@ -31,7 +32,7 @@ async def load_prompt_file(file: UploadFile = File(...)): async def load_prompt(request: PromptIn): try: pl = ctx.get_pipeline_mgr().get_active_pipeline() - generator = pl.generator + generator = pl.get_generator(GeneratorType.CHATQNA) if generator: prompt_str = request.prompt generator.set_prompt(prompt_str) @@ -45,7 +46,7 @@ async def load_prompt(request: PromptIn): @prompt_app.get(path="/v1/chatqna/prompt") async def get_prompt(): try: - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + generator = ctx.get_pipeline_mgr().get_active_pipeline().get_generator(GeneratorType.CHATQNA) if generator: return generator.original_template except Exception as e: @@ -55,7 +56,7 @@ async def get_prompt(): @prompt_app.get(path="/v1/chatqna/prompt/tagged") async def get_tagged_prompt(): try: - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + generator = ctx.get_pipeline_mgr().get_active_pipeline().get_generator(GeneratorType.CHATQNA) if generator: return generator.prompt raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Tagged prompt not found") @@ -72,7 +73,7 @@ async def get_default_prompt(): @prompt_app.post(path="/v1/chatqna/prompt/reset") async def reset_prompt(): try: - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + generator = ctx.get_pipeline_mgr().get_active_pipeline().get_generator(GeneratorType.CHATQNA) if generator: generator.reset_prompt() return "Reset LLM Prompt Successfully" diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py index 077b38339f..00a7631975 100644 --- a/EdgeCraftRAG/edgecraftrag/api_schema.py +++ b/EdgeCraftRAG/edgecraftrag/api_schema.py @@ -59,7 +59,7 @@ class PipelineCreateIn(BaseModel): indexer: Optional[IndexerIn] = None retriever: Optional[RetrieverIn] = None postprocessor: Optional[list[PostProcessorIn]] = None - generator: Optional[GeneratorIn] = None + generator: Optional[list[GeneratorIn]] = None active: Optional[bool] = False documents_cache: Optional[Dict] = None @@ -110,6 +110,7 @@ class AgentCreateIn(BaseModel): name: Optional[str] = "" type: Optional[base.AgentType] = None pipeline_idx: Optional[str] = None + pipeline_name: Optional[str] = None configs: Optional[dict] = None active: Optional[bool] = False diff --git a/EdgeCraftRAG/edgecraftrag/base.py b/EdgeCraftRAG/edgecraftrag/base.py index 
195302451c..53209ca043 100644 --- a/EdgeCraftRAG/edgecraftrag/base.py +++ b/EdgeCraftRAG/edgecraftrag/base.py @@ -21,12 +21,16 @@ class CompType(str, Enum): GENERATOR = "generator" QUERYSEARCH = "querysearch" FILE = "file" - CHUNK_NUM = "chunk_num" KNOWLEDGE = "knowledge" AGENT = "agent" SESSION = "session" +class BenchType(str, Enum): + + CHUNK_NUM = "chunk_num" + + class ModelType(str, Enum): EMBEDDING = "embedding" diff --git a/EdgeCraftRAG/edgecraftrag/components/agent.py b/EdgeCraftRAG/edgecraftrag/components/agent.py index fd6c4ff7a2..525c10b8e3 100644 --- a/EdgeCraftRAG/edgecraftrag/components/agent.py +++ b/EdgeCraftRAG/edgecraftrag/components/agent.py @@ -5,7 +5,7 @@ from abc import abstractmethod from comps.cores.proto.api_protocol import ChatCompletionRequest -from edgecraftrag.base import BaseComponent, CallbackType, CompType +from edgecraftrag.base import BaseComponent, CallbackType, CompType, GeneratorType from edgecraftrag.components.agents.utils import remove_think_tags from edgecraftrag.utils import stream_generator from langgraph.config import get_stream_writer @@ -72,7 +72,9 @@ async def run_pipeline_chatqna(self, request): async def _run_pipeline_generate(self, request): pl = self.get_bound_pipeline() if pl is not None: - return await pl.run(cbtype=CallbackType.GENERATE, chat_request=request) + return await pl.run( + cbtype=CallbackType.GENERATE, chat_request=request, generator_type=GeneratorType.FREECHAT + ) async def run_pipeline_retrieve_and_rerank(self, request): pl = self.get_bound_pipeline() diff --git a/EdgeCraftRAG/edgecraftrag/components/benchmark.py b/EdgeCraftRAG/edgecraftrag/components/benchmark.py index 3bf2a7e602..f91324a9d8 100644 --- a/EdgeCraftRAG/edgecraftrag/components/benchmark.py +++ b/EdgeCraftRAG/edgecraftrag/components/benchmark.py @@ -6,7 +6,7 @@ from typing import Any, List, Optional import requests -from edgecraftrag.base import BaseComponent, CompType, InferenceType, ModelType +from edgecraftrag.base import BaseComponent, BenchType, CompType, InferenceType from prometheus_client.parser import text_string_to_metric_families from pydantic import BaseModel, Field, model_serializer @@ -16,8 +16,12 @@ class Benchmark(BaseComponent): def __init__(self, enable_benchmark, inference_type, tokenizer=None, bench_hook=None): super().__init__() self.enabled = enable_benchmark - self.is_vllm = True if inference_type == InferenceType.VLLM else False - + self.vllm_metrics = {} + if inference_type == InferenceType.VLLM: + self.is_vllm = True + self.vllm_metrics = self.get_vllm_metrics() + else: + self.is_vllm = False self.tokenizer = tokenizer self.bench_hook = bench_hook @@ -51,11 +55,11 @@ def cal_input_token_size(self, input_text_list): def init_benchmark_data(self): pipeline_comp = [ CompType.NODEPARSER, - CompType.CHUNK_NUM, CompType.RETRIEVER, CompType.POSTPROCESSOR, CompType.QUERYSEARCH, CompType.GENERATOR, + BenchType.CHUNK_NUM, ] if self.is_enabled(): with self._idx_lock: @@ -66,25 +70,24 @@ def init_benchmark_data(self): for comp in pipeline_comp: data[comp] = "" data[CompType.NODEPARSER] = 0 - data[CompType.CHUNK_NUM] = 0 - return idx, data + data[BenchType.CHUNK_NUM] = 0 + self.benchmark_data_list[idx] = data + return idx - def update_benchmark_data(self, idx, comp_type, start, end): + def update_benchmark_data(self, idx, comp_type, data): if self.is_enabled() and idx in self.benchmark_data_list and comp_type in self.benchmark_data_list[idx]: - self.benchmark_data_list[idx][comp_type] = end - start + self.benchmark_data_list[idx][comp_type] = data 
- def insert_benchmark_data(self, benchmark_data): - idx = benchmark_data["idx"] - self.benchmark_data_list[idx] = benchmark_data - self.dict_idx = idx + def get_benchmark_data(self, idx, comp_type): + if self.is_enabled() and idx in self.benchmark_data_list and comp_type in self.benchmark_data_list[idx]: + return self.benchmark_data_list[idx][comp_type] + else: + return None - def insert_llm_data(self, idx, input_token_size): + def insert_llm_data(self, idx, input_token_size=-1): if self.is_enabled(): if self.is_vllm: - metrics = {} - if input_token_size != -1: - metrics["input_token_size"] = input_token_size - metrics = get_vllm_metrics(metrics) + metrics = self.get_vllm_metrics() else: bench_hook = self.bench_hook if bench_hook: @@ -105,6 +108,43 @@ def insert_llm_data(self, idx, input_token_size): self.llm_data_list[idx] = metrics + def get_vllm_metrics(self): + # self.vllm_metrics is the previous vllm metric + vllm_metrics = [ + "vllm:prompt_tokens_total", + "vllm:generation_tokens_total", + "vllm:time_to_first_token_seconds_sum", + "vllm:time_per_output_token_seconds_sum", + "vllm:e2e_request_latency_seconds_sum", + ] + metrics = self.vllm_metrics.copy() + if metrics == {}: + for key in vllm_metrics: + metrics[key] = 0 + + llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8086") + response = requests.get(f"{llm_endpoint}/metrics", headers={"Content-Type": "application/json"}) + if response.status_code == 200: + metrics_data = text_string_to_metric_families(response.text) + else: + return {} + + parsed_metrics = {} + for family in metrics_data: + for sample in family.samples: + parsed_metrics[sample.name] = sample + + current_metrics = {} + for metric in vllm_metrics: + if metric in parsed_metrics: + parsed_value = parsed_metrics[metric].value + current_metrics[metric] = parsed_value + # save single request's metric to metrics by subtracting + metrics[metric] = parsed_value - metrics[metric] + # update self.vllm_metrics to current vllm request's metric + self.vllm_metrics = current_metrics + return metrics + @model_serializer def ser_model(self): if self.enabled: @@ -123,54 +163,3 @@ def ser_model(self): def run(self, **kwargs) -> Any: pass - - -def get_vllm_metrics(metrics): - - llm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8008") - response = requests.get(f"{llm_endpoint}/metrics", headers={"Content-Type": "application/json"}) - if response.status_code == 200: - metrics_data = text_string_to_metric_families(response.text) - else: - return None - - parsed_metrics = {} - for family in metrics_data: - for sample in family.samples: - parsed_metrics[sample.name] = sample - - vllm_metrics = [ - "vllm:prompt_tokens_total", - "vllm:generation_tokens_total", - "vllm:time_to_first_token_seconds_sum", - "vllm:time_to_first_token_seconds_count", - "vllm:time_per_output_token_seconds_sum", - "vllm:time_per_output_token_seconds_count", - "vllm:e2e_request_latency_seconds_sum", - "vllm:e2e_request_latency_seconds_count", - ] - - for metric in vllm_metrics: - if metric in parsed_metrics: - metrics[metric] = parsed_metrics[metric].value - - if "vllm:time_to_first_token_seconds_sum" in metrics and "vllm:time_to_first_token_seconds_count" in metrics: - metrics["average_time_to_first_token_seconds"] = ( - metrics["vllm:time_to_first_token_seconds_sum"] / metrics["vllm:time_to_first_token_seconds_count"] - if metrics["vllm:time_to_first_token_seconds_count"] > 0 - else None - ) - if "vllm:time_per_output_token_seconds_sum" in metrics and "vllm:time_per_output_token_seconds_count" 
in metrics: - metrics["average_time_per_output_token_seconds"] = ( - metrics["vllm:time_per_output_token_seconds_sum"] / metrics["vllm:time_per_output_token_seconds_count"] - if metrics["vllm:time_per_output_token_seconds_count"] > 0 - else None - ) - if "vllm:e2e_request_latency_seconds_sum" in metrics and "vllm:e2e_request_latency_seconds_count" in metrics: - metrics["average_e2e_request_latency_seconds"] = ( - metrics["vllm:e2e_request_latency_seconds_sum"] / metrics["vllm:e2e_request_latency_seconds_count"] - if metrics["vllm:e2e_request_latency_seconds_count"] > 0 - else None - ) - - return metrics diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py index fe5974e71f..8be6b4b9dc 100644 --- a/EdgeCraftRAG/edgecraftrag/components/generator.py +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -4,6 +4,7 @@ import asyncio import json import os +import time import urllib.request from concurrent.futures import ThreadPoolExecutor from urllib.parse import urlparse @@ -100,7 +101,9 @@ async def local_stream_generator(lock, llm, prompt_str, unstructured_str): yield f"code:0000{result_error}" -async def stream_generator(llm, prompt_str, unstructured_str): +async def stream_generator(llm, prompt_str, unstructured_str, benchmark=None, benchmark_index=None): + enable_benchmark = benchmark.is_enabled() if benchmark else False + start_time = time.perf_counter() if enable_benchmark else None response = await llm.astream_complete(prompt_str) try: async for r in response: @@ -109,6 +112,10 @@ async def stream_generator(llm, prompt_str, unstructured_str): if unstructured_str: yield unstructured_str await asyncio.sleep(0) + if enable_benchmark: + benchmark.update_benchmark_data(benchmark_index, CompType.GENERATOR, time.perf_counter() - start_time) + benchmark.insert_llm_data(benchmark_index) + except asyncio.CancelledError as e: response.aclose() except Exception as e: @@ -117,6 +124,44 @@ async def stream_generator(llm, prompt_str, unstructured_str): yield f"code:0000{result_error}" +def clone_generator(src_generator: BaseComponent, dst_generator_cfg: dict = None): + if not dst_generator_cfg: + # If no config is provided, do a pure clone. + dst_generator_cfg = {"generator_type": src_generator.comp_subtype} + + if "generator_type" not in dst_generator_cfg: + return None + + generator_type = dst_generator_cfg.get("generator_type") + new_generator = None + + # Prepare shared arguments + shared_args = { + "llm_model": src_generator.llm, + "inference_type": src_generator.inference_type, + "vllm_endpoint": src_generator.vllm_endpoint, + } + + if generator_type == GeneratorType.CHATQNA: + if src_generator.comp_subtype == GeneratorType.FREECHAT: + # It's not possible to clone a QnAGenerator from a FreeChatGenerator + # because there is no prompt info in the source one. 
+ return None + # For QnAGenerator, we also need prompt-related info + qna_args = shared_args.copy() + qna_args.update( + { + "prompt_template_file": src_generator.prompt_template_file, + "prompt_content": src_generator.prompt_content, + } + ) + new_generator = QnAGenerator(**qna_args) + elif generator_type == GeneratorType.FREECHAT: + new_generator = FreeChatGenerator(**shared_args) + + return new_generator + + class QnAGenerator(BaseComponent): def __init__(self, llm_model, prompt_template_file, inference_type, vllm_endpoint, prompt_content, **kwargs): @@ -277,6 +322,8 @@ async def generator(): async def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): # query transformation sub_questions = kwargs.get("sub_questions", None) + benchmark = kwargs.get("benchmark", None) + benchmark_index = kwargs.get("benchmark_index", None) text_gen_context, prompt_str = self.query_transform(chat_request, retrieved_nodes, sub_questions=sub_questions) llm = OpenAILike( api_key="fake", @@ -296,7 +343,7 @@ async def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwar # Asynchronous generator async def generator(): - async for chunk in stream_generator(llm, prompt_str, unstructured_str): + async for chunk in stream_generator(llm, prompt_str, unstructured_str, benchmark, benchmark_index): yield chunk or "" await asyncio.sleep(0) diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py index 8bc0a3c15f..59bb8fffc1 100644 --- a/EdgeCraftRAG/edgecraftrag/components/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -6,10 +6,20 @@ import os import time from concurrent.futures import ThreadPoolExecutor -from typing import Any, Callable, List, Optional +from typing import Any, Callable, Dict, List, Optional from comps.cores.proto.api_protocol import ChatCompletionRequest -from edgecraftrag.base import BaseComponent, CallbackType, CompType, InferenceType, NodeParserType, RetrieverType +from edgecraftrag.base import ( + BaseComponent, + BenchType, + CallbackType, + CompType, + GeneratorType, + InferenceType, + NodeParserType, + RetrieverType, +) +from edgecraftrag.components.generator import clone_generator from edgecraftrag.components.postprocessor import RerankProcessor from edgecraftrag.components.query_preprocess import query_search from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever @@ -29,7 +39,7 @@ class Pipeline(BaseComponent): indexer: Optional[BaseComponent] = Field(default=None) retriever: Optional[BaseComponent] = Field(default=None) postprocessor: Optional[List[BaseComponent]] = Field(default=None) - generator: Optional[BaseComponent] = Field(default=None) + generator: Optional[List[BaseComponent]] = Field(default=None) benchmark: Optional[BaseComponent] = Field(default=None) status: PipelineStatus = Field(default=PipelineStatus()) run_pipeline_cb: Optional[Callable[..., Any]] = Field(default=None) @@ -56,6 +66,7 @@ def __init__( else: self.documents_cache = {} + self.generator = [] self.enable_benchmark = os.getenv("ENABLE_BENCHMARK", "False").lower() == "true" self.run_pipeline_cb = run_pipeline self.run_retriever_postprocessor_cb = run_retrieve_postprocess @@ -153,10 +164,16 @@ async def run(self, **kwargs) -> Any: ) if kwargs["cbtype"] == CallbackType.GENERATE: if "chat_request" in kwargs: - return await self.run_generator_cb(self, chat_request=kwargs["chat_request"]) + generator_type = kwargs.get("generator_type", 
GeneratorType.CHATQNA) + return await self.run_generator_cb( + self, chat_request=kwargs["chat_request"], generator_type=generator_type + ) if kwargs["cbtype"] == CallbackType.PIPELINE: if "chat_request" in kwargs: - return await self.run_pipeline_cb(self, chat_request=kwargs["chat_request"]) + generator_type = kwargs.get("generator_type", GeneratorType.CHATQNA) + return await self.run_pipeline_cb( + self, chat_request=kwargs["chat_request"], generator_type=generator_type + ) if kwargs["cbtype"] == CallbackType.QUERYSEARCH: if "chat_request" in kwargs: return await self.run_query_search_cb(self, chat_request=kwargs["chat_request"]) @@ -280,27 +297,52 @@ def model_existed(self, model_id: str) -> bool: if hasattr(processor, "model_id") and processor.model_id == model_id: return True if self.generator: - llm = self.generator.llm - if isinstance(llm, str): - return llm == model_id - else: - return llm().model_id == model_id + for generator in self.generator: + llm = generator.llm + if isinstance(llm, str): + return llm == model_id + else: + return llm().model_id == model_id + return False + + def get_generator(self, generator_type: str) -> Optional[BaseComponent]: + if self.generator: + for gen in self.generator: + if gen.comp_subtype == generator_type: + return gen + return None + + def create_freechat_gen_from_chatqna_gen(self) -> bool: + if len(self.generator) == 0 or self.generator[0].comp_subtype != GeneratorType.CHATQNA: + return False + + dst_generator_cfg = {"generator_type": GeneratorType.FREECHAT} + new_gen = clone_generator(self.generator[0], dst_generator_cfg) + if new_gen: + self.generator.append(new_gen) + # update pipeline json + origin_json = json.loads(self._origin_json) + new_gen_config = origin_json["generator"][0].copy() + new_gen_config["generator_type"] = GeneratorType.FREECHAT + new_gen_config.pop("prompt_path", None) + new_gen_config.pop("prompt_content", None) + origin_json["generator"].append(new_gen_config) + self._origin_json = json.dumps(origin_json) + return True return False async def run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: - benchmark_data = {} query = chat_request.messages top_k = None if chat_request.k == ChatCompletionRequest.model_fields["k"].default else chat_request.k contexts = {} start = 0 if pl.enable_benchmark: - _, benchmark_data = pl.benchmark.init_benchmark_data() + benchmark_index = pl.benchmark.init_benchmark_data() start = time.perf_counter() retri_res = pl.retriever.run(query=query, top_k=top_k) if pl.enable_benchmark: - benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start - pl.benchmark.insert_benchmark_data(benchmark_data) + pl.benchmark.update_benchmark_data(benchmark_index, CompType.RETRIEVER, time.perf_counter() - start) contexts[CompType.RETRIEVER] = retri_res return contexts @@ -325,18 +367,16 @@ async def run_postprocess(pl: Pipeline, chat_request: ChatCompletionRequest, con # Test callback to retrieve and rerank nodes from query async def run_retrieve_postprocess(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: - benchmark_data = {} query = chat_request.messages top_k = None if chat_request.k == ChatCompletionRequest.model_fields["k"].default else chat_request.k contexts = {} start = 0 if pl.enable_benchmark: - _, benchmark_data = pl.benchmark.init_benchmark_data() + benchmark_index = pl.benchmark.init_benchmark_data() start = time.perf_counter() retri_res = pl.retriever.run(query=query, top_k=top_k) if pl.enable_benchmark: - benchmark_data[CompType.RETRIEVER] = time.perf_counter() - 
start - pl.benchmark.insert_benchmark_data(benchmark_data) + pl.benchmark.update_benchmark_data(benchmark_index, CompType.RETRIEVER, time.perf_counter() - start) contexts[CompType.RETRIEVER] = retri_res query_bundle = QueryBundle(query) if pl.postprocessor: @@ -353,17 +393,20 @@ async def run_retrieve_postprocess(pl: Pipeline, chat_request: ChatCompletionReq async def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any: start = 0 - benchmark_data = {} if pl.enable_benchmark: - _, benchmark_data = pl.benchmark.init_benchmark_data() + benchmark_index = pl.benchmark.init_benchmark_data() start = time.perf_counter() n = pl.node_parser.run(docs=docs) if pl.indexer is not None: pl.indexer.insert_nodes(n) if pl.enable_benchmark: - benchmark_data[CompType.NODEPARSER] += time.perf_counter() - start - benchmark_data[CompType.CHUNK_NUM] += len(n) - pl.benchmark.insert_benchmark_data(benchmark_data) + benchmark_data = ( + pl.benchmark.get_benchmark_data(benchmark_index, CompType.NODEPARSER) + time.perf_counter() - start + ) + pl.benchmark.update_benchmark_data(benchmark_index, CompType.NODEPARSER, benchmark_data) + + benchmark_data = pl.benchmark.get_benchmark_data(benchmark_index, BenchType.CHUNK_NUM) + len(n) + pl.benchmark.update_benchmark_data(benchmark_index, BenchType.CHUNK_NUM, benchmark_data) return n @@ -386,26 +429,12 @@ def run_async_query_search(): return query, sub_questionss_result -def benchmark_response(ret, benchmark, benchmark_index, benchmark_data, input_token_size, start): - if isinstance(ret, StreamingResponse): - original_body_iterator = ret.body_iterator - - async def timing_wrapper(): - async for chunk in original_body_iterator: - yield chunk - benchmark_data[CompType.GENERATOR] = time.perf_counter() - start - benchmark.insert_llm_data(benchmark_index, input_token_size) - benchmark.insert_benchmark_data(benchmark_data) - - ret.body_iterator = timing_wrapper() - return ret - else: - return ret - - -async def run_pipeline(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: +async def run_pipeline( + pl: Pipeline, chat_request: ChatCompletionRequest, generator_type: str = GeneratorType.CHATQNA +) -> Any: + benchmark_index = -1 if pl.enable_benchmark: - benchmark_index, benchmark_data = pl.benchmark.init_benchmark_data() + benchmark_index = pl.benchmark.init_benchmark_data() contexts = {} retri_res = [] active_kb = chat_request.user if chat_request.user else None @@ -423,14 +452,17 @@ async def run_pipeline(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any query = chat_request.messages sub_questionss_result = None experience_status = True if chat_request.tool_choice == "auto" else False + target_generator = pl.get_generator(generator_type) + if target_generator is None: + raise ValueError(f"No Generator ({generator_type}) Specified") if enable_rag_retrieval: start = 0 if pl.enable_benchmark: start = time.perf_counter() - if pl.generator.inference_type == InferenceType.VLLM and experience_status: + if target_generator.inference_type == InferenceType.VLLM and experience_status: query, sub_questionss_result = await run_query_search(pl, chat_request) if pl.enable_benchmark: - benchmark_data[CompType.QUERYSEARCH] = time.perf_counter() - start + pl.benchmark.update_benchmark_data(benchmark_index, CompType.QUERYSEARCH, time.perf_counter() - start) start = time.perf_counter() top_k = ( None @@ -439,11 +471,10 @@ async def run_pipeline(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any ) retri_res = pl.retriever.run(query=query, top_k=top_k) if 
pl.enable_benchmark: - benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start + pl.benchmark.update_benchmark_data(benchmark_index, CompType.RETRIEVER, time.perf_counter() - start) + start = time.perf_counter() contexts[CompType.RETRIEVER] = retri_res query_bundle = QueryBundle(query) - if pl.enable_benchmark: - start = time.perf_counter() if pl.postprocessor: for processor in pl.postprocessor: if ( @@ -451,46 +482,51 @@ async def run_pipeline(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any and chat_request.top_n != processor.top_n and chat_request.top_n != 0 and chat_request.top_n is not None + and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default ): processor.top_n = chat_request.top_n retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) contexts[CompType.POSTPROCESSOR] = retri_res if pl.enable_benchmark: - benchmark_data[CompType.POSTPROCESSOR] = time.perf_counter() - start - - if pl.generator is None: - raise ValueError("No Generator Specified") + pl.benchmark.update_benchmark_data(benchmark_index, CompType.POSTPROCESSOR, time.perf_counter() - start) if pl.enable_benchmark: - _, prompt_str = pl.generator.query_transform(chat_request, retri_res) + _, prompt_str = target_generator.query_transform(chat_request, retri_res) input_token_size = pl.benchmark.cal_input_token_size(prompt_str) np_type = pl.node_parser.comp_subtype if pl.enable_benchmark: start = time.perf_counter() - if pl.generator.inference_type == InferenceType.LOCAL: - ret = await pl.generator.run(chat_request, retri_res, np_type) - elif pl.generator.inference_type == InferenceType.VLLM: - ret = await pl.generator.run_vllm(chat_request, retri_res, np_type, sub_questions=sub_questionss_result) + if target_generator.inference_type == InferenceType.LOCAL: + ret = await target_generator.run(chat_request, retri_res, np_type) + elif target_generator.inference_type == InferenceType.VLLM: + ret = await target_generator.run_vllm( + chat_request, + retri_res, + np_type, + sub_questions=sub_questionss_result, + benchmark=pl.benchmark, + benchmark_index=benchmark_index, + ) else: raise ValueError("LLM inference_type not supported") - if pl.enable_benchmark: - end = time.perf_counter() - if isinstance(ret, StreamingResponse): - ret = benchmark_response(ret, pl.benchmark, benchmark_index, benchmark_data, input_token_size, start) - else: - benchmark_data[CompType.GENERATOR] = end - start - pl.benchmark.insert_llm_data(benchmark_index, input_token_size) - pl.benchmark.insert_benchmark_data(benchmark_data) + if not isinstance(ret, StreamingResponse) and pl.enable_benchmark: + pl.benchmark.update_benchmark_data(benchmark_index, CompType.GENERATOR, time.perf_counter() - start) + pl.benchmark.insert_llm_data(benchmark_index, input_token_size) return ret, contexts -async def run_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: +async def run_generator( + pl: Pipeline, chat_request: ChatCompletionRequest, generator_type: str = GeneratorType.CHATQNA +) -> Any: np_type = pl.node_parser.comp_subtype - if pl.generator.inference_type == InferenceType.LOCAL: - ret = await pl.generator.run(chat_request, [], np_type) - elif pl.generator.inference_type == InferenceType.VLLM: - ret = await pl.generator.run_vllm(chat_request, [], np_type) + target_generator = pl.get_generator(generator_type) + if target_generator is None: + raise ValueError(f"No Generator ({generator_type}) Specified") + if target_generator.inference_type == InferenceType.LOCAL: + ret = await 
target_generator.run(chat_request, [], np_type) + elif target_generator.inference_type == InferenceType.VLLM: + ret = await target_generator.run_vllm(chat_request, [], np_type) else: raise ValueError("LLM inference_type not supported") return ret diff --git a/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py b/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py index e17ab9d724..1d732e2a93 100644 --- a/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py +++ b/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py @@ -8,6 +8,7 @@ import aiohttp import numpy +from edgecraftrag.base import GeneratorType from edgecraftrag.config_repository import MilvusConfigRepository from omegaconf import OmegaConf @@ -211,8 +212,9 @@ async def query_search(user_input, SEARCH_CONFIG_PATH, SEARCH_DIR, pl): top1_issue = None sub_questions_result = None - model_id = pl.generator.model_id - vllm_endpoint = pl.generator.vllm_endpoint + generator = pl.get_generator(GeneratorType.CHATQNA) + model_id = generator.model_id + vllm_endpoint = generator.vllm_endpoint maintenance_data = read_json_files(SEARCH_DIR) issues = [] diff --git a/EdgeCraftRAG/edgecraftrag/config_repository.py b/EdgeCraftRAG/edgecraftrag/config_repository.py index 761297f4f7..d00d01596d 100644 --- a/EdgeCraftRAG/edgecraftrag/config_repository.py +++ b/EdgeCraftRAG/edgecraftrag/config_repository.py @@ -6,6 +6,7 @@ import time from typing import Dict, List, Optional +from edgecraftrag.base import GeneratorType from edgecraftrag.env import AGENT_FILE, KNOWLEDGEBASE_FILE, PIPELINE_FILE from pymilvus import ( Collection, @@ -279,7 +280,18 @@ async def save_pipeline_configurations(operation: str = None, pipeline=None): target_data["idx"] = pipeline.idx target_idx = target_data.get("idx") if "generator" in target_data and operation != "delete": - target_data["generator"]["prompt_content"] = pipeline.generator.prompt_content + gens_data = target_data["generator"] + if isinstance(gens_data, list): + for gen in gens_data: + if gen.get("comp_subtype") == GeneratorType.CHATQNA: + chatqna_gen = pipeline.get_generator(GeneratorType.CHATQNA) + if chatqna_gen: + gen["prompt_content"] = chatqna_gen.prompt_content + elif isinstance(gens_data, dict): + chatqna_gen = pipeline.get_generator(GeneratorType.CHATQNA) + if chatqna_gen: + if GeneratorType.CHATQNA in gens_data: + gens_data[GeneratorType.CHATQNA]["prompt_content"] = chatqna_gen.prompt_content target_data["documents_cache"] = pipeline.documents_cache target_data["active"] = pipeline.status.active diff --git a/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py index db03dab3fd..91a24385cb 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py @@ -5,7 +5,7 @@ from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.api_schema import AgentCreateIn -from edgecraftrag.base import AgentType, BaseMgr, CallbackType +from edgecraftrag.base import AgentType, BaseMgr, CallbackType, GeneratorType from edgecraftrag.components.agent import Agent from edgecraftrag.components.agents.deep_search.deep_search import DeepSearchAgent from edgecraftrag.components.agents.simple import SimpleRAGAgent @@ -58,7 +58,11 @@ def create_agent(self, cfgs: AgentCreateIn): self.agents[new_agent.idx] = new_agent if cfgs.active: self.active_agent_idx = new_agent.idx - return new_agent + + # check pipeline freechat generator + if not 
self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).get_generator(GeneratorType.FREECHAT): + if not self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).create_freechat_gen_from_chatqna_gen(): + return "Create freechat generator for agent bound pipeline failed." else: return "Create Agent failed." diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py index d44a227325..4b296c92ab 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py @@ -16,6 +16,7 @@ class PipelineMgr(BaseMgr): def __init__(self): self._active_pipeline = None + self._prev_active_pipeline_name = None self._lock = asyncio.Lock() super().__init__() @@ -43,6 +44,8 @@ def remove_pipeline_by_name_or_id(self, name: str): raise Exception("Pipeline not found...") if pl.status.active: raise Exception("Unable to remove an active pipeline...") + if self._prev_active_pipeline_name and pl.name == self._prev_active_pipeline_name: + raise Exception("Pipeline is currently cached, unable to remove...") pl.node_parser = None pl.indexer = None pl.retriever = None @@ -59,10 +62,12 @@ def remove_pipeline_by_name_or_id(self, name: str): gc.collect() return "Pipeline removed successfully" - def get_pipelines(self): + def get_pipelines(self, gen_type: str = None): + if gen_type: + return [pl for _, pl in self.components.items() if (pl.get_generator(gen_type) is not None)] return [pl for _, pl in self.components.items()] - def activate_pipeline(self, name: str, active: bool, nm: NodeMgr, kb_name: None): + def activate_pipeline(self, name: str, active: bool, nm: NodeMgr, kb_name: None, cache_prev: bool = False): pl = self.get_pipeline_by_name_or_id(name) if pl is None: return @@ -80,12 +85,20 @@ def activate_pipeline(self, name: str, active: bool, nm: NodeMgr, kb_name: None) if prevactive: prevactive.status.active = False prevactive.update_pipeline_json({"active": prevactive.status.active}) + if cache_prev: + self._prev_active_pipeline_name = prevactive.name pl.status.active = True self._active_pipeline = pl def get_active_pipeline(self) -> Pipeline: return self._active_pipeline + def get_prev_active_pipeline_name(self) -> str: + return self._prev_active_pipeline_name + + def clear_prev_active_pipeline_name(self): + self._prev_active_pipeline_name = None + def notify_node_change(self): for _, pl in self.components.items(): pl.set_node_change() diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt index 636d1a047a..6c4b7e4451 100644 --- a/EdgeCraftRAG/edgecraftrag/requirements.txt +++ b/EdgeCraftRAG/edgecraftrag/requirements.txt @@ -3,12 +3,12 @@ EbookLib>=0.18 faiss-cpu>=1.8.0.post1 html2text>=2025.4.15 json-repair==0.52.0 -langchain-core==0.3.81 +langchain-core==0.3.80 langchain-milvus langchain-openai langgraph==0.6.10 llama-index==0.12.36 -llama-index-core==0.13.0 +llama-index-core==0.12.37 llama-index-embeddings-openvino==0.5.2 llama-index-llms-openai==0.3.44 llama-index-llms-openai-like==0.3.4 diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json index cac241ec73..e76922c154 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json @@ -30,17 +30,19 @@ } } ], - "generator": { - "inference_type": "vllm", - "model": { - "model_id": "Qwen/Qwen3-8B", - "model_path": "", - "device": "", - "weight": "" - }, - "generator_type": 
"chatqna", - "prompt_path": "./default_prompt.txt", - "vllm_endpoint": "" - }, + "generator": [ + { + "inference_type": "vllm", + "model": { + "model_id": "Qwen/Qwen3-8B", + "model_path": "", + "device": "", + "weight": "" + }, + "generator_type": "chatqna", + "prompt_path": "./default_prompt.txt", + "vllm_endpoint": "" + } + ], "active": "True" } diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json index 811e119690..c07e498169 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json @@ -30,16 +30,18 @@ } } ], - "generator": { - "model": { - "model_id": "Qwen/Qwen3-8B", - "model_path": "./models/Qwen/Qwen3-8B/INT4_compressed_weights", - "device": "auto", - "weight": "INT4" - }, - "generator_type": "chatqna", - "prompt_path": "./default_prompt.txt", - "inference_type": "local" - }, + "generator": [ + { + "model": { + "model_id": "Qwen/Qwen3-8B", + "model_path": "./models/Qwen/Qwen3-8B/INT4_compressed_weights", + "device": "auto", + "weight": "INT4" + }, + "generator_type": "chatqna", + "prompt_path": "./default_prompt.txt", + "inference_type": "local" + } + ], "active": "True" } diff --git a/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json index cac241ec73..e76922c154 100644 --- a/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json +++ b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json @@ -30,17 +30,19 @@ } } ], - "generator": { - "inference_type": "vllm", - "model": { - "model_id": "Qwen/Qwen3-8B", - "model_path": "", - "device": "", - "weight": "" - }, - "generator_type": "chatqna", - "prompt_path": "./default_prompt.txt", - "vllm_endpoint": "" - }, + "generator": [ + { + "inference_type": "vllm", + "model": { + "model_id": "Qwen/Qwen3-8B", + "model_path": "", + "device": "", + "weight": "" + }, + "generator_type": "chatqna", + "prompt_path": "./default_prompt.txt", + "vllm_endpoint": "" + } + ], "active": "True" } diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json index 811e119690..c07e498169 100644 --- a/EdgeCraftRAG/tests/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json @@ -30,16 +30,18 @@ } } ], - "generator": { - "model": { - "model_id": "Qwen/Qwen3-8B", - "model_path": "./models/Qwen/Qwen3-8B/INT4_compressed_weights", - "device": "auto", - "weight": "INT4" - }, - "generator_type": "chatqna", - "prompt_path": "./default_prompt.txt", - "inference_type": "local" - }, + "generator": [ + { + "model": { + "model_id": "Qwen/Qwen3-8B", + "model_path": "./models/Qwen/Qwen3-8B/INT4_compressed_weights", + "device": "auto", + "weight": "INT4" + }, + "generator_type": "chatqna", + "prompt_path": "./default_prompt.txt", + "inference_type": "local" + } + ], "active": "True" } diff --git a/EdgeCraftRAG/tools/quick_start.sh b/EdgeCraftRAG/tools/quick_start.sh index 909e34dd43..0d82bd6ab8 100755 --- a/EdgeCraftRAG/tools/quick_start.sh +++ b/EdgeCraftRAG/tools/quick_start.sh @@ -8,6 +8,26 @@ WORKPATH=$(dirname "$(pwd)") ip_address=$(hostname -I | awk '{print $1}') HOST_IP=$ip_address +#use python venv +ENV_NAME="ecrag_venv" +python -m venv $ENV_NAME + +# check venv +if [ ! 
-d "$ENV_NAME" ]; then + echo "Failed to create virtual environment" + exit 1 +fi + +# activate venv +if [ -f "$ENV_NAME/bin/activate" ]; then + source $ENV_NAME/bin/activate +elif [ -f "$ENV_NAME/Scripts/activate" ]; then + source $ENV_NAME/Scripts/activate +else + echo "Failed to activate virtual environment" + exit 1 +fi + get_user_input() { local var_name=$1 local default_value=$2 @@ -47,10 +67,10 @@ function start_vllm_services() { else echo "you have not prepare models, starting to download models into ${MODEL_PATH}..." mkdir -p $MODEL_PATH - pip install --upgrade --upgrade-strategy eager "optimum[openvino]" + python -m pip install --upgrade-strategy eager "optimum-intel[openvino]" optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification - pip install -U huggingface_hub + pip install huggingface_hub huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" fi HF_CACHE="${HOME}/.cache" @@ -128,7 +148,7 @@ function start_services() { if [ "$your_input" == "yes" ]; then echo "start to download models..." mkdir -p $MODEL_PATH - pip install --upgrade --upgrade-strategy eager "optimum[openvino]" + python -m pip install --upgrade-strategy eager "optimum-intel[openvino]" optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification optimum-cli export openvino --model ${LLM_MODEL} ${MODEL_PATH}/${LLM_MODEL}/INT4_compressed_weights --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 0.8 @@ -296,10 +316,10 @@ function start_vLLM_B60_services() { else echo "you have not prepare models, starting to download models into ${MODEL_PATH}..." mkdir -p $MODEL_PATH - pip install --upgrade --upgrade-strategy eager "optimum[openvino]" + python -m pip install --upgrade-strategy eager "optimum-intel[openvino]" optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification - pip install -U huggingface_hub + pip install huggingface_hub huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" fi echo "give permission to related path..." 
diff --git a/EdgeCraftRAG/ui/vue/.env.development b/EdgeCraftRAG/ui/vue/.env.development index ea6834f8a0..77a0ac7575 100644 --- a/EdgeCraftRAG/ui/vue/.env.development +++ b/EdgeCraftRAG/ui/vue/.env.development @@ -4,3 +4,4 @@ ENV = development # Local Api VITE_API_URL = / VITE_CHATBOT_URL = / + diff --git a/EdgeCraftRAG/ui/vue/components.d.ts b/EdgeCraftRAG/ui/vue/components.d.ts index 62ad211632..fa4a8b942a 100644 --- a/EdgeCraftRAG/ui/vue/components.d.ts +++ b/EdgeCraftRAG/ui/vue/components.d.ts @@ -1,4 +1,4 @@ -// Copyright (C) 2025 Intel Corporation +// Copyright (C) 2026 Intel Corporation // SPDX-License-Identifier: Apache-2.0 /* eslint-disable */ diff --git a/EdgeCraftRAG/ui/vue/src/api/agent/index.ts b/EdgeCraftRAG/ui/vue/src/api/agent/index.ts index f4b27ed609..ee6fc29b3e 100644 --- a/EdgeCraftRAG/ui/vue/src/api/agent/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/agent/index.ts @@ -16,6 +16,15 @@ export const getAgentDetailByName = (name: String) => { method: "get", }); }; + +export const getFreechatList = (params: Object) => { + return request({ + url: "/v1/settings/pipelines", + method: "get", + params, + }); +}; + export const requestAgentCreate = (data: Object) => { return request({ url: "/v1/settings/agents", diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css index d5a513f8c4..fee33c704a 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css @@ -1,9 +1,9 @@ @font-face { font-family: "iconfont"; /* Project id 4784207 */ src: - url("iconfont.woff2?t=1762502048420") format("woff2"), - url("iconfont.woff?t=1762502048420") format("woff"), - url("iconfont.ttf?t=1762502048420") format("truetype"); + url("iconfont.woff2?t=1767509338651") format("woff2"), + url("iconfont.woff?t=1767509338651") format("woff"), + url("iconfont.ttf?t=1767509338651") format("truetype"); } .iconfont { @@ -14,6 +14,10 @@ -moz-osx-font-smoothing: grayscale; } +.icon-agent1:before { + content: "\e64f"; +} + .icon-simple-robot:before { content: "\e604"; } diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js index 6c670f647f..acfed4936f 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js @@ -1,8 +1,8 @@ -// Copyright (C) 2025 Intel Corporation +// Copyright (C) 2026 Intel Corporation // SPDX-License-Identifier: Apache-2.0 (window._iconfont_svg_string_4784207 = - ''), + ''), ((c) => { var l = (a = (a = document.getElementsByTagName("script"))[a.length - 1]).getAttribute("data-injectcss"), a = a.getAttribute("data-disable-injectsvg"); diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json index 54eab718fe..1fe8b6a1a4 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json @@ -5,6 +5,13 @@ "css_prefix_text": "icon-", "description": "", "glyphs": [ + { + "icon_id": "44297135", + "name": "agent", + "font_class": "agent1", + "unicode": "e64f", + "unicode_decimal": 58959 + }, { "icon_id": "13542590", "name": "simple-robot", diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf index 8c90a44513..02ce567f43 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf differ 
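Editor's note: another illustrative sketch, not part of the patch. It assumes GeneratorType values behave as plain strings (as the comp_subtype comparisons in config_repository.py suggest) and shows how the new gen_type filter on PipelineMgr.get_pipelines() could back the UI's getFreechatList() request; the exact REST query-parameter wiring is not shown in this excerpt.

# Hypothetical usage sketch of the gen_type filter added in this patch.
from edgecraftrag.base import GeneratorType

def list_freechat_pipeline_names(pipeline_mgr):
    # get_pipelines(gen_type=...) returns only pipelines that expose a
    # generator of the requested type; omitting it keeps the old behavior.
    pipelines = pipeline_mgr.get_pipelines(GeneratorType.FREECHAT)
    return [pl.name for pl in pipelines]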
diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff index b18a749da0..a1d1713b5b 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff differ diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 index 75fdccb56d..6275c60f1a 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 differ diff --git a/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts b/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts index d6fd8da012..d07b1f9b7a 100644 --- a/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts +++ b/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts @@ -1,4 +1,4 @@ -// Copyright (C) 2025 Intel Corporation +// Copyright (C) 2026 Intel Corporation // SPDX-License-Identifier: Apache-2.0 /* eslint-disable */ diff --git a/EdgeCraftRAG/ui/vue/src/components/TableColumns.vue b/EdgeCraftRAG/ui/vue/src/components/TableColumns.vue index 595c6e2a19..68cd88f170 100644 --- a/EdgeCraftRAG/ui/vue/src/components/TableColumns.vue +++ b/EdgeCraftRAG/ui/vue/src/components/TableColumns.vue @@ -1,10 +1,6 @@ - diff --git a/EdgeCraftRAG/ui/vue/src/i18n/en.ts b/EdgeCraftRAG/ui/vue/src/i18n/en.ts index e2425c2563..a2be5f5894 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/en.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/en.ts @@ -42,6 +42,15 @@ export default { inputTip: "Please enter ", connect: "Connect", detail: "View details", + retry: "Retry", + retrying: "Retrying", + retryAll: "Retry All", + ignore: "Ignore", + ignoreAll: "Ignore All", + replace: "Replace", + replaceAll: "Replace All", + count: "Count", + total: "Total", }, system: { title: "System Status", @@ -90,6 +99,7 @@ export default { deleteTip: "Are you sure delete this pipeline?", notActivatedTip: "There is no available pipeline. Please create or activate it first.", validErr: "Form validation failed !", + urlValidTip: "Test URL or model to proceed.", config: { basic: "Basic", nodeParser: "Node Parser", @@ -113,6 +123,8 @@ export default { rerankDevice: "Rerank run device", generator: "Generator", generatorType: "Generator Type", + generatorAgent: "Agent Generator", + generatorChat: "ChatQnA Generator", llm: "LLM Inference Type", language: "Large Language Model", llmDevice: "LLM run device", @@ -123,6 +135,8 @@ export default { modelName: "Model Name", vllm_url: "vLLM URL", kbadmin: "kbadmin", + addAgent: "Agent Configuration", + deleteAgentTip: "Are you sure you want to delete the agent generator configuration?", }, valid: { nameValid1: "Please input name", @@ -148,7 +162,7 @@ export default { rerank: "Please select Rerank Model", rerankDevice: "Please select Rerank run device", generatorType: "Please select Generator Type", - language: "Please select or enter Large Language Model", + language: "Please select Large Language Model", llmDevice: "Please select LLM run device", weights: "Please select Weights", kb_vector_url: "IP : Port, (e.g. 
192.168.1.1:29530)", @@ -295,6 +309,8 @@ export default { successfully: "Successfully ", failed: "Failed", totalTip: "files", + failedFile: "Failed Files", + retryFailed: "Upload failed !", }, request: { pipeline: { @@ -390,7 +406,7 @@ export default { delConfigs: "Delete Config", noData: "No settings configured", think: "Agent Thinking", - pipeline: "Pipeline ID", + pipeline: "Pipeline Name", label: { name: "Agent Name", type: "Agent Type ", @@ -406,7 +422,7 @@ export default { configs: "Please enter agent configs", key: "Please enter config key", value: "Please enter config value", - notPipeline: "Please create Pipeline with the generator type set to FreeChat first.", + notPipeline: "Please create a pipeline first.", }, }, }; diff --git a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts index bcfe4c6540..7a3fca252e 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts @@ -42,6 +42,15 @@ export default { inputTip: "请输入 ", connect: "连接", detail: "查看详情", + retry: "重试", + retrying: "重试中", + retryAll: "全部重试", + ignore: "忽略", + ignoreAll: "全部忽略", + replace: "替换", + replaceAll: "全部替换", + count: "总数", + total: "总计", }, system: { title: "系统状态", @@ -88,7 +97,7 @@ export default { deleteTip: "您确定要删除此Pipeline吗 ?此操作不可恢复。", notActivatedTip: "当前无可用Pipeline,请先配置或激活。", validErr: "表单验证失败!", - + urlValidTip: "URL 或模型测试通过后方可继续", config: { basic: "基础", nodeParser: "节点解析器", @@ -112,6 +121,8 @@ export default { rerankDevice: "模型运行设备", generator: "生成器", generatorType: "生成器类型", + generatorAgent: "智能体生成器", + generatorChat: "会话生成器", llm: "推理类型", language: "语言大模型", llmDevice: "运行设备", @@ -122,6 +133,8 @@ export default { modelName: "模型名称", vllm_url: "vLLM 地址", kbadmin: "kbadmin", + addAgent: "智能体生成器配置", + deleteAgentTip: "您确定要删除智能生成器体配置吗?", }, valid: { nameValid1: "请输入名称", @@ -147,7 +160,7 @@ export default { rerank: "请选择重排模型", rerankDevice: "请选择重排模型运行设备", generatorType: "请选择生成器类型", - language: "请选择或输入大语言模型", + language: "请选择大语言模型", llmDevice: "请选择大语言模型运行设备", weights: "请选择模型权重", kb_vector_url: "IP : 端口,(例如 192.168.1.1:29530)", @@ -291,6 +304,8 @@ export default { successfully: "成功", failed: "失败", totalTip: "个文件", + failedFile: "失败文件", + retryFailed: "上传失败!", }, request: { pipeline: { @@ -387,7 +402,7 @@ export default { delConfigs: "删除配置", noData: "暂无配置", think: "智能体思考", - pipeline: "Pipeline ID", + pipeline: "Pipeline 名称", label: { name: "智能体名称", type: "智能体类型 ", @@ -403,7 +418,7 @@ export default { configs: "请录入智能体配置", key: "请输入配置名称", value: "请输入配置内容", - notPipeline: "请先创建生成器类型为 FreeChat 的 Pipeline", + notPipeline: "请先创建Pipeline", }, }, }; diff --git a/EdgeCraftRAG/ui/vue/src/types/global.d.ts b/EdgeCraftRAG/ui/vue/src/types/global.d.ts index f21cef6f78..29cff5c80c 100644 --- a/EdgeCraftRAG/ui/vue/src/types/global.d.ts +++ b/EdgeCraftRAG/ui/vue/src/types/global.d.ts @@ -107,7 +107,7 @@ declare interface TableType { } // Table Pagination -declare interface paginationType { +declare interface PaginationType { total: number; pageNum: number; pageSize: number; @@ -117,15 +117,14 @@ declare interface paginationType { // Table Columns declare type TableColumns = { title: string; - key?: string; - dataIndex: string | string[]; - width?: number | string; - align?: "left" | "center" | "right"; - ellipsis?: boolean; + key: string; + dataIndex?: string | string[]; + children?: TableColumns[]; + disabled?: boolean; visible?: boolean; - fixed?: "left" | "right" | true | undefined; -} & { - [key: string]: any; + fixed?: string | boolean; + minWidth?: number; + ellipsis?: 
boolean; }; // Dialog diff --git a/EdgeCraftRAG/ui/vue/src/utils/notification.ts b/EdgeCraftRAG/ui/vue/src/utils/notification.ts index 151141e0ea..b4f496577c 100644 --- a/EdgeCraftRAG/ui/vue/src/utils/notification.ts +++ b/EdgeCraftRAG/ui/vue/src/utils/notification.ts @@ -1,9 +1,9 @@ // Copyright (C) 2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 -import { h } from "vue"; -import { notification } from "ant-design-vue"; import { CheckCircleFilled, CloseCircleFilled, ExclamationCircleFilled, InfoCircleFilled } from "@ant-design/icons-vue"; +import { notification } from "ant-design-vue"; +import { h } from "vue"; const getNotificationIcon = (type: string) => { switch (type) { @@ -31,5 +31,6 @@ export const customNotification = ( message, description, icon: styledIcon, + duration: 3, }); }; diff --git a/EdgeCraftRAG/ui/vue/src/utils/validate.ts b/EdgeCraftRAG/ui/vue/src/utils/validate.ts index 9ceda5d770..f8c347150d 100644 --- a/EdgeCraftRAG/ui/vue/src/utils/validate.ts +++ b/EdgeCraftRAG/ui/vue/src/utils/validate.ts @@ -37,7 +37,7 @@ export const isValidPipelineName = (name: string): boolean => { export const validateServiceAddress = (url: string): boolean => { const regex = - /^(http:\/\/)(([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}|localhost|[0-9]{1,3}(\.[0-9]{1,3}){3})(:[0-9]+)?$/; + /^(https?:\/\/)(([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,6}|localhost|[0-9]{1,3}(\.[0-9]{1,3}){3})(:[0-9]+)?$/; return regex.test(url); }; diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue index c81057bb8e..d4abe772df 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue @@ -5,12 +5,9 @@
- {{ $t("experience.import") }} + {{ + $t("experience.import") + }} diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/columnsList.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/columnsList.ts index f508282abd..125d512380 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/columnsList.ts +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/columnsList.ts @@ -21,8 +21,8 @@ const getTableColumns = (t: (key: string) => string): TableColumns[] => [ }, { title: t("agent.pipeline"), - dataIndex: "pipeline_idx", - key: "pipeline_idx", + dataIndex: "pipeline_name", + key: "pipeline_name", minWidth: 100, ellipsis: true, visible: true, diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/DetailDrawer.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/DetailDrawer.vue index 9a1853e970..cb29453790 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/DetailDrawer.vue +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/DetailDrawer.vue @@ -19,7 +19,7 @@

{{ $t("agent.pipeline") }} {{ formData.pipeline_idx }} + >{{ formData.pipeline_name }} diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/Table.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/Table.vue index 6d1498551c..7b53adec77 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/Table.vue +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/Table.vue @@ -64,6 +64,7 @@ v-model:pageSize="paginationData.pageSize" showSizeChanger :total="paginationData.total" + :show-total="total => `${$t('common.total')}: ${total}`" />

@@ -92,8 +93,8 @@ }); const emit = defineEmits(["create", "update", "search", "view"]); - const paginationData = reactive({ - total: props.tableData.length || 0, + const paginationData = reactive({ + total: 0, pageNum: 1, pageSize: 10, }); @@ -132,6 +133,13 @@ }, }); }; + watch( + () => props.tableData, + newData => { + paginationData.total = newData.length; + }, + { immediate: true } + ); diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Generator.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Generator.vue index 9e2b993470..7165861930 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Generator.vue +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Generator.vue @@ -1,235 +1,345 @@