diff --git a/common/storage/milvus/README.md b/common/storage/milvus/README.md index 53a5736..bfbda70 100644 --- a/common/storage/milvus/README.md +++ b/common/storage/milvus/README.md @@ -2,6 +2,12 @@ This module implements vector database storage using Milvus for the vCon server. +> **Optional dependency.** `pymilvus` (which pulls in pandas + grpcio, ~85MB the +> rest of vcon-server does not use) is **not** part of the base `storage` group. +> Install it explicitly to use this backend: +> `uv sync --group storage --group storage-milvus`. Without it, importing +> `storage.milvus` will fail with `ImportError`/`ModuleNotFoundError` due to the missing dependency. + ## Overview Milvus storage provides high-performance vector similarity search capabilities, ideal for storing and retrieving vector embeddings of vCon data. It's particularly useful for semantic search and similarity matching applications. diff --git a/common/storage/milvus/test_milvus.py b/common/storage/milvus/test_milvus.py index 9ed9073..174091b 100644 --- a/common/storage/milvus/test_milvus.py +++ b/common/storage/milvus/test_milvus.py @@ -1,6 +1,11 @@ import pytest from unittest.mock import patch, MagicMock, mock_open +# pymilvus/openai are optional dependencies (group: storage-milvus). storage.milvus +# imports them at module load, so skip this whole module when they aren't installed. +pytest.importorskip("pymilvus") +pytest.importorskip("openai") + from lib.vcon_redis import VconRedis from vcon import Vcon from storage.milvus import ( diff --git a/common/storage/milvus/test_milvus_branches.py b/common/storage/milvus/test_milvus_branches.py index b0e6867..9b3fd90 100644 --- a/common/storage/milvus/test_milvus_branches.py +++ b/common/storage/milvus/test_milvus_branches.py @@ -2,6 +2,11 @@ import pytest +# pymilvus/openai are optional dependencies (group: storage-milvus). storage.milvus +# imports them at module load, so skip this whole module when they aren't installed. 
+pytest.importorskip("pymilvus") +pytest.importorskip("openai") + from storage import milvus as milvus_module from storage.milvus import ( check_vcon_exists, diff --git a/docker/Dockerfile b/docker/Dockerfile index 1642b09..15a06d8 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -29,9 +29,11 @@ COPY pyproject.toml uv.lock /app/ # Install all groups (conserver + api + dev) so the image works for both # running services and running pytest. +# storage-milvus is included here (but NOT in the production Dockerfile.conserver / +# Dockerfile.api images) so the Milvus storage tests run in CI rather than skip. # Venv at /opt/venv so docker-compose volume mounts don't wipe it. RUN uv venv --seed /opt/venv && \ - UV_PROJECT_ENVIRONMENT=/opt/venv uv sync --frozen --group conserver --group api --group dev + UV_PROJECT_ENVIRONMENT=/opt/venv uv sync --frozen --group conserver --group api --group dev --group storage-milvus ENV PATH="/opt/venv/bin:$PATH" # Auto-install OTel instrumentation packages for the installed libraries. diff --git a/pyproject.toml b/pyproject.toml index 556b4e8..42d0988 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,13 +34,23 @@ dependencies = [ storage = [ "pymongo>=4.7.2", "elasticsearch>=8.13.1,<9.0.0", - "pymilvus>=2.3.0", "msal>=1.32.3", "boto3>=1.34.52", "psycopg2-binary>=2.9.9", "peewee>=3.17.1", ] +# Optional: Milvus vector-database storage backend (storage.milvus). +# pymilvus drags in pandas (~48MB) and grpcio (~37MB), neither of which the +# rest of vcon-server uses, so it is kept out of the base `storage` group. +# Includes openai for the embedding calls the backend makes. Both install only +# when this group is requested; otherwise importing storage.milvus fails with +# ImportError. Install with: uv sync --group storage --group storage-milvus +storage-milvus = [ + "pymilvus>=2.3.0", + "openai>=1.60.0", +] + # API service dependencies — HTTP layer and API key management. 
# Install with: uv sync --group api api = [ diff --git a/uv.lock b/uv.lock index c343e11..070a12f 100644 --- a/uv.lock +++ b/uv.lock @@ -2336,7 +2336,6 @@ api = [ { name = "msal" }, { name = "peewee" }, { name = "psycopg2-binary" }, - { name = "pymilvus" }, { name = "pymongo" }, { name = "starlette" }, { name = "uvicorn" }, @@ -2353,7 +2352,6 @@ conserver = [ { name = "peewee" }, { name = "psycopg2-binary" }, { name = "pydub" }, - { name = "pymilvus" }, { name = "pymongo" }, { name = "slack-sdk" }, { name = "transformers" }, @@ -2377,9 +2375,12 @@ storage = [ { name = "msal" }, { name = "peewee" }, { name = "psycopg2-binary" }, - { name = "pymilvus" }, { name = "pymongo" }, ] +storage-milvus = [ + { name = "openai" }, + { name = "pymilvus" }, +] [package.metadata] requires-dist = [ @@ -2412,7 +2413,6 @@ api = [ { name = "msal", specifier = ">=1.32.3" }, { name = "peewee", specifier = ">=3.17.1" }, { name = "psycopg2-binary", specifier = ">=2.9.9" }, - { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pymongo", specifier = ">=4.7.2" }, { name = "starlette", specifier = ">=0.40.0" }, { name = "uvicorn", specifier = "==0.23.2" }, @@ -2429,7 +2429,6 @@ conserver = [ { name = "peewee", specifier = ">=3.17.1" }, { name = "psycopg2-binary", specifier = ">=2.9.9" }, { name = "pydub", specifier = ">=0.25.1" }, - { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pymongo", specifier = ">=4.7.2" }, { name = "slack-sdk", specifier = ">=3.27.1" }, { name = "transformers", specifier = ">=4.48.0" }, @@ -2453,9 +2452,12 @@ storage = [ { name = "msal", specifier = ">=1.32.3" }, { name = "peewee", specifier = ">=3.17.1" }, { name = "psycopg2-binary", specifier = ">=2.9.9" }, - { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pymongo", specifier = ">=4.7.2" }, ] +storage-milvus = [ + { name = "openai", specifier = ">=1.60.0" }, + { name = "pymilvus", specifier = ">=2.3.0" }, +] [[package]] name = "watchdog"