diff --git a/conserver/links/hugging_llm_link/README.md b/conserver/links/hugging_llm_link/README.md index f06ff8c..4a21d00 100644 --- a/conserver/links/hugging_llm_link/README.md +++ b/conserver/links/hugging_llm_link/README.md @@ -20,6 +20,13 @@ The link can be configured through environment variables or the link configurati HUGGINGFACE_API_KEY=your-api-key # Required for API-based inference ``` +> **Local inference is opt-in.** The default path (`use_local_model: false`) calls the +> HuggingFace HTTP API and pulls in no ML dependencies. Setting `use_local_model: true` +> runs the model on-device via `transformers`, which is **not** part of the base +> `conserver` install. Install it with `uv sync --group conserver --group conserver-local` +> and add a model backend (e.g. `torch`) yourself. Without it, the local path raises a +> clear `ImportError` at runtime. + Additional configuration options: - `model`: HuggingFace model ID (default: "meta-llama/Llama-2-70b-chat-hf") - `use_local_model`: Toggle local model inference (default: false) diff --git a/conserver/links/hugging_llm_link/__init__.py b/conserver/links/hugging_llm_link/__init__.py index f345020..9791990 100644 --- a/conserver/links/hugging_llm_link/__init__.py +++ b/conserver/links/hugging_llm_link/__init__.py @@ -32,7 +32,6 @@ RetryError, before_sleep_log, ) -import transformers import anyio # Local imports @@ -154,6 +153,24 @@ class LocalHuggingFaceLLM(BaseLLM): def __init__(self, config: LLMConfig): super().__init__(config) + # transformers (and a backend such as torch) is an optional dependency. + # The default, API-based path (use_local_model=false) does not need it, so + # the import is deferred to here rather than loaded at module import time. + try: + import transformers + except ModuleNotFoundError as e: + # Only translate the "transformers itself is missing" case. If transformers + # is present but raises while importing a transitive dep/backend, re-raise the + # original error so the real cause isn't masked by the guidance below. + if e.name != "transformers": + raise + raise ImportError( + "Local HuggingFace inference requires the optional 'conserver-local' " + "dependency group plus a model backend (e.g. torch). " + "Install it with: uv sync --group conserver --group conserver-local. " + "The default path (use_local_model=false) calls the HuggingFace API " + "and needs none of this." + ) from e logger.info(f"Initializing local model: {self.config.model}") device = "cpu" # Always use CPU for local models logger.info(f"Using device: {device}") diff --git a/conserver/links/hugging_llm_link/main.py b/conserver/links/hugging_llm_link/main.py index a76d3a7..ce04224 100644 --- a/conserver/links/hugging_llm_link/main.py +++ b/conserver/links/hugging_llm_link/main.py @@ -1,5 +1,4 @@ -from typing import Dict, Any, Optional -from transformers import pipeline +from typing import Any, Dict AUDIT_META = { "third_party_service": "Hugging Face", @@ -21,6 +20,16 @@ def __init__(self, model_name: str = "facebook/bart-large-mnli", **kwargs): model_name: The HuggingFace model to use **kwargs: Additional arguments passed to the model pipeline """ + # transformers is an optional dependency (group: conserver-local); import lazily. + try: + from transformers import pipeline + except ModuleNotFoundError as e: + if e.name != "transformers": + raise + raise ImportError( + "HuggingLLMLink requires the optional 'conserver-local' dependency group. " + "Install it with: uv sync --group conserver --group conserver-local." + ) from e self.classifier = pipeline( "zero-shot-classification", model=model_name, **kwargs ) diff --git a/pyproject.toml b/pyproject.toml index 556b4e8..50bf72c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,6 @@ api = [ conserver = [ {include-group = "storage"}, # Processing links - "transformers>=4.48.0", "openai>=1.60.0", "groq>=0.4.0", "deepgram-sdk>=3.1.5,<4.0.0", @@ -68,6 +67,16 @@ conserver = [ "watchdog", ] +# Optional: on-device HuggingFace inference for hugging_llm_link's local-model path. +# transformers is ~48MB (plus tokenizers/huggingface_hub, ~67MB total) and additionally +# needs a model backend such as torch installed separately. The default hugging_llm_link +# path (use_local_model=false) calls the HuggingFace HTTP API and needs none of this, so +# transformers is kept out of the base conserver install. +# Install with: uv sync --group conserver --group conserver-local +conserver-local = [ + "transformers>=4.48.0", +] + # Development / test dependencies. dev = [ "black>=24.2.0", diff --git a/uv.lock b/uv.lock index c343e11..aaf9aaa 100644 --- a/uv.lock +++ b/uv.lock @@ -2356,9 +2356,11 @@ conserver = [ { name = "pymilvus" }, { name = "pymongo" }, { name = "slack-sdk" }, - { name = "transformers" }, { name = "watchdog" }, ] +conserver-local = [ + { name = "transformers" }, +] dev = [ { name = "anyio" }, { name = "black" }, @@ -2432,9 +2434,9 @@ conserver = [ { name = "pymilvus", specifier = ">=2.3.0" }, { name = "pymongo", specifier = ">=4.7.2" }, { name = "slack-sdk", specifier = ">=3.27.1" }, - { name = "transformers", specifier = ">=4.48.0" }, { name = "watchdog" }, ] +conserver-local = [{ name = "transformers", specifier = ">=4.48.0" }] dev = [ { name = "anyio", specifier = ">=4.8.0" }, { name = "black", specifier = ">=24.2.0" },