diff --git a/conserver/links/hugging_llm_link/README.md b/conserver/links/hugging_llm_link/README.md
index f06ff8c..4a21d00 100644
--- a/conserver/links/hugging_llm_link/README.md
+++ b/conserver/links/hugging_llm_link/README.md
@@ -20,6 +20,13 @@ The link can be configured through environment variables or the link configurati
 HUGGINGFACE_API_KEY=your-api-key  # Required for API-based inference
 ```
 
+> **Local inference is opt-in.** The default path (`use_local_model: false`) calls the
+> HuggingFace HTTP API and pulls in no ML dependencies. Setting `use_local_model: true`
+> runs the model on-device via `transformers`, which is **not** part of the base
+> `conserver` install. Install it with `uv sync --group conserver --group conserver-local`
+> and add a model backend (e.g. `torch`) yourself. If `transformers` is missing, the local
+> path raises a clear `ImportError` at runtime.
+
 Additional configuration options:
 - `model`: HuggingFace model ID (default: "meta-llama/Llama-2-70b-chat-hf")
 - `use_local_model`: Toggle local model inference (default: false)
diff --git a/conserver/links/hugging_llm_link/__init__.py b/conserver/links/hugging_llm_link/__init__.py
index f345020..9791990 100644
--- a/conserver/links/hugging_llm_link/__init__.py
+++ b/conserver/links/hugging_llm_link/__init__.py
@@ -32,7 +32,6 @@
     RetryError,
     before_sleep_log,
 )
-import transformers
 import anyio
 
 # Local imports
@@ -154,6 +153,24 @@ class LocalHuggingFaceLLM(BaseLLM):
 
     def __init__(self, config: LLMConfig):
         super().__init__(config)
+        # transformers (and a backend such as torch) is an optional dependency.
+        # The default, API-based path (use_local_model=false) does not need it, so
+        # the import is deferred to here rather than loaded at module import time.
+        try:
+            import transformers
+        except ModuleNotFoundError as e:
+            # Only translate the "transformers itself is missing" case. If transformers
+            # is present but raises while importing a transitive dep/backend, re-raise the
+            # original error so the real cause isn't masked by the guidance below.
+            if e.name != "transformers":
+                raise
+            raise ImportError(
+                "Local HuggingFace inference requires the optional 'conserver-local' "
+                "dependency group plus a model backend (e.g. torch). "
+                "Install it with: uv sync --group conserver --group conserver-local. "
+                "The default path (use_local_model=false) calls the HuggingFace API "
+                "and needs none of this."
+            ) from e
         logger.info(f"Initializing local model: {self.config.model}")
         device = "cpu"  # Always use CPU for local models
         logger.info(f"Using device: {device}")
diff --git a/conserver/links/hugging_llm_link/main.py b/conserver/links/hugging_llm_link/main.py
index a76d3a7..ce04224 100644
--- a/conserver/links/hugging_llm_link/main.py
+++ b/conserver/links/hugging_llm_link/main.py
@@ -1,5 +1,4 @@
-from typing import Dict, Any, Optional
-from transformers import pipeline
+from typing import Any, Dict
 
 AUDIT_META = {
     "third_party_service": "Hugging Face",
@@ -21,6 +20,16 @@ def __init__(self, model_name: str = "facebook/bart-large-mnli", **kwargs):
             model_name: The HuggingFace model to use
             **kwargs: Additional arguments passed to the model pipeline
         """
+        # transformers is an optional dependency (group: conserver-local); import lazily.
+        try:
+            from transformers import pipeline
+        except ModuleNotFoundError as e:
+            if e.name != "transformers":
+                raise
+            raise ImportError(
+                "HuggingLLMLink requires the optional 'conserver-local' dependency group. "
+                "Install it with: uv sync --group conserver --group conserver-local."
+            ) from e
         self.classifier = pipeline(
             "zero-shot-classification", model=model_name, **kwargs
         )
diff --git a/pyproject.toml b/pyproject.toml
index 556b4e8..50bf72c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,6 @@ api = [
 conserver = [
     {include-group = "storage"},
     # Processing links
-    "transformers>=4.48.0",
     "openai>=1.60.0",
     "groq>=0.4.0",
     "deepgram-sdk>=3.1.5,<4.0.0",
@@ -68,6 +67,16 @@ conserver = [
     "watchdog",
 ]
 
+# Optional: on-device HuggingFace inference for hugging_llm_link's local-model path.
+# transformers is ~48MB (plus tokenizers/huggingface_hub, ~67MB total) and additionally
+# needs a model backend such as torch installed separately. The default hugging_llm_link
+# path (use_local_model=false) calls the HuggingFace HTTP API and needs none of this, so
+# transformers is kept out of the base conserver install.
+# Install with: uv sync --group conserver --group conserver-local
+conserver-local = [
+    "transformers>=4.48.0",
+]
+
 # Development / test dependencies.
 dev = [
     "black>=24.2.0",
diff --git a/uv.lock b/uv.lock
index c343e11..aaf9aaa 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2356,9 +2356,11 @@ conserver = [
     { name = "pymilvus" },
     { name = "pymongo" },
     { name = "slack-sdk" },
-    { name = "transformers" },
     { name = "watchdog" },
 ]
+conserver-local = [
+    { name = "transformers" },
+]
 dev = [
     { name = "anyio" },
     { name = "black" },
@@ -2432,9 +2434,9 @@ conserver = [
     { name = "pymilvus", specifier = ">=2.3.0" },
     { name = "pymongo", specifier = ">=4.7.2" },
     { name = "slack-sdk", specifier = ">=3.27.1" },
-    { name = "transformers", specifier = ">=4.48.0" },
     { name = "watchdog" },
 ]
+conserver-local = [{ name = "transformers", specifier = ">=4.48.0" }]
 dev = [
     { name = "anyio", specifier = ">=4.8.0" },
     { name = "black", specifier = ">=24.2.0" },