diff --git a/README.md b/README.md
index 172da1b..54b7eb7 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Unified telemetry and error tracking for OpenAdapt packages.
 - **Usage Counters (PostHog)**: Lightweight product usage events for adoption metrics
 - **Privacy-First Design**: Automatic PII scrubbing and path sanitization
 - **Configurable Opt-Out**: Respects `DO_NOT_TRACK` and custom environment variables
-- **CI/Dev Mode Detection**: Automatically tags internal usage for filtering
+- **Internal Usage Tagging**: Explicit flags + CI detection with optional git heuristic
 - **GlitchTip/Sentry Compatible**: Uses the Sentry SDK for maximum compatibility
 
 ## Installation
@@ -111,6 +111,7 @@ with TelemetrySpan("indexing", "build_faiss_index") as span:
 | `OPENADAPT_TELEMETRY_ENABLED` | `true` | Enable/disable telemetry |
 | `OPENADAPT_INTERNAL` | `false` | Tag as internal usage |
 | `OPENADAPT_DEV` | `false` | Development mode |
+| `OPENADAPT_INTERNAL_FROM_GIT` | `false` | Optional: tag as internal when running from a git checkout |
 | `OPENADAPT_TELEMETRY_DSN` | - | GlitchTip/Sentry DSN |
 | `OPENADAPT_POSTHOG_PROJECT_API_KEY` | embedded default | PostHog ingestion project token (`phc_...`) |
 | `OPENADAPT_POSTHOG_HOST` | `https://us.i.posthog.com` | PostHog ingestion host |
@@ -120,6 +121,7 @@ with TelemetrySpan("indexing", "build_faiss_index") as span:
 | `OPENADAPT_TELEMETRY_ENVIRONMENT` | `production` | Environment name |
 | `OPENADAPT_TELEMETRY_SAMPLE_RATE` | `1.0` | Error sampling rate (0.0-1.0) |
 | `OPENADAPT_TELEMETRY_TRACES_SAMPLE_RATE` | `0.01` | Performance sampling rate |
+| `OPENADAPT_TELEMETRY_ANON_SALT` | generated | Optional anonymization salt override (advanced use only) |
 
 ### Configuration File
 
@@ -178,6 +180,10 @@ export OPENADAPT_TELEMETRY_ENABLED=false
 - File paths have usernames replaced with `<user>`
 - Sensitive fields (password, token, api_key, etc.) are redacted
 - Email addresses and phone numbers are scrubbed from messages
+- Top-level event messages/logentry strings are scrubbed
+- Tag keys are validated, sensitive/invalid keys are dropped, and values are scrubbed before upload
+- User IDs are HMAC-anonymized before upload (`anon:v2:<hash>`)
+- `send_default_pii` is enforced to `false` by the client
 
 ## Internal Usage Tagging
 
@@ -185,9 +191,8 @@ Internal/developer usage is automatically detected via:
 
 1. `OPENADAPT_INTERNAL=true` environment variable
 2. `OPENADAPT_DEV=true` environment variable
-3. Running from source (not frozen executable)
-4. Git repository present in working directory
-5. CI environment detected (GitHub Actions, GitLab CI, etc.)
+3. CI environment detected (GitHub Actions, GitLab CI, etc.)
+4. Optional git repository heuristic when `OPENADAPT_INTERNAL_FROM_GIT=true`
 
 Filter in GlitchTip:
 ```
diff --git a/src/openadapt_telemetry/client.py b/src/openadapt_telemetry/client.py
index 7dfb81a..d25da41 100644
--- a/src/openadapt_telemetry/client.py
+++ b/src/openadapt_telemetry/client.py
@@ -9,13 +9,17 @@
 import os
 import platform
 import sys
+import warnings
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Callable, Dict, Optional
 
 import sentry_sdk
+from sentry_sdk.types import Event, Hint
 
 from .config import TelemetryConfig, load_config
-from .privacy import create_before_send_filter
+from .privacy import anonymize_identifier, create_before_send_filter
+
+BeforeSendFn = Callable[[Event, Hint], Optional[Event]]
 
 
 def is_running_from_executable() -> bool:
@@ -56,12 +60,11 @@ def is_ci_environment() -> bool:
 def is_internal_user() -> bool:
     """Determine if current usage is from internal team.
 
-    Uses multiple heuristics to detect internal/developer usage:
+    Uses multiple signals to detect internal/developer usage:
     1. Explicit OPENADAPT_INTERNAL environment variable
     2. OPENADAPT_DEV environment variable
-    3. Not running from frozen executable
-    4. Git repository present in current directory
-    5. CI environment detected
+    3. CI environment detected
+    4. Optional git repository heuristic when OPENADAPT_INTERNAL_FROM_GIT=true
 
     Returns:
         True if this appears to be internal usage.
@@ -74,21 +77,30 @@ def is_internal_user() -> bool:
     if os.getenv("OPENADAPT_DEV", "").lower() in ("true", "1", "yes"):
         return True
 
-    # Method 3: Not running from executable (indicates dev mode)
-    if not is_running_from_executable():
-        return True
-
-    # Method 4: Git repository present (development checkout)
-    if Path(".git").exists() or Path("../.git").exists():
-        return True
-
-    # Method 5: CI/CD environment
+    # Method 3: CI/CD environment
     if is_ci_environment():
         return True
 
+    # Method 4: optional git heuristic
+    if os.getenv("OPENADAPT_INTERNAL_FROM_GIT", "").lower() in ("true", "1", "yes"):
+        if Path(".git").exists() or Path("../.git").exists():
+            return True
+
     return False
 
 
+def _compose_before_send(base: BeforeSendFn, extra: BeforeSendFn) -> BeforeSendFn:
+    """Compose custom before_send before final privacy filtering."""
+
+    def composed(event: Event, hint: Hint) -> Optional[Event]:
+        modified = extra(event, hint)
+        if modified is None:
+            return None
+        return base(modified, hint)
+
+    return composed
+
+
 class TelemetryClient:
     """Unified telemetry client for all OpenAdapt packages.
 
@@ -128,20 +140,13 @@ def reset_instance(cls) -> None:
     def _check_enabled(self) -> bool:
         """Check if telemetry should be enabled.
 
-        Checks environment variables for opt-out signals.
+        Uses merged config with defaults/env/file precedence.
 
         Returns:
             True if telemetry should be enabled.
         """
-        # Universal opt-out (DO_NOT_TRACK standard)
-        if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
-            return False
-
-        # Package-specific opt-out
-        if os.getenv("OPENADAPT_TELEMETRY_ENABLED", "").lower() in ("false", "0", "no"):
-            return False
-
-        return True
+        self._config = load_config()
+        return bool(self._config.enabled)
 
     @property
     def enabled(self) -> bool:
@@ -187,10 +192,8 @@ def initialize(
         Returns:
             True if initialization succeeded, False if disabled or already initialized.
         """
-        if not self._enabled:
-            return False
-
-        if self._initialized and not kwargs.get("force", False):
+        force = bool(kwargs.pop("force", False))
+        if self._initialized and not force:
             return True
 
         # Load configuration
@@ -201,13 +204,35 @@ def initialize(
             self._config.dsn = dsn
         if environment:
             self._config.environment = environment
+        self._enabled = bool(self._config.enabled)
+
+        if not self._enabled:
+            return False
 
         # Skip if no DSN configured
         if not self._config.dsn:
             return False
 
-        # Create privacy filter
-        before_send = create_before_send_filter()
+        # Always enforce privacy scrubber first; optional custom filter can run afterward.
+        base_before_send = create_before_send_filter()
+        custom_before_send = kwargs.pop("before_send", None)
+        if custom_before_send is not None:
+            if not callable(custom_before_send):
+                raise TypeError("before_send must be callable")
+            warnings.warn(
+                "Custom before_send runs before OpenAdapt privacy filtering; final payload is always scrubbed.",
+                stacklevel=2,
+            )
+            before_send = _compose_before_send(base_before_send, custom_before_send)
+        else:
+            before_send = base_before_send
+
+        if "send_default_pii" in kwargs:
+            kwargs.pop("send_default_pii")
+            warnings.warn(
+                "Ignoring sentry init override for send_default_pii; OpenAdapt telemetry enforces send_default_pii=False.",
+                stacklevel=2,
+            )
 
         # Initialize Sentry SDK
         sentry_kwargs = {
@@ -215,14 +240,13 @@ def initialize(
             "environment": self._config.environment,
             "sample_rate": self._config.sample_rate,
             "traces_sample_rate": self._config.traces_sample_rate,
-            "send_default_pii": self._config.send_default_pii,
+            # Enforced for privacy safety across all callers/configs.
+            "send_default_pii": False,
             "before_send": before_send,
         }
 
         # Merge in any additional kwargs
         sentry_kwargs.update(kwargs)
-        # Remove our internal kwargs
-        sentry_kwargs.pop("force", None)
 
         sentry_sdk.init(**sentry_kwargs)
 
@@ -314,12 +338,13 @@ def set_user(
         Note: Only sets anonymous user ID. Never set email, name, or other PII.
 
         Args:
-            user_id: Anonymous user identifier.
-            **kwargs: Additional user properties (id only recommended).
+            user_id: User identifier to hash before sending.
+            **kwargs: Ignored. Additional user fields are dropped.
         """
         if not self._enabled or not self._initialized:
             return
-        sentry_sdk.set_user({"id": user_id, **kwargs})
+        _ = kwargs
+        sentry_sdk.set_user({"id": anonymize_identifier(user_id)})
 
     def set_tag(self, key: str, value: str) -> None:
         """Set a custom tag for all subsequent events.
diff --git a/src/openadapt_telemetry/config.py b/src/openadapt_telemetry/config.py
index 6bbf4c1..8129f79 100644
--- a/src/openadapt_telemetry/config.py
+++ b/src/openadapt_telemetry/config.py
@@ -11,6 +11,8 @@
 
 import json
 import os
+import secrets
+import warnings
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Optional
@@ -27,11 +29,13 @@
     "performance_tracking": True,
     "feature_usage": True,
     "send_default_pii": False,
+    "anon_salt": None,
 }
 
 # Config file location
 CONFIG_DIR = Path.home() / ".config" / "openadapt"
 CONFIG_FILE = CONFIG_DIR / "telemetry.json"
+_INVALID_ANON_SALT_WARNED = False
 
 
 @dataclass
@@ -48,6 +52,7 @@ class TelemetryConfig:
     performance_tracking: bool = True
     feature_usage: bool = True
     send_default_pii: bool = False
+    anon_salt: Optional[str] = None
 
     _loaded: bool = field(default=False, repr=False)
 
@@ -73,7 +78,8 @@ def _load_config_file() -> dict[str, Any]:
 
     try:
         with open(CONFIG_FILE) as f:
-            return json.load(f)
+            data = json.load(f)
+            return data if isinstance(data, dict) else {}
     except (json.JSONDecodeError, OSError):
         return {}
 
@@ -82,15 +88,15 @@ def _get_env_config() -> dict[str, Any]:
     """Get configuration from environment variables."""
     config: dict[str, Any] = {}
 
-    # Universal opt-out (DO_NOT_TRACK standard)
-    if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
-        config["enabled"] = False
-
-    # Package-specific opt-out
+    # Package-specific toggle
     enabled_env = os.getenv("OPENADAPT_TELEMETRY_ENABLED", "")
     if enabled_env:
         config["enabled"] = _parse_bool(enabled_env)
 
+    # Universal opt-out (DO_NOT_TRACK standard) always wins.
+    if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
+        config["enabled"] = False
+
     # Internal/developer flags
     if os.getenv("OPENADAPT_INTERNAL", "").lower() in ("true", "1", "yes"):
         config["internal"] = True
@@ -122,9 +128,72 @@ def _get_env_config() -> dict[str, Any]:
         except ValueError:
             pass
 
+    # Optional override for deterministic anonymization in controlled environments.
+    anon_salt = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
+    if anon_salt:
+        if _is_valid_anon_salt(anon_salt):
+            config["anon_salt"] = anon_salt.strip()
+        else:
+            _warn_invalid_anon_salt_once()
+
     return config
 
 
+def _is_valid_anon_salt(value: Any) -> bool:
+    """Check whether a salt value is valid for HMAC anonymization."""
+    return isinstance(value, str) and len(value.strip()) >= 32
+
+
+def _warn_invalid_anon_salt_once() -> None:
+    """Warn once per process when OPENADAPT_TELEMETRY_ANON_SALT is invalid."""
+    global _INVALID_ANON_SALT_WARNED
+    if _INVALID_ANON_SALT_WARNED:
+        return
+    warnings.warn(
+        "Ignoring invalid OPENADAPT_TELEMETRY_ANON_SALT; must be >= 32 chars.",
+        stacklevel=2,
+    )
+    _INVALID_ANON_SALT_WARNED = True
+
+
+def _generate_anon_salt() -> str:
+    """Generate a high-entropy random salt."""
+    return secrets.token_hex(32)
+
+
+def get_or_create_anon_salt() -> str:
+    """Get anonymization salt from env/config, creating one if missing.
+
+    Priority:
+    1. OPENADAPT_TELEMETRY_ANON_SALT (if valid)
+    2. telemetry config file `anon_salt` (if valid)
+    3. generated and persisted random salt
+    """
+    env_salt = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
+    if env_salt:
+        if _is_valid_anon_salt(env_salt):
+            return env_salt.strip()
+        _warn_invalid_anon_salt_once()
+
+    config_data = _load_config_file()
+    file_salt = config_data.get("anon_salt")
+    if _is_valid_anon_salt(file_salt):
+        return str(file_salt).strip()
+
+    generated = _generate_anon_salt()
+    config_data["anon_salt"] = generated
+    try:
+        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+        with open(CONFIG_FILE, "w") as f:
+            json.dump(config_data, f, indent=2)
+    except OSError:
+        warnings.warn(
+            "Failed to persist telemetry anonymization salt; using ephemeral salt for this process.",
+            stacklevel=2,
+        )
+    return generated
+
+
 def load_config() -> TelemetryConfig:
     """Load telemetry configuration from all sources.
 
@@ -148,7 +217,7 @@ def load_config() -> TelemetryConfig:
     merged.update(env_config)
 
     # Remove None values for fields that should use defaults
-    config_dict = {k: v for k, v in merged.items() if v is not None or k == "dsn"}
+    config_dict = {k: v for k, v in merged.items() if v is not None or k in {"dsn", "anon_salt"}}
 
     return TelemetryConfig(**config_dict, _loaded=True)
 
@@ -172,6 +241,7 @@ def save_config(config: TelemetryConfig) -> None:
         "performance_tracking": config.performance_tracking,
         "feature_usage": config.feature_usage,
         "send_default_pii": config.send_default_pii,
+        "anon_salt": config.anon_salt,
     }
 
     with open(CONFIG_FILE, "w") as f:
diff --git a/src/openadapt_telemetry/privacy.py b/src/openadapt_telemetry/privacy.py
index 8117581..027d10b 100644
--- a/src/openadapt_telemetry/privacy.py
+++ b/src/openadapt_telemetry/privacy.py
@@ -9,9 +9,14 @@
 
 from __future__ import annotations
 
+import hashlib
+import hmac
 import re
+from functools import lru_cache
 from typing import Any, Dict, List, Optional, Set
 
+from .config import get_or_create_anon_salt
+
 # Sensitive field names that should have their values redacted
 PII_DENYLIST: Set[str] = {
     # Authentication
@@ -94,6 +99,97 @@
     re.compile(r"[A-Za-z0-9+/]{40,}={0,2}"),
 ]
 
+# Whitelisted tags that retain observability value and are safe to keep.
+ALLOWED_OBSERVABILITY_KEYS: Set[str] = {
+    "package",
+    "package_version",
+    "python_version",
+    "os",
+    "os_version",
+    "ci",
+    "internal",
+}
+
+ANON_VERSION = "v2"
+TAG_KEY_PATTERN = re.compile(r"^[a-zA-Z0-9._:-]{1,64}$")
+MAX_TAGS = 64
+MAX_TAG_VALUE_CHARS = 256
+
+
+@lru_cache(maxsize=1)
+def _get_anon_salt_cached() -> str:
+    """Memoize anon salt lookup to avoid repeated filesystem reads."""
+    return get_or_create_anon_salt()
+
+
+def _is_already_anonymized(value: str, prefix: str = "anon") -> bool:
+    """Return True only for canonical anonymous ID formats."""
+    escaped_prefix = re.escape(prefix)
+    patterns = (
+        rf"^{escaped_prefix}:v2:[0-9a-f]{{16}}$",
+        rf"^{escaped_prefix}:v1:[0-9a-f]{{16}}$",
+        rf"^{escaped_prefix}:[0-9a-f]{{16}}$",
+        rf"^{escaped_prefix}:v2:unknown$",
+    )
+    return any(re.match(pattern, value) for pattern in patterns)
+
+
+def _scrub_top_level_messages(event: Dict[str, Any]) -> None:
+    """Scrub top-level message fields that may contain free-form text."""
+    if isinstance(event.get("message"), str):
+        event["message"] = scrub_string(event["message"])
+    if isinstance(event.get("logentry"), dict):
+        logentry = event["logentry"]
+        if isinstance(logentry.get("message"), str):
+            logentry["message"] = scrub_string(logentry["message"])
+        if isinstance(logentry.get("formatted"), str):
+            logentry["formatted"] = scrub_string(logentry["formatted"])
+
+
+def _is_safe_tag_key(key: Any) -> bool:
+    """Return True when a tag key is safe to keep."""
+    if not isinstance(key, str):
+        return False
+    if not TAG_KEY_PATTERN.match(key):
+        return False
+    if is_sensitive_key(key):
+        return False
+    return True
+
+
+def _normalize_tag_value(value: Any) -> str:
+    """Convert a tag value to a scrubbed bounded string."""
+    scrubbed = scrub_string(str(value))
+    return scrubbed[:MAX_TAG_VALUE_CHARS]
+
+
+def _scrub_tags(tags: Dict[str, Any]) -> Dict[str, Any]:
+    """Keep safe tags, scrub values, and bound payload size."""
+    sanitized: Dict[str, Any] = {}
+    dropped_count = 0
+
+    # Preserve explicitly listed observability keys first so they survive capping.
+    for key in ALLOWED_OBSERVABILITY_KEYS:
+        if key in tags and _is_safe_tag_key(key):
+            sanitized[key] = _normalize_tag_value(tags[key])
+
+    for key, value in tags.items():
+        if key in sanitized:
+            continue
+        if len(sanitized) >= MAX_TAGS:
+            dropped_count += 1
+            continue
+        if not _is_safe_tag_key(key):
+            dropped_count += 1
+            continue
+        sanitized[key] = _normalize_tag_value(value)
+
+    # Keep hard cap strict, including metadata markers.
+    if dropped_count > 0 and len(sanitized) < MAX_TAGS:
+        sanitized["_dropped_tag_count"] = str(dropped_count)
+
+    return sanitized
+
 
 def sanitize_path(path: str) -> str:
     """Remove username from file paths.
@@ -157,6 +253,23 @@ def scrub_string(value: str) -> str:
     return value
 
 
+def anonymize_identifier(value: str, prefix: str = "anon") -> str:
+    """Deterministically anonymize an identifier using versioned HMAC-SHA256."""
+    normalized = str(value or "").strip()
+    if not normalized:
+        return f"{prefix}:{ANON_VERSION}:unknown"
+    if _is_already_anonymized(normalized, prefix=prefix):
+        return normalized
+
+    salt = _get_anon_salt_cached()
+    digest = hmac.new(
+        salt.encode("utf-8"),
+        normalized.encode("utf-8"),
+        hashlib.sha256,
+    ).hexdigest()[:16]
+    return f"{prefix}:{ANON_VERSION}:{digest}"
+
+
 def scrub_dict(
     data: Dict[str, Any],
     deep: bool = True,
@@ -181,6 +294,8 @@ def scrub_dict(
             result[key] = scrub_dict(value, deep=True, scrub_values=scrub_values)
         elif isinstance(value, list) and deep:
             result[key] = scrub_list(value, scrub_values=scrub_values)
+        elif isinstance(value, tuple) and deep:
+            result[key] = tuple(scrub_list(list(value), scrub_values=scrub_values))
         elif isinstance(value, str):
             if scrub_values:
                 result[key] = scrub_string(value)
@@ -209,6 +324,8 @@ def scrub_list(data: List[Any], scrub_values: bool = False) -> List[Any]:
             result.append(scrub_dict(item, deep=True, scrub_values=scrub_values))
         elif isinstance(item, list):
             result.append(scrub_list(item, scrub_values=scrub_values))
+        elif isinstance(item, tuple):
+            result.append(tuple(scrub_list(list(item), scrub_values=scrub_values)))
         elif isinstance(item, str) and scrub_values:
             result.append(scrub_string(item))
         else:
@@ -258,43 +375,63 @@ def create_before_send_filter():
 
     def before_send(event: Event, hint: Hint) -> Optional[Event]:
         """Filter and sanitize events before sending to Sentry/GlitchTip."""
-        # Scrub exception data
-        if "exception" in event:
-            scrub_exception_data(event["exception"])
-
-        # Scrub breadcrumbs
-        if "breadcrumbs" in event and "values" in event["breadcrumbs"]:
-            for breadcrumb in event["breadcrumbs"]["values"]:
-                if "message" in breadcrumb and isinstance(breadcrumb["message"], str):
-                    breadcrumb["message"] = scrub_string(breadcrumb["message"])
-                if "data" in breadcrumb and isinstance(breadcrumb["data"], dict):
-                    breadcrumb["data"] = scrub_dict(
-                        breadcrumb["data"], deep=True, scrub_values=True
-                    )
-
-        # Scrub extra data
-        if "extra" in event and isinstance(event["extra"], dict):
-            event["extra"] = scrub_dict(event["extra"], deep=True, scrub_values=True)
-
-        # Scrub contexts
-        if "contexts" in event and isinstance(event["contexts"], dict):
-            event["contexts"] = scrub_dict(event["contexts"], deep=True, scrub_values=False)
-
-        # Scrub tags
-        if "tags" in event and isinstance(event["tags"], dict):
-            event["tags"] = scrub_dict(event["tags"], deep=False, scrub_values=False)
-
-        # Scrub request data (if present)
-        if "request" in event:
-            request = event["request"]
-            if "headers" in request:
-                request["headers"] = scrub_dict(request["headers"], deep=False, scrub_values=False)
-            if "data" in request:
-                if isinstance(request["data"], dict):
-                    request["data"] = scrub_dict(request["data"], deep=True, scrub_values=True)
-                elif isinstance(request["data"], str):
-                    request["data"] = scrub_string(request["data"])
-
-        return event
+        try:
+            _scrub_top_level_messages(event)
+
+            # Scrub exception data
+            if "exception" in event:
+                scrub_exception_data(event["exception"])
+
+            # Scrub breadcrumbs
+            if "breadcrumbs" in event and "values" in event["breadcrumbs"]:
+                for breadcrumb in event["breadcrumbs"]["values"]:
+                    if "message" in breadcrumb and isinstance(breadcrumb["message"], str):
+                        breadcrumb["message"] = scrub_string(breadcrumb["message"])
+                    if "data" in breadcrumb and isinstance(breadcrumb["data"], dict):
+                        breadcrumb["data"] = scrub_dict(
+                            breadcrumb["data"], deep=True, scrub_values=True
+                        )
+
+            # Scrub extra data
+            if "extra" in event and isinstance(event["extra"], dict):
+                event["extra"] = scrub_dict(event["extra"], deep=True, scrub_values=True)
+
+            # Scrub contexts
+            if "contexts" in event and isinstance(event["contexts"], dict):
+                event["contexts"] = scrub_dict(event["contexts"], deep=True, scrub_values=True)
+
+            # Scrub tags (allowlist + scrub values)
+            if "tags" in event and isinstance(event["tags"], dict):
+                event["tags"] = _scrub_tags(event["tags"])
+
+            # Scrub request data (if present)
+            if "request" in event and isinstance(event["request"], dict):
+                request = event["request"]
+                if "headers" in request:
+                    if isinstance(request["headers"], dict):
+                        request["headers"] = scrub_dict(
+                            request["headers"], deep=False, scrub_values=True
+                        )
+                    elif isinstance(request["headers"], list):
+                        request["headers"] = scrub_list(request["headers"], scrub_values=True)
+                    elif isinstance(request["headers"], str):
+                        request["headers"] = scrub_string(request["headers"])
+                if "data" in request:
+                    if isinstance(request["data"], dict):
+                        request["data"] = scrub_dict(request["data"], deep=True, scrub_values=True)
+                    elif isinstance(request["data"], list):
+                        request["data"] = scrub_list(request["data"], scrub_values=True)
+                    elif isinstance(request["data"], str):
+                        request["data"] = scrub_string(request["data"])
+
+            # Keep only anonymous user IDs (drop all other user attributes).
+            if "user" in event and isinstance(event["user"], dict):
+                user_id = event["user"].get("id")
+                event["user"] = {"id": anonymize_identifier(str(user_id))} if user_id else {}
+
+            return event
+        except Exception:
+            # Fail closed on privacy filter errors.
+            return None
 
     return before_send
diff --git a/tests/test_client.py b/tests/test_client.py
index cebcdca..cc2606f 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -3,6 +3,8 @@
 import os
 from unittest.mock import patch
 
+import pytest
+
 from openadapt_telemetry.client import (
     TelemetryClient,
     get_telemetry,
@@ -96,6 +98,39 @@ def test_ci_indicates_internal(self):
         with patch.dict(os.environ, {"CI": "true"}, clear=False):
             assert is_internal_user() is True
 
+    @patch("openadapt_telemetry.client.Path.exists", return_value=False)
+    @patch("openadapt_telemetry.client.is_ci_environment", return_value=False)
+    def test_no_signals_indicates_external(self, _mock_ci, _mock_exists):
+        """Without explicit internal signals, usage should be treated as external."""
+        with patch.dict(os.environ, {"OPENADAPT_INTERNAL": "", "OPENADAPT_DEV": ""}, clear=False):
+            assert is_internal_user() is False
+
+    @patch("openadapt_telemetry.client.Path.exists", return_value=True)
+    @patch("openadapt_telemetry.client.is_ci_environment", return_value=False)
+    def test_git_repo_not_internal_by_default(self, _mock_ci, _mock_exists):
+        """Git repos should not be internal unless explicitly enabled."""
+        with patch.dict(
+            os.environ,
+            {
+                "OPENADAPT_INTERNAL": "",
+                "OPENADAPT_DEV": "",
+                "OPENADAPT_INTERNAL_FROM_GIT": "",
+            },
+            clear=False,
+        ):
+            assert is_internal_user() is False
+
+    @patch("openadapt_telemetry.client.Path.exists", return_value=True)
+    @patch("openadapt_telemetry.client.is_ci_environment", return_value=False)
+    def test_git_repo_internal_when_opted_in(self, _mock_ci, _mock_exists):
+        """Git repos should be internal when OPENADAPT_INTERNAL_FROM_GIT is enabled."""
+        with patch.dict(
+            os.environ,
+            {"OPENADAPT_INTERNAL_FROM_GIT": "true", "OPENADAPT_INTERNAL": "", "OPENADAPT_DEV": ""},
+            clear=False,
+        ):
+            assert is_internal_user() is True
+
 
 class TestTelemetryClient:
     """Tests for TelemetryClient class."""
@@ -181,6 +216,8 @@ def test_initialize_with_dsn(self, mock_sentry):
             assert result is True
             assert client.initialized is True
             mock_sentry.init.assert_called_once()
+            init_kwargs = mock_sentry.init.call_args.kwargs
+            assert init_kwargs["send_default_pii"] is False
 
     @patch("openadapt_telemetry.client.sentry_sdk")
     def test_capture_exception_when_enabled(self, mock_sentry):
@@ -231,6 +268,74 @@ def test_set_tag(self, mock_sentry):
 
             mock_sentry.set_tag.assert_called_with("test_key", "test_value")
 
+    @patch("openadapt_telemetry.client.sentry_sdk")
+    def test_set_user_hashes_identifier_and_drops_extra_fields(self, mock_sentry):
+        """set_user should only send an anonymized ID."""
+        with patch.dict(os.environ, {"DO_NOT_TRACK": ""}, clear=False):
+            TelemetryClient.reset_instance()
+            client = TelemetryClient.get_instance()
+            client.initialize(dsn="https://test@example.com/1")
+
+            client.set_user("user@example.com", email="user@example.com", name="User")
+
+            mock_sentry.set_user.assert_called_once()
+            payload = mock_sentry.set_user.call_args.args[0]
+            assert set(payload.keys()) == {"id"}
+            assert payload["id"].startswith("anon:v2:")
+            assert payload["id"] != "user@example.com"
+
+    @patch("openadapt_telemetry.client.sentry_sdk")
+    def test_custom_before_send_is_composed_after_privacy_filter(self, mock_sentry):
+        """Custom before_send should run before built-in scrubbing."""
+
+        def custom_before_send(event, hint):
+            event.setdefault("logentry", {})["message"] = "secret user@example.com"
+            event.setdefault("tags", {})["package"] = "openadapt-evals"
+            return event
+
+        with patch.dict(os.environ, {"DO_NOT_TRACK": ""}, clear=False):
+            TelemetryClient.reset_instance()
+            client = TelemetryClient.get_instance()
+            with pytest.warns(
+                UserWarning,
+                match="Custom before_send runs before OpenAdapt privacy filtering",
+            ):
+                client.initialize(
+                    dsn="https://test@example.com/1",
+                    before_send=custom_before_send,
+                )
+
+            before_send = mock_sentry.init.call_args.kwargs["before_send"]
+            event = {"user": {"id": "user@example.com", "email": "user@example.com"}}
+            output = before_send(event, hint={})
+            assert output is not None
+            assert output["user"]["id"].startswith("anon:v2:")
+            assert "email" not in output["user"]
+            assert output["logentry"]["message"] == "secret [REDACTED]"
+
+    @patch("openadapt_telemetry.client.sentry_sdk")
+    def test_send_default_pii_override_is_ignored(self, mock_sentry):
+        """send_default_pii should always be enforced to False."""
+        with patch.dict(os.environ, {"DO_NOT_TRACK": ""}, clear=False):
+            TelemetryClient.reset_instance()
+            client = TelemetryClient.get_instance()
+            with pytest.warns(UserWarning, match="Ignoring sentry init override for send_default_pii"):
+                client.initialize(
+                    dsn="https://test@example.com/1",
+                    send_default_pii=True,
+                )
+
+            assert mock_sentry.init.call_args.kwargs["send_default_pii"] is False
+
+    def test_initialize_rejects_non_callable_before_send(self):
+        """Non-callable before_send should raise TypeError."""
+        with patch.dict(os.environ, {"DO_NOT_TRACK": ""}, clear=False):
+            TelemetryClient.reset_instance()
+            client = TelemetryClient.get_instance()
+            with patch("openadapt_telemetry.client.sentry_sdk"):
+                with pytest.raises(TypeError, match="before_send must be callable"):
+                    client.initialize(dsn="https://test@example.com/1", before_send="invalid")
+
     @patch("openadapt_telemetry.client.sentry_sdk")
     def test_add_breadcrumb(self, mock_sentry):
         """add_breadcrumb should call sentry when enabled."""
diff --git a/tests/test_config.py b/tests/test_config.py
index 4809daa..9be445f 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -3,6 +3,7 @@
 import json
 import os
 import tempfile
+import warnings
 from pathlib import Path
 from unittest.mock import patch
 
@@ -10,9 +11,12 @@
 
 from openadapt_telemetry.config import (
     TelemetryConfig,
+    _generate_anon_salt,
     _get_env_config,
+    _is_valid_anon_salt,
     _load_config_file,
     _parse_bool,
+    get_or_create_anon_salt,
     load_config,
     save_config,
 )
@@ -110,6 +114,16 @@ def test_explicit_disable(self):
             config = _get_env_config()
             assert config.get("enabled") is False
 
+    def test_do_not_track_overrides_explicit_enable(self):
+        """DO_NOT_TRACK should disable telemetry even if explicitly enabled."""
+        with patch.dict(
+            os.environ,
+            {"DO_NOT_TRACK": "1", "OPENADAPT_TELEMETRY_ENABLED": "true"},
+            clear=False,
+        ):
+            config = _get_env_config()
+            assert config.get("enabled") is False
+
     def test_internal_flag(self):
         """OPENADAPT_INTERNAL should set internal flag."""
         with patch.dict(os.environ, {"OPENADAPT_INTERNAL": "true"}, clear=False):
@@ -147,6 +161,19 @@ def test_invalid_sample_rate_ignored(self):
             config = _get_env_config()
             assert "sample_rate" not in config
 
+    def test_anon_salt_env_override(self):
+        """OPENADAPT_TELEMETRY_ANON_SALT should be loaded when set."""
+        with patch.dict(os.environ, {"OPENADAPT_TELEMETRY_ANON_SALT": "x" * 32}, clear=False):
+            config = _get_env_config()
+            assert config.get("anon_salt") == "x" * 32
+
+    def test_invalid_anon_salt_env_ignored(self):
+        """Invalid OPENADAPT_TELEMETRY_ANON_SALT should be ignored in env config."""
+        with patch.dict(os.environ, {"OPENADAPT_TELEMETRY_ANON_SALT": "short"}, clear=False):
+            with pytest.warns(UserWarning, match="Ignoring invalid OPENADAPT_TELEMETRY_ANON_SALT"):
+                config = _get_env_config()
+        assert "anon_salt" not in config
+
 
 class TestConfigFile:
     """Tests for configuration file loading."""
@@ -183,6 +210,18 @@ def test_load_invalid_json(self):
 
             os.unlink(f.name)
 
+    def test_load_non_dict_json_returns_empty(self):
+        """Loading valid JSON that is not an object should return empty dict."""
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+            json.dump(["not", "a", "dict"], f)
+            f.flush()
+
+            with patch("openadapt_telemetry.config.CONFIG_FILE", Path(f.name)):
+                config = _load_config_file()
+                assert config == {}
+
+            os.unlink(f.name)
+
 
 class TestLoadConfig:
     """Tests for full configuration loading."""
@@ -239,3 +278,61 @@ def test_save_and_load(self):
                     assert loaded.enabled is False
                     assert loaded.environment == "test"
                     assert loaded.sample_rate == 0.5
+
+
+class TestAnonSalt:
+    """Tests for per-install anonymization salt handling."""
+
+    def test_validate_anon_salt(self):
+        assert _is_valid_anon_salt("x" * 32) is True
+        assert _is_valid_anon_salt("short") is False
+        assert _is_valid_anon_salt(None) is False
+
+    def test_generate_anon_salt(self):
+        salt = _generate_anon_salt()
+        assert isinstance(salt, str)
+        assert len(salt) >= 32
+
+    def test_get_or_create_anon_salt_from_env(self):
+        with patch.dict(os.environ, {"OPENADAPT_TELEMETRY_ANON_SALT": "y" * 32}, clear=False):
+            assert get_or_create_anon_salt() == "y" * 32
+
+    def test_get_or_create_anon_salt_persists_when_missing(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_dir = Path(tmpdir) / "openadapt"
+            config_file = config_dir / "telemetry.json"
+            with patch("openadapt_telemetry.config.CONFIG_DIR", config_dir):
+                with patch("openadapt_telemetry.config.CONFIG_FILE", config_file):
+                    with patch.dict(os.environ, {"OPENADAPT_TELEMETRY_ANON_SALT": ""}, clear=False):
+                        first = get_or_create_anon_salt()
+                        second = get_or_create_anon_salt()
+                        assert first == second
+                        with open(config_file) as f:
+                            data = json.load(f)
+                        assert data["anon_salt"] == first
+
+    def test_get_or_create_anon_salt_recovers_from_corrupt_file(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_dir = Path(tmpdir) / "openadapt"
+            config_dir.mkdir(parents=True, exist_ok=True)
+            config_file = config_dir / "telemetry.json"
+            config_file.write_text("{not-valid-json")
+            with patch("openadapt_telemetry.config.CONFIG_DIR", config_dir):
+                with patch("openadapt_telemetry.config.CONFIG_FILE", config_file):
+                    salt = get_or_create_anon_salt()
+                    assert isinstance(salt, str)
+                    assert len(salt) >= 32
+
+    def test_invalid_anon_salt_warns_once_across_config_paths(self):
+        with patch("openadapt_telemetry.config._INVALID_ANON_SALT_WARNED", False):
+            with patch.dict(os.environ, {"OPENADAPT_TELEMETRY_ANON_SALT": "short"}, clear=False):
+                with warnings.catch_warnings(record=True) as caught:
+                    warnings.simplefilter("always")
+                    _get_env_config()
+                    get_or_create_anon_salt()
+                matches = [
+                    warning
+                    for warning in caught
+                    if "Ignoring invalid OPENADAPT_TELEMETRY_ANON_SALT" in str(warning.message)
+                ]
+                assert len(matches) == 1
diff --git a/tests/test_privacy.py b/tests/test_privacy.py
index af37ae0..67f4f31 100644
--- a/tests/test_privacy.py
+++ b/tests/test_privacy.py
@@ -1,7 +1,12 @@
 """Tests for privacy filtering and PII scrubbing."""
 
+from unittest.mock import patch
 
 from openadapt_telemetry.privacy import (
+    ALLOWED_OBSERVABILITY_KEYS,
+    MAX_TAGS,
+    anonymize_identifier,
+    create_before_send_filter,
     is_sensitive_key,
     sanitize_path,
     scrub_dict,
@@ -273,3 +278,143 @@ def test_local_variables_scrubbing(self):
         frame = exception_data["values"][0]["stacktrace"]["frames"][0]
         assert frame["vars"]["password"] == "[REDACTED]"
         assert frame["vars"]["username"] == "john"
+
+
+class TestAnonymizeIdentifier:
+    """Tests for deterministic identifier anonymization."""
+
+    def test_stable_hash_with_same_salt(self):
+        with patch("openadapt_telemetry.privacy._get_anon_salt_cached", return_value="a" * 32):
+            first = anonymize_identifier("user@example.com")
+            second = anonymize_identifier("user@example.com")
+        assert first == second
+        assert first.startswith("anon:v2:")
+        assert first != "user@example.com"
+
+    def test_different_salts_produce_different_ids(self):
+        with patch("openadapt_telemetry.privacy._get_anon_salt_cached", return_value="a" * 32):
+            first = anonymize_identifier("user@example.com")
+        with patch("openadapt_telemetry.privacy._get_anon_salt_cached", return_value="b" * 32):
+            second = anonymize_identifier("user@example.com")
+        assert first != second
+
+    def test_empty_value(self):
+        assert anonymize_identifier("") == "anon:v2:unknown"
+
+    def test_already_anonymized_id_is_not_rehashed(self):
+        with patch("openadapt_telemetry.privacy._get_anon_salt_cached", return_value="a" * 32):
+            anon_id = anonymize_identifier("user@example.com")
+        assert anonymize_identifier(anon_id) == anon_id
+        assert anonymize_identifier("anon:v1:1234567890abcdef") == "anon:v1:1234567890abcdef"
+        assert anonymize_identifier("anon:1234567890abcdef") == "anon:1234567890abcdef"
+
+    def test_non_canonical_anon_prefix_is_rehashed(self):
+        with patch("openadapt_telemetry.privacy._get_anon_salt_cached", return_value="a" * 32):
+            value = anonymize_identifier("anon:user@example.com")
+        assert value.startswith("anon:v2:")
+        assert value != "anon:user@example.com"
+
+
+class TestBeforeSendUserScrubbing:
+    """Tests for user context anonymization in before_send."""
+
+    def test_only_anonymous_user_id_remains(self):
+        before_send = create_before_send_filter()
+        event = {
+            "user": {
+                "id": "user@example.com",
+                "email": "user@example.com",
+                "username": "alice",
+            },
+        }
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        assert "user" in sanitized
+        assert set(sanitized["user"].keys()) == {"id"}
+        assert sanitized["user"]["id"].startswith("anon:v2:")
+        assert sanitized["user"]["id"] != "user@example.com"
+
+
+class TestBeforeSendEventScrubbing:
+    """Tests for event-level before_send filtering behavior."""
+
+    def test_top_level_message_is_scrubbed(self):
+        before_send = create_before_send_filter()
+        event = {"message": "Failure for user@example.com"}
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        assert "user@example.com" not in sanitized["message"]
+
+    def test_logentry_message_is_scrubbed(self):
+        before_send = create_before_send_filter()
+        event = {"logentry": {"message": "token=Bearer abc123", "formatted": "email user@example.com"}}
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        assert "abc123" not in sanitized["logentry"]["message"]
+        assert "user@example.com" not in sanitized["logentry"]["formatted"]
+
+    def test_context_values_are_scrubbed(self):
+        before_send = create_before_send_filter()
+        event = {"contexts": {"runtime": {"note": "contact me at user@example.com"}}}
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        assert sanitized["contexts"]["runtime"]["note"] == "contact me at [REDACTED]"
+
+    def test_tags_are_allowlisted_and_scrubbed(self):
+        before_send = create_before_send_filter()
+        event = {
+            "tags": {
+                "package": "openadapt-evals",
+                "internal": "false",
+                "task_id": "abc123",
+                "email": "user@example.com",
+                "not valid": "drop-me",
+            },
+        }
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        assert set(ALLOWED_OBSERVABILITY_KEYS).intersection(set(sanitized["tags"].keys()))
+        assert sanitized["tags"]["task_id"] == "abc123"
+        assert "email" not in sanitized["tags"]
+        assert "not valid" not in sanitized["tags"]
+        assert sanitized["tags"]["_dropped_tag_count"] == "2"
+
+    def test_filter_fails_closed_on_unexpected_error(self):
+        before_send = create_before_send_filter()
+        with patch("openadapt_telemetry.privacy.scrub_exception_data", side_effect=RuntimeError("boom")):
+            event = {"exception": {"values": []}}
+            assert before_send(event, hint={}) is None
+
+    def test_request_non_dict_does_not_drop_event(self):
+        before_send = create_before_send_filter()
+        event = {"request": "unexpected-request-shape", "message": "ok"}
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        assert sanitized["request"] == "unexpected-request-shape"
+
+    def test_request_headers_tuple_list_is_scrubbed(self):
+        before_send = create_before_send_filter()
+        event = {
+            "request": {
+                "headers": [
+                    ("Authorization", "Bearer abc123"),
+                    ("X-User", "user@example.com"),
+                ],
+            },
+        }
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        assert sanitized["request"]["headers"][0][1] == "[REDACTED]"
+        assert sanitized["request"]["headers"][1][1] == "[REDACTED]"
+
+    def test_tag_cap_is_hard_limited(self):
+        before_send = create_before_send_filter()
+        many_tags = {f"k{i}": f"v{i}" for i in range(MAX_TAGS + 20)}
+        many_tags.update({"package": "openadapt-evals", "internal": "false"})
+        event = {"tags": many_tags}
+        sanitized = before_send(event, hint={})
+        assert sanitized is not None
+        tags = sanitized["tags"]
+        assert "package" in tags
+        assert "internal" in tags
+        assert len(tags) <= MAX_TAGS