Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Unified telemetry and error tracking for OpenAdapt packages.
- **Usage Counters (PostHog)**: Lightweight product usage events for adoption metrics
- **Privacy-First Design**: Automatic PII scrubbing and path sanitization
- **Configurable Opt-Out**: Respects `DO_NOT_TRACK` and custom environment variables
- **CI/Dev Mode Detection**: Automatically tags internal usage for filtering
- **Internal Usage Tagging**: Explicit flags + CI detection with optional git heuristic
- **GlitchTip/Sentry Compatible**: Uses the Sentry SDK for maximum compatibility

## Installation
Expand Down Expand Up @@ -111,6 +111,7 @@ with TelemetrySpan("indexing", "build_faiss_index") as span:
| `OPENADAPT_TELEMETRY_ENABLED` | `true` | Enable/disable telemetry |
| `OPENADAPT_INTERNAL` | `false` | Tag as internal usage |
| `OPENADAPT_DEV` | `false` | Development mode |
| `OPENADAPT_INTERNAL_FROM_GIT` | `false` | Optional: tag as internal when running from a git checkout |
| `OPENADAPT_TELEMETRY_DSN` | - | GlitchTip/Sentry DSN |
| `OPENADAPT_POSTHOG_PROJECT_API_KEY` | embedded default | PostHog ingestion project token (`phc_...`) |
| `OPENADAPT_POSTHOG_HOST` | `https://us.i.posthog.com` | PostHog ingestion host |
Expand All @@ -120,6 +121,7 @@ with TelemetrySpan("indexing", "build_faiss_index") as span:
| `OPENADAPT_TELEMETRY_ENVIRONMENT` | `production` | Environment name |
| `OPENADAPT_TELEMETRY_SAMPLE_RATE` | `1.0` | Error sampling rate (0.0-1.0) |
| `OPENADAPT_TELEMETRY_TRACES_SAMPLE_RATE` | `0.01` | Performance sampling rate |
| `OPENADAPT_TELEMETRY_ANON_SALT` | generated | Optional anonymization salt override (advanced use only) |

### Configuration File

Expand Down Expand Up @@ -178,16 +180,19 @@ export OPENADAPT_TELEMETRY_ENABLED=false
- File paths have usernames replaced with `<user>`
- Sensitive fields (password, token, api_key, etc.) are redacted
- Email addresses and phone numbers are scrubbed from messages
- Top-level event messages/logentry strings are scrubbed
- Tag keys are validated, sensitive/invalid keys are dropped, and values are scrubbed before upload
- User IDs are HMAC-anonymized before upload (`anon:v2:<hash>`)
- `send_default_pii` is enforced to `false` by the client

## Internal Usage Tagging

Internal/developer usage is automatically detected via:

1. `OPENADAPT_INTERNAL=true` environment variable
2. `OPENADAPT_DEV=true` environment variable
3. Running from source (not frozen executable)
4. Git repository present in working directory
5. CI environment detected (GitHub Actions, GitLab CI, etc.)
3. CI environment detected (GitHub Actions, GitLab CI, etc.)
4. Optional git repository heuristic when `OPENADAPT_INTERNAL_FROM_GIT=true`

Filter in GlitchTip:
```
Expand Down
99 changes: 62 additions & 37 deletions src/openadapt_telemetry/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@
import os
import platform
import sys
import warnings
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Callable, Dict, Optional

import sentry_sdk
from sentry_sdk.types import Event, Hint

from .config import TelemetryConfig, load_config
from .privacy import create_before_send_filter
from .privacy import anonymize_identifier, create_before_send_filter

BeforeSendFn = Callable[[Event, Hint], Optional[Event]]


def is_running_from_executable() -> bool:
Expand Down Expand Up @@ -56,12 +60,11 @@ def is_ci_environment() -> bool:
def is_internal_user() -> bool:
"""Determine if current usage is from internal team.

Uses multiple heuristics to detect internal/developer usage:
Uses multiple signals to detect internal/developer usage:
1. Explicit OPENADAPT_INTERNAL environment variable
2. OPENADAPT_DEV environment variable
3. Not running from frozen executable
4. Git repository present in current directory
5. CI environment detected
3. CI environment detected
4. Optional git repository heuristic when OPENADAPT_INTERNAL_FROM_GIT=true

Returns:
True if this appears to be internal usage.
Expand All @@ -74,21 +77,30 @@ def is_internal_user() -> bool:
if os.getenv("OPENADAPT_DEV", "").lower() in ("true", "1", "yes"):
return True

# Method 3: Not running from executable (indicates dev mode)
if not is_running_from_executable():
return True

# Method 4: Git repository present (development checkout)
if Path(".git").exists() or Path("../.git").exists():
return True

# Method 5: CI/CD environment
# Method 3: CI/CD environment
if is_ci_environment():
return True

# Method 4: optional git heuristic
if os.getenv("OPENADAPT_INTERNAL_FROM_GIT", "").lower() in ("true", "1", "yes"):
if Path(".git").exists() or Path("../.git").exists():
return True

return False


def _compose_before_send(base: BeforeSendFn, extra: BeforeSendFn) -> BeforeSendFn:
    """Chain ``extra`` in front of ``base`` and return the combined hook.

    The returned callable passes each event to ``extra`` first. When
    ``extra`` returns ``None`` the event is dropped and ``base`` is never
    called; otherwise ``base`` receives the value ``extra`` returned,
    together with the same hint, and its result is returned.

    Args:
        base: Hook that runs last on whatever ``extra`` produced.
        extra: Hook that runs first on the incoming event.

    Returns:
        A hook with the same ``(event, hint)`` signature as its inputs.
    """

    def composed(event: Event, hint: Hint) -> Optional[Event]:
        intermediate = extra(event, hint)
        # A None from the first hook means "drop the event"; skip ``base``.
        return None if intermediate is None else base(intermediate, hint)

    return composed


class TelemetryClient:
"""Unified telemetry client for all OpenAdapt packages.

Expand Down Expand Up @@ -128,20 +140,13 @@ def reset_instance(cls) -> None:
def _check_enabled(self) -> bool:
"""Check if telemetry should be enabled.

Checks environment variables for opt-out signals.
Uses merged config with defaults/env/file precedence.

Returns:
True if telemetry should be enabled.
"""
# Universal opt-out (DO_NOT_TRACK standard)
if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
return False

# Package-specific opt-out
if os.getenv("OPENADAPT_TELEMETRY_ENABLED", "").lower() in ("false", "0", "no"):
return False

return True
self._config = load_config()
return bool(self._config.enabled)

@property
def enabled(self) -> bool:
Expand Down Expand Up @@ -187,10 +192,8 @@ def initialize(
Returns:
True if initialization succeeded or the client was already initialized; False if telemetry is disabled or no DSN is configured.
"""
if not self._enabled:
return False

if self._initialized and not kwargs.get("force", False):
force = bool(kwargs.pop("force", False))
if self._initialized and not force:
return True

# Load configuration
Expand All @@ -201,28 +204,49 @@ def initialize(
self._config.dsn = dsn
if environment:
self._config.environment = environment
self._enabled = bool(self._config.enabled)

if not self._enabled:
return False

# Skip if no DSN configured
if not self._config.dsn:
return False

# Create privacy filter
before_send = create_before_send_filter()
# Privacy scrubbing is always enforced as the final step; an optional custom filter runs first and its result is then scrubbed.
base_before_send = create_before_send_filter()
custom_before_send = kwargs.pop("before_send", None)
if custom_before_send is not None:
if not callable(custom_before_send):
raise TypeError("before_send must be callable")
warnings.warn(
"Custom before_send runs before OpenAdapt privacy filtering; final payload is always scrubbed.",
stacklevel=2,
)
before_send = _compose_before_send(base_before_send, custom_before_send)
else:
before_send = base_before_send

if "send_default_pii" in kwargs:
kwargs.pop("send_default_pii")
warnings.warn(
"Ignoring sentry init override for send_default_pii; OpenAdapt telemetry enforces send_default_pii=False.",
stacklevel=2,
)

# Initialize Sentry SDK
sentry_kwargs = {
"dsn": self._config.dsn,
"environment": self._config.environment,
"sample_rate": self._config.sample_rate,
"traces_sample_rate": self._config.traces_sample_rate,
"send_default_pii": self._config.send_default_pii,
# Enforced for privacy safety across all callers/configs.
"send_default_pii": False,
"before_send": before_send,
}

# Merge in any additional kwargs
sentry_kwargs.update(kwargs)
# Remove our internal kwargs
sentry_kwargs.pop("force", None)

sentry_sdk.init(**sentry_kwargs)

Expand Down Expand Up @@ -314,12 +338,13 @@ def set_user(
Note: Only sets anonymous user ID. Never set email, name, or other PII.

Args:
user_id: Anonymous user identifier.
**kwargs: Additional user properties (id only recommended).
user_id: User identifier to hash before sending.
**kwargs: Ignored. Additional user fields are dropped.
"""
if not self._enabled or not self._initialized:
return
sentry_sdk.set_user({"id": user_id, **kwargs})
_ = kwargs
sentry_sdk.set_user({"id": anonymize_identifier(user_id)})

def set_tag(self, key: str, value: str) -> None:
"""Set a custom tag for all subsequent events.
Expand Down
84 changes: 77 additions & 7 deletions src/openadapt_telemetry/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

import json
import os
import secrets
import warnings
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
Expand All @@ -27,11 +29,13 @@
"performance_tracking": True,
"feature_usage": True,
"send_default_pii": False,
"anon_salt": None,
}

# Config file location
CONFIG_DIR = Path.home() / ".config" / "openadapt"
CONFIG_FILE = CONFIG_DIR / "telemetry.json"
_INVALID_ANON_SALT_WARNED = False


@dataclass
Expand All @@ -48,6 +52,7 @@ class TelemetryConfig:
performance_tracking: bool = True
feature_usage: bool = True
send_default_pii: bool = False
anon_salt: Optional[str] = None

_loaded: bool = field(default=False, repr=False)

Expand All @@ -73,7 +78,8 @@ def _load_config_file() -> dict[str, Any]:

try:
with open(CONFIG_FILE) as f:
return json.load(f)
data = json.load(f)
return data if isinstance(data, dict) else {}
except (json.JSONDecodeError, OSError):
return {}

Expand All @@ -82,15 +88,15 @@ def _get_env_config() -> dict[str, Any]:
"""Get configuration from environment variables."""
config: dict[str, Any] = {}

# Universal opt-out (DO_NOT_TRACK standard)
if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
config["enabled"] = False

# Package-specific opt-out
# Package-specific toggle
enabled_env = os.getenv("OPENADAPT_TELEMETRY_ENABLED", "")
if enabled_env:
config["enabled"] = _parse_bool(enabled_env)

# Universal opt-out (DO_NOT_TRACK standard) always wins.
if os.getenv("DO_NOT_TRACK", "").lower() in ("1", "true"):
config["enabled"] = False

# Internal/developer flags
if os.getenv("OPENADAPT_INTERNAL", "").lower() in ("true", "1", "yes"):
config["internal"] = True
Expand Down Expand Up @@ -122,9 +128,72 @@ def _get_env_config() -> dict[str, Any]:
except ValueError:
pass

# Optional override for deterministic anonymization in controlled environments.
anon_salt = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
if anon_salt:
if _is_valid_anon_salt(anon_salt):
config["anon_salt"] = anon_salt.strip()
else:
_warn_invalid_anon_salt_once()

return config


def _is_valid_anon_salt(value: Any) -> bool:
    """Report whether ``value`` is usable as an HMAC anonymization salt.

    A salt is accepted only when it is a string whose length, after
    stripping surrounding whitespace, is at least 32 characters.

    Args:
        value: Candidate salt of any type.

    Returns:
        True when the candidate passes both checks, False otherwise.
    """
    if not isinstance(value, str):
        return False
    # Surrounding whitespace does not count toward the minimum length.
    return len(value.strip()) >= 32


def _warn_invalid_anon_salt_once() -> None:
    """Emit the invalid-salt warning at most once per process.

    The module-level ``_INVALID_ANON_SALT_WARNED`` flag records whether the
    warning has already been issued; later calls are no-ops.
    """
    global _INVALID_ANON_SALT_WARNED
    if not _INVALID_ANON_SALT_WARNED:
        warnings.warn(
            "Ignoring invalid OPENADAPT_TELEMETRY_ANON_SALT; must be >= 32 chars.",
            stacklevel=2,
        )
        # Set the flag only after the warning has actually been issued.
        _INVALID_ANON_SALT_WARNED = True


def _generate_anon_salt() -> str:
    """Return a fresh random salt as a 64-character hexadecimal string.

    The value is drawn from the ``secrets`` module using 32 random bytes.
    """
    entropy_bytes = 32
    return secrets.token_hex(entropy_bytes)


def get_or_create_anon_salt() -> str:
    """Return the anonymization salt, generating and storing one if needed.

    Sources are consulted in this order:
    1. ``OPENADAPT_TELEMETRY_ANON_SALT`` when set and valid. A set but
       invalid value triggers the one-time warning and falls through.
    2. The ``anon_salt`` entry of the telemetry config file, when valid.
    3. A newly generated salt, written back to the config file alongside
       whatever data the file already held.

    Returns:
        The salt string, stripped of surrounding whitespace when it comes
        from the environment or the config file.

    Note:
        If the generated salt cannot be written (``OSError``), a warning is
        emitted and the salt is still returned for use in this process.
    """
    from_env = os.getenv("OPENADAPT_TELEMETRY_ANON_SALT")
    if from_env and _is_valid_anon_salt(from_env):
        return from_env.strip()
    if from_env:
        # Set but unusable: warn once and fall back to the other sources.
        _warn_invalid_anon_salt_once()

    stored = _load_config_file()
    candidate = stored.get("anon_salt")
    if _is_valid_anon_salt(candidate):
        return str(candidate).strip()

    fresh_salt = _generate_anon_salt()
    stored["anon_salt"] = fresh_salt
    try:
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        with open(CONFIG_FILE, "w") as handle:
            json.dump(stored, handle, indent=2)
    except OSError:
        # Best effort: a failed write must not stop the caller.
        warnings.warn(
            "Failed to persist telemetry anonymization salt; using ephemeral salt for this process.",
            stacklevel=2,
        )
    return fresh_salt


def load_config() -> TelemetryConfig:
"""Load telemetry configuration from all sources.

Expand All @@ -148,7 +217,7 @@ def load_config() -> TelemetryConfig:
merged.update(env_config)

# Remove None values for fields that should use defaults
config_dict = {k: v for k, v in merged.items() if v is not None or k == "dsn"}
config_dict = {k: v for k, v in merged.items() if v is not None or k in {"dsn", "anon_salt"}}

return TelemetryConfig(**config_dict, _loaded=True)

Expand All @@ -172,6 +241,7 @@ def save_config(config: TelemetryConfig) -> None:
"performance_tracking": config.performance_tracking,
"feature_usage": config.feature_usage,
"send_default_pii": config.send_default_pii,
"anon_salt": config.anon_salt,
}

with open(CONFIG_FILE, "w") as f:
Expand Down
Loading
Loading