Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/skills/code-standards/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ description: NetAlertX coding standards and conventions. Use this when writing c

# Code Standards

- ask me to review before moving on to each next step (state the current step as "step n of x")
- before starting, prepare implementation plan
- ask me to review it and ask any clarifying questions first
- add test creation as the last step; follow the repo's architecture patterns and do not place tests in the root of /test
- code has to be maintainable, no duplicate code
- follow DRY principle
- code files should be less than 500 LOC for better maintainability

## File Length

Keep code files under 500 lines. Split larger files into modules.
Expand Down
29 changes: 29 additions & 0 deletions server/api_server/api_server_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from .dbquery_endpoint import read_query, write_query, update_query, delete_query # noqa: E402 [flake8 lint suppression]
from .sync_endpoint import handle_sync_post, handle_sync_get # noqa: E402 [flake8 lint suppression]
from .logs_endpoint import clean_log # noqa: E402 [flake8 lint suppression]
from .health_endpoint import get_health_status # noqa: E402 [flake8 lint suppression]
from models.user_events_queue_instance import UserEventsQueueInstance # noqa: E402 [flake8 lint suppression]

from models.event_instance import EventInstance # noqa: E402 [flake8 lint suppression]
Expand Down Expand Up @@ -86,6 +87,7 @@
RecentEventsResponse, LastEventsResponse,
NetworkTopologyResponse,
InternetInfoResponse, NetworkInterfacesResponse,
HealthCheckResponse,
CreateEventRequest, CreateSessionRequest,
DeleteSessionRequest, CreateNotificationRequest,
SyncPushRequest, SyncPullResponse,
Expand Down Expand Up @@ -1930,6 +1932,33 @@ def check_auth(payload=None):
if request.method == "GET":
return jsonify({"success": True, "message": "Authentication check successful"}), 200


# --------------------------
# Health endpoint
# --------------------------
@app.route("/health", methods=["GET"])
@validate_request(
    operation_id="check_health",
    summary="System Health Check",
    description="Retrieve system vitality metrics including database size, memory pressure, system load, disk usage, and CPU temperature.",
    response_model=HealthCheckResponse,
    tags=["system", "health"],
    auth_callable=is_authorized
)
def check_health(payload=None):
    """
    Serve the collected health metrics as a JSON document.

    Args:
        payload: Supplied by the request-validation decorator; not read here.

    Returns:
        tuple: ``(response, 200)`` with ``success`` plus every metric returned
        by ``get_health_status()``, or ``(response, 500)`` with a generic
        error body when collection or serialisation raises.
    """
    try:
        # Start from the success flag and layer the metrics on top, so a
        # metric key named "success" would win exactly as a dict splat would.
        body = {"success": True}
        body.update(get_health_status())
        return jsonify(body), 200
    except Exception as e:
        # The exception text goes to the log only; the client gets a generic message.
        mylog("none", [f"[health] Error retrieving health status: {e}"])
        failure = {
            "success": False,
            "error": "Failed to retrieve health status",
            "message": "Internal server error"
        }
        return jsonify(failure), 500


# --------------------------
# Background Server Start
# --------------------------
Expand Down
137 changes: 137 additions & 0 deletions server/api_server/health_endpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""Health check endpoint for NetAlertX system vitality monitoring."""

import os
import psutil
from pathlib import Path

from const import dbPath, dataPath
from logger import mylog


# ===============================================================================
# Database Vitality
# ===============================================================================

def _file_size_bytes(path):
    """Return the size of *path* in bytes, or 0 if the file does not exist."""
    try:
        return Path(path).stat().st_size
    except FileNotFoundError:
        # A missing file is an expected state (e.g. no WAL file present),
        # not an error: it simply contributes nothing to the total.
        return 0


def get_db_size_mb():
    """
    Calculate total database size in MB (app.db + app.db-wal).

    Each file is stat'ed exactly once instead of being checked with
    ``exists()`` first. A file that is missing, or that disappears while the
    metric is being collected (SQLite can remove the ``-wal`` file), therefore
    only contributes 0 bytes rather than forcing the whole result to 0.

    Returns:
        float: Combined size in MB rounded to two decimals. 0.0 if neither
        file exists or an unexpected error (e.g. permission denied) occurs.
    """
    try:
        size_bytes = sum(
            _file_size_bytes(candidate)
            for candidate in (dbPath, f"{dbPath}-wal")
        )
        return round(size_bytes / (1024 * 1024), 2)
    except Exception as e:
        # Best-effort metric: never let a stat failure break the health endpoint.
        mylog("verbose", [f"[health] Error calculating DB size: {e}"])
        return 0.0


# ===============================================================================
# Memory Pressure
# ===============================================================================

def get_mem_usage_pct():
    """
    Report memory in use as a whole-number percentage of total memory.

    The value is psutil's ``used`` figure divided by ``total``, scaled to a
    percentage, truncated to an integer and limited to the 0-100 range.

    Returns:
        int: Percentage between 0 and 100, or None if the reading fails.
    """
    try:
        memory = psutil.virtual_memory()
        ratio = memory.used / memory.total
        used_pct = int(ratio * 100)
    except Exception as e:
        mylog("verbose", [f"[health] Error calculating memory usage: {e}"])
        return None

    # Keep the result inside the documented 0-100 range.
    if used_pct < 0:
        return 0
    if used_pct > 100:
        return 100
    return used_pct

def get_load_avg_1m():
    """
    Read the system's 1-minute load average.

    Returns:
        float: Load average rounded to two decimals, or -1.0 when the value
        cannot be obtained.
    """
    try:
        one_minute = os.getloadavg()[0]
    except Exception as e:
        mylog("verbose", [f"[health] Error getting load average: {e}"])
        return -1.0
    else:
        return round(one_minute, 2)


# ===============================================================================
# Disk Headroom
# ===============================================================================

def get_storage_pct():
    """
    Report how full the filesystem holding ``dataPath`` is.

    Usage is derived from ``os.statvfs``: blocks that are not free, divided
    by total blocks, truncated to a whole percentage and limited to 0-100.
    A filesystem reporting zero total size yields 0.

    Returns:
        int: Percentage between 0 and 100, or None if the lookup fails.
    """
    try:
        fs = os.statvfs(dataPath)
        capacity = fs.f_blocks * fs.f_frsize
        if capacity > 0:
            consumed = (fs.f_blocks - fs.f_bfree) * fs.f_frsize
            usage = int((consumed / capacity) * 100)
        else:
            usage = 0
        # Keep the result inside the documented 0-100 range.
        return min(max(usage, 0), 100)
    except Exception as e:
        mylog("verbose", [f"[health] Error calculating storage usage: {e}"])
        return None

# Sensor names that report the CPU temperature, in order of preference.
# "coretemp" stays first so Intel hosts behave exactly as before; the others
# cover AMD (k10temp / zenpower) and ARM boards such as the Raspberry Pi.
_CPU_SENSOR_NAMES = ("coretemp", "k10temp", "zenpower", "cpu_thermal", "cpu-thermal")


def get_cpu_temp():
    """
    Get CPU temperature from hardware sensors if available.

    Known CPU sensor names are tried first so that an unrelated sensor
    (disk, ACPI zone, ...) listed earlier by psutil is not reported as the
    CPU temperature. If none of them is present, the first sensor that has
    any reading is used as a last resort.

    Returns:
        int: Temperature in Celsius (truncated to a whole number), or None
        if no sensor data is available or the lookup fails.
    """
    try:
        temps = psutil.sensors_temperatures()
        if not temps:
            return None

        # Preferred: a sensor known to belong to the CPU.
        for name in _CPU_SENSOR_NAMES:
            readings = temps.get(name)
            if readings:
                return int(readings[0].current)

        # Last resort: first sensor group that has data.
        for readings in temps.values():
            if readings:
                return int(readings[0].current)

        return None
    except Exception as e:
        # Also covers platforms where psutil has no sensors_temperatures().
        mylog("verbose", [f"[health] Error reading CPU temperature: {e}"])
        return None


# ===============================================================================
# Aggregator
# ===============================================================================

def get_health_status():
    """
    Run every health collector and gather the results under fixed keys.

    Collectors are called in the order listed below, one after another.

    Returns:
        dict: Maps ``db_size_mb``, ``mem_usage_pct``, ``load_1m``,
        ``storage_pct`` and ``cpu_temp`` to the value returned by the
        corresponding collector.
    """
    collectors = (
        ("db_size_mb", get_db_size_mb),
        ("mem_usage_pct", get_mem_usage_pct),
        ("load_1m", get_load_avg_1m),
        ("storage_pct", get_storage_pct),
        ("cpu_temp", get_cpu_temp),
    )
    return {metric: collect() for metric, collect in collectors}
28 changes: 28 additions & 0 deletions server/api_server/openapi/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,34 @@ class NetworkInterfacesResponse(BaseResponse):
interfaces: Dict[str, Any] = Field(..., description="Details about network interfaces.")


# =============================================================================
# HEALTH CHECK SCHEMAS
# =============================================================================


class HealthCheckResponse(BaseResponse):
    """System health check with vitality metrics."""
    # NOTE(review): docstring kept verbatim - the schema library may surface it
    # as the model description in the generated API docs; confirm before rewording.

    # Always-present metrics.
    db_size_mb: float = Field(
        ...,
        description="Database size in MB (app.db + app.db-wal)",
    )

    # Nullable: omitted/None when the reading is unavailable.
    mem_usage_pct: Optional[int] = Field(
        default=None,
        ge=0,
        le=100,
        description="Memory usage percentage (0-100, nullable if unavailable)",
    )

    load_1m: float = Field(
        ...,
        description="1-minute load average",
    )

    storage_pct: Optional[int] = Field(
        default=None,
        ge=0,
        le=100,
        description="Disk usage percentage of /data mount (0-100, nullable if unavailable)",
    )

    cpu_temp: Optional[int] = Field(
        default=None,
        description="CPU temperature in Celsius (nullable if unavailable)",
    )

    # Extra keys are accepted; the example below is attached to the JSON schema.
    model_config = ConfigDict(
        extra="allow",
        json_schema_extra={
            "examples": [
                {
                    "success": True,
                    "db_size_mb": 125.45,
                    "mem_usage_pct": 65,
                    "load_1m": 2.15,
                    "storage_pct": 42,
                    "cpu_temp": 58,
                }
            ]
        },
    )


# =============================================================================
# EVENTS SCHEMAS
# =============================================================================
Expand Down
Loading