Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 240 additions & 0 deletions Dashboard/Analysis/SqlServerAnomalyDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,250 @@ public async Task<List<Fact>> DetectAnomaliesAsync(AnalysisContext context)
await DetectSessionAnomalies(context, anomalies);
await DetectQueryDurationAnomalies(context, anomalies);
await DetectMemoryAnomalies(context, anomalies);
await DetectObjectStatsAnomalies(context, anomalies);

return anomalies;
}

/// <summary>
/// Minimum day-over-day increase in a table's summed reserved_mb (in MB) for
/// ANOMALY_OBJECT_GROWTH. Must be met together with <see cref="ObjectGrowthPctThreshold"/>.
/// </summary>
private const decimal ObjectGrowthMbThreshold = 100m;

/// <summary>
/// Minimum day-over-day growth, as a percentage of the prior size, for
/// ANOMALY_OBJECT_GROWTH. Also the divisor used to compute the "growth_ratio" metadata value.
/// </summary>
private const double ObjectGrowthPctThreshold = 20.0;

/// <summary>
/// Minimum increase in row_lock_wait_in_ms between the two latest snapshots for
/// ANOMALY_OBJECT_CONTENTION. Also the divisor used to compute the "contention_ratio" metadata value.
/// </summary>
private const long ObjectLockWaitMsDeltaThreshold = 60000;

/// <summary>
/// Growth query: compares the two most recent distinct collection_time values in
/// collect.index_object_stats, rolls reserved_mb up per (database_name, object_id), and
/// returns the single table with the largest MB growth that meets both @growthMb and @growthPct.
/// </summary>
/// <remarks>
/// NOTE(review): a table whose prior size is 0 gets growth_pct = 0 and a table absent from the
/// prior snapshot is dropped by the inner join, so neither can ever be reported. Confirm that
/// brand-new tables are meant to be excluded.
/// </remarks>
private const string ObjectGrowthSql = @"
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;

WITH
snaps AS
(
SELECT TOP (2)
collection_time
FROM
(
SELECT DISTINCT
collection_time
FROM collect.index_object_stats
) AS d
ORDER BY
collection_time DESC
),
boundaries AS
(
SELECT
latest_time = MAX(collection_time),
prior_time = MIN(collection_time)
FROM snaps
),
cur AS
(
SELECT
database_name,
object_id,
schema_name = MAX(schema_name),
table_name = MAX(table_name),
mb = SUM(reserved_mb)
FROM collect.index_object_stats
WHERE collection_time =
(
SELECT b.latest_time
FROM boundaries AS b
)
GROUP BY
database_name,
object_id
),
prv AS
(
SELECT
database_name,
object_id,
mb = SUM(reserved_mb)
FROM collect.index_object_stats
WHERE collection_time =
(
SELECT b.prior_time
FROM boundaries AS b
)
GROUP BY
database_name,
object_id
)
SELECT TOP (1)
cur.database_name,
cur.schema_name,
cur.table_name,
prior_mb = prv.mb,
current_mb = cur.mb,
growth_mb = cur.mb - prv.mb,
growth_pct =
CASE
WHEN prv.mb > 0
THEN (cur.mb - prv.mb) * 100.0 / prv.mb
ELSE 0
END
FROM cur
JOIN prv
ON prv.database_name = cur.database_name
AND prv.object_id = cur.object_id
CROSS JOIN boundaries AS b
WHERE b.latest_time <> b.prior_time
AND cur.mb - prv.mb >= @growthMb
AND CASE WHEN prv.mb > 0 THEN (cur.mb - prv.mb) * 100.0 / prv.mb ELSE 0 END >= @growthPct
ORDER BY
cur.mb - prv.mb DESC
OPTION(MAXDOP 1, RECOMPILE);";

/// <summary>
/// Contention query: compares the two most recent distinct collection_time values in
/// collect.index_object_stats per (database_name, object_id, index_id) and returns the single
/// index with the largest increase in row_lock_wait_in_ms that meets @msDelta. Rows where the
/// current counter is below the prior one (counter reset) are excluded.
/// </summary>
private const string ObjectContentionSql = @"
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;

WITH
snaps AS
(
SELECT TOP (2)
collection_time
FROM
(
SELECT DISTINCT
collection_time
FROM collect.index_object_stats
) AS d
ORDER BY
collection_time DESC
),
boundaries AS
(
SELECT
latest_time = MAX(collection_time),
prior_time = MIN(collection_time)
FROM snaps
),
cur AS
(
SELECT
database_name,
object_id,
index_id,
schema_name,
table_name,
index_name,
ms = ISNULL(row_lock_wait_in_ms, 0),
esc = ISNULL(index_lock_promotion_count, 0)
FROM collect.index_object_stats
WHERE collection_time =
(
SELECT b.latest_time
FROM boundaries AS b
)
),
prv AS
(
SELECT
database_name,
object_id,
index_id,
ms = ISNULL(row_lock_wait_in_ms, 0),
esc = ISNULL(index_lock_promotion_count, 0)
FROM collect.index_object_stats
WHERE collection_time =
(
SELECT b.prior_time
FROM boundaries AS b
)
)
SELECT TOP (1)
cur.database_name,
cur.schema_name,
cur.table_name,
cur.index_name,
ms_delta = cur.ms - prv.ms,
esc_delta = cur.esc - prv.esc
FROM cur
JOIN prv
ON prv.database_name = cur.database_name
AND prv.object_id = cur.object_id
AND prv.index_id = cur.index_id
CROSS JOIN boundaries AS b
WHERE b.latest_time <> b.prior_time
AND cur.ms >= prv.ms
AND cur.ms - prv.ms >= @msDelta
ORDER BY
cur.ms - prv.ms DESC
OPTION(MAXDOP 1, RECOMPILE);";

/// <summary>
/// Day-over-day object/index detection (delta-based, not stddev-baseline) since the
/// index_object_stats collector runs daily and its counters are cumulative. Emits
/// ANOMALY_OBJECT_GROWTH for the biggest table grower over threshold and
/// ANOMALY_OBJECT_CONTENTION for the index with the largest new lock-wait time.
/// </summary>
/// <param name="context">Analysis context; its ServerId is stamped on every emitted fact.</param>
/// <param name="anomalies">List that receives at most one fact of each kind.</param>
/// <remarks>
/// Best-effort: any exception is logged and swallowed. Both checks share one try block, so a
/// failure in the growth check also skips the contention check.
/// </remarks>
private async Task DetectObjectStatsAnomalies(AnalysisContext context, List<Fact> anomalies)
{
    try
    {
        using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync();

        // Run sequentially on the same connection; each helper disposes its reader
        // before returning, so the next command can execute.
        await AddObjectGrowthAnomalyAsync(connection, context, anomalies);
        await AddObjectContentionAnomalyAsync(connection, context, anomalies);
    }
    catch (Exception ex)
    {
        Logger.Error($"[SqlServerAnomalyDetector] Object stats anomaly detection failed: {ex.Message}");
    }
}

/// <summary>
/// Runs <see cref="ObjectGrowthSql"/> and, if it returns a row, appends one
/// ANOMALY_OBJECT_GROWTH fact whose Value is the growth in MB.
/// </summary>
private static async Task AddObjectGrowthAnomalyAsync(
    SqlConnection connection,
    AnalysisContext context,
    List<Fact> anomalies)
{
    using var cmd = connection.CreateCommand();
    cmd.CommandText = ObjectGrowthSql;
    cmd.Parameters.Add(new SqlParameter("@growthMb", ObjectGrowthMbThreshold));
    cmd.Parameters.Add(new SqlParameter("@growthPct", ObjectGrowthPctThreshold));

    using var reader = await cmd.ExecuteReaderAsync();
    if (!await reader.ReadAsync())
    {
        // No table met both thresholds, or fewer than two snapshots exist.
        return;
    }

    // Resolve columns by name so the mapping survives a reordered SELECT list.
    // NOTE(review): schema_name and table_name are selected but not emitted; the
    // Metadata dictionary used here only holds numeric values.
    var db = reader.GetString(reader.GetOrdinal("database_name"));
    var priorMb = Convert.ToDouble(reader.GetValue(reader.GetOrdinal("prior_mb")));
    var currentMb = Convert.ToDouble(reader.GetValue(reader.GetOrdinal("current_mb")));
    var growthMb = Convert.ToDouble(reader.GetValue(reader.GetOrdinal("growth_mb")));
    var growthPct = Convert.ToDouble(reader.GetValue(reader.GetOrdinal("growth_pct")));

    anomalies.Add(new Fact
    {
        Source = "anomaly",
        Key = "ANOMALY_OBJECT_GROWTH",
        Value = growthMb,
        ServerId = context.ServerId,
        DatabaseName = db,
        Metadata = new Dictionary<string, double>
        {
            ["prior_mb"] = priorMb,
            ["current_mb"] = currentMb,
            ["growth_mb"] = growthMb,
            ["growth_pct"] = growthPct,
            // How many times over the percentage threshold the growth is.
            ["growth_ratio"] = growthPct / ObjectGrowthPctThreshold
        }
    });
}

/// <summary>
/// Runs <see cref="ObjectContentionSql"/> and, if it returns a row, appends one
/// ANOMALY_OBJECT_CONTENTION fact whose Value is the lock-wait increase in milliseconds.
/// </summary>
private static async Task AddObjectContentionAnomalyAsync(
    SqlConnection connection,
    AnalysisContext context,
    List<Fact> anomalies)
{
    using var cmd = connection.CreateCommand();
    cmd.CommandText = ObjectContentionSql;
    cmd.Parameters.Add(new SqlParameter("@msDelta", ObjectLockWaitMsDeltaThreshold));

    using var reader = await cmd.ExecuteReaderAsync();
    if (!await reader.ReadAsync())
    {
        // No index accumulated enough new lock-wait time, or fewer than two snapshots exist.
        return;
    }

    // Resolve columns by name so the mapping survives a reordered SELECT list.
    // NOTE(review): schema_name, table_name and index_name are selected but not emitted.
    var db = reader.GetString(reader.GetOrdinal("database_name"));
    var msDelta = Convert.ToDouble(reader.GetValue(reader.GetOrdinal("ms_delta")));
    // The query only guards the lock-wait counter against resets, so this delta may be negative.
    var escalationDelta = Convert.ToDouble(reader.GetValue(reader.GetOrdinal("esc_delta")));

    anomalies.Add(new Fact
    {
        Source = "anomaly",
        Key = "ANOMALY_OBJECT_CONTENTION",
        Value = msDelta,
        ServerId = context.ServerId,
        DatabaseName = db,
        Metadata = new Dictionary<string, double>
        {
            ["lock_wait_ms_delta"] = msDelta,
            ["escalation_delta"] = escalationDelta,
            // How many times over the lock-wait threshold the increase is.
            ["contention_ratio"] = msDelta / ObjectLockWaitMsDeltaThreshold
        }
    });
}

/// <summary>
/// Checks if the server has enough historical data for meaningful baselines.
/// Uses wait_stats and cpu_utilization_stats as canary.
Expand Down
Loading
Loading