/// <summary>
/// Minimum day-over-day increase in a table's total reserved space, in MB, before
/// ANOMALY_OBJECT_GROWTH is emitted. Both this and
/// <see cref="ObjectGrowthPctThreshold"/> must be met.
/// </summary>
private const decimal ObjectGrowthMbThreshold = 100m;

/// <summary>
/// Minimum day-over-day increase in a table's total reserved space, as a percentage
/// of the prior snapshot's size, before ANOMALY_OBJECT_GROWTH is emitted.
/// </summary>
private const double ObjectGrowthPctThreshold = 20.0;

/// <summary>
/// Minimum increase in an index's row_lock_wait_in_ms between the two most recent
/// snapshots before ANOMALY_OBJECT_CONTENTION is emitted.
/// </summary>
private const long ObjectLockWaitMsDeltaThreshold = 60000;

/// <summary>
/// Shared query prologue: sets READ UNCOMMITTED and defines the <c>snaps</c> CTE (the two
/// most recent distinct collection_time values in collect.index_object_stats) and the
/// <c>boundaries</c> CTE (latest_time / prior_time). Ends with a trailing comma so each
/// query can append its own <c>cur</c> and <c>prv</c> CTEs.
/// </summary>
private const string ObjectStatsSnapshotPrologueSql = @"
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;

WITH
    snaps AS
    (
        SELECT TOP (2)
            collection_time
        FROM
        (
            SELECT DISTINCT
                collection_time
            FROM collect.index_object_stats
        ) AS d
        ORDER BY
            collection_time DESC
    ),
    boundaries AS
    (
        SELECT
            latest_time = MAX(collection_time),
            prior_time = MIN(collection_time)
        FROM snaps
    ),";

/// <summary>
/// Returns at most one row: the table (indexes rolled up per database_name + object_id)
/// with the largest reserved_mb increase between the two snapshots, provided it meets
/// both @growthMb and @growthPct.
/// </summary>
private const string ObjectGrowthSql = ObjectStatsSnapshotPrologueSql + @"
    cur AS
    (
        SELECT
            database_name,
            object_id,
            schema_name = MAX(schema_name),
            table_name = MAX(table_name),
            mb = SUM(reserved_mb)
        FROM collect.index_object_stats
        WHERE collection_time =
        (
            SELECT b.latest_time
            FROM boundaries AS b
        )
        GROUP BY
            database_name,
            object_id
    ),
    prv AS
    (
        SELECT
            database_name,
            object_id,
            mb = SUM(reserved_mb)
        FROM collect.index_object_stats
        WHERE collection_time =
        (
            SELECT b.prior_time
            FROM boundaries AS b
        )
        GROUP BY
            database_name,
            object_id
    )
SELECT TOP (1)
    cur.database_name,
    cur.schema_name,
    cur.table_name,
    prior_mb = prv.mb,
    current_mb = cur.mb,
    growth_mb = cur.mb - prv.mb,
    growth_pct =
        CASE
            WHEN prv.mb > 0
            THEN (cur.mb - prv.mb) * 100.0 / prv.mb
            ELSE 0
        END
FROM cur
JOIN prv
  ON  prv.database_name = cur.database_name
  AND prv.object_id = cur.object_id
CROSS JOIN boundaries AS b
WHERE b.latest_time <> b.prior_time
AND   cur.mb - prv.mb >= @growthMb
AND   CASE WHEN prv.mb > 0 THEN (cur.mb - prv.mb) * 100.0 / prv.mb ELSE 0 END >= @growthPct
ORDER BY
    cur.mb - prv.mb DESC
OPTION(MAXDOP 1, RECOMPILE);";

/// <summary>
/// Returns at most one row: the index with the largest row_lock_wait_in_ms increase
/// between the two snapshots, provided the increase meets @msDelta. Rows whose counter
/// is lower in the latest snapshot than in the prior one are excluded.
/// </summary>
private const string ObjectContentionSql = ObjectStatsSnapshotPrologueSql + @"
    cur AS
    (
        SELECT
            database_name,
            object_id,
            index_id,
            schema_name,
            table_name,
            index_name,
            ms = ISNULL(row_lock_wait_in_ms, 0),
            esc = ISNULL(index_lock_promotion_count, 0)
        FROM collect.index_object_stats
        WHERE collection_time =
        (
            SELECT b.latest_time
            FROM boundaries AS b
        )
    ),
    prv AS
    (
        SELECT
            database_name,
            object_id,
            index_id,
            ms = ISNULL(row_lock_wait_in_ms, 0),
            esc = ISNULL(index_lock_promotion_count, 0)
        FROM collect.index_object_stats
        WHERE collection_time =
        (
            SELECT b.prior_time
            FROM boundaries AS b
        )
    )
SELECT TOP (1)
    cur.database_name,
    cur.schema_name,
    cur.table_name,
    cur.index_name,
    ms_delta = cur.ms - prv.ms,
    esc_delta = cur.esc - prv.esc
FROM cur
JOIN prv
  ON  prv.database_name = cur.database_name
  AND prv.object_id = cur.object_id
  AND prv.index_id = cur.index_id
CROSS JOIN boundaries AS b
WHERE b.latest_time <> b.prior_time
AND   cur.ms >= prv.ms
AND   cur.ms - prv.ms >= @msDelta
ORDER BY
    cur.ms - prv.ms DESC
OPTION(MAXDOP 1, RECOMPILE);";

/// <summary>
/// Day-over-day object/index detection (delta-based, not stddev-baseline) since the
/// index_object_stats collector runs daily and its counters are cumulative. Emits
/// ANOMALY_OBJECT_GROWTH for the biggest table grower over threshold and
/// ANOMALY_OBJECT_CONTENTION for the index with the largest new lock-wait time.
/// </summary>
/// <param name="context">Analysis context; its ServerId is stamped on each emitted fact.</param>
/// <param name="anomalies">List that detected anomaly facts are appended to.</param>
/// <remarks>
/// Never throws: failures are logged and swallowed. The two checks are isolated from each
/// other, so a failure in the growth query does not prevent the contention query from running.
/// </remarks>
private async Task DetectObjectStatsAnomalies(AnalysisContext context, List<Fact> anomalies)
{
    try
    {
        using var connection = new SqlConnection(_connectionString);
        await connection.OpenAsync();

        await DetectObjectGrowthAnomaly(connection, context, anomalies);
        await DetectObjectContentionAnomaly(connection, context, anomalies);
    }
    catch (Exception ex)
    {
        // Reached when the connection cannot be created/opened; query failures are
        // handled (and logged) inside the individual helpers.
        Logger.Error($"[SqlServerAnomalyDetector] Object stats anomaly detection failed: {ex.Message}");
    }
}

/// <summary>
/// Runs <see cref="ObjectGrowthSql"/> and, if it returns a row, appends an
/// ANOMALY_OBJECT_GROWTH fact whose value is the growth in MB.
/// </summary>
private async Task DetectObjectGrowthAnomaly(SqlConnection connection, AnalysisContext context, List<Fact> anomalies)
{
    try
    {
        using var cmd = connection.CreateCommand();
        cmd.CommandText = ObjectGrowthSql;
        cmd.Parameters.Add(new SqlParameter("@growthMb", ObjectGrowthMbThreshold));
        cmd.Parameters.Add(new SqlParameter("@growthPct", ObjectGrowthPctThreshold));

        using var reader = await cmd.ExecuteReaderAsync();
        if (!await reader.ReadAsync())
        {
            return;
        }

        // Ordinals follow the SELECT list:
        // 0 database_name, 1 schema_name, 2 table_name, 3 prior_mb, 4 current_mb, 5 growth_mb, 6 growth_pct
        var databaseName = reader.GetString(0);
        var priorMb = Convert.ToDouble(reader.GetValue(3));
        var currentMb = Convert.ToDouble(reader.GetValue(4));
        var growthMb = Convert.ToDouble(reader.GetValue(5));
        var growthPct = Convert.ToDouble(reader.GetValue(6));

        anomalies.Add(new Fact
        {
            Source = "anomaly",
            Key = "ANOMALY_OBJECT_GROWTH",
            Value = growthMb,
            ServerId = context.ServerId,
            DatabaseName = databaseName,
            Metadata = new Dictionary<string, double>
            {
                ["prior_mb"] = priorMb,
                ["current_mb"] = currentMb,
                ["growth_mb"] = growthMb,
                ["growth_pct"] = growthPct,
                // How many times over the percentage threshold the growth is.
                ["growth_ratio"] = growthPct / ObjectGrowthPctThreshold
            }
        });
    }
    catch (Exception ex)
    {
        Logger.Error($"[SqlServerAnomalyDetector] Object growth anomaly detection failed: {ex.Message}");
    }
}

/// <summary>
/// Runs <see cref="ObjectContentionSql"/> and, if it returns a row, appends an
/// ANOMALY_OBJECT_CONTENTION fact whose value is the row-lock wait delta in ms.
/// </summary>
private async Task DetectObjectContentionAnomaly(SqlConnection connection, AnalysisContext context, List<Fact> anomalies)
{
    try
    {
        using var cmd = connection.CreateCommand();
        cmd.CommandText = ObjectContentionSql;
        cmd.Parameters.Add(new SqlParameter("@msDelta", ObjectLockWaitMsDeltaThreshold));

        using var reader = await cmd.ExecuteReaderAsync();
        if (!await reader.ReadAsync())
        {
            return;
        }

        // Ordinals follow the SELECT list:
        // 0 database_name, 1 schema_name, 2 table_name, 3 index_name, 4 ms_delta, 5 esc_delta
        var databaseName = reader.GetString(0);
        var lockWaitMsDelta = Convert.ToDouble(reader.GetValue(4));
        var escalationDelta = Convert.ToDouble(reader.GetValue(5));

        anomalies.Add(new Fact
        {
            Source = "anomaly",
            Key = "ANOMALY_OBJECT_CONTENTION",
            Value = lockWaitMsDelta,
            ServerId = context.ServerId,
            DatabaseName = databaseName,
            Metadata = new Dictionary<string, double>
            {
                ["lock_wait_ms_delta"] = lockWaitMsDelta,
                ["escalation_delta"] = escalationDelta,
                // How many times over the wait-time threshold the delta is.
                ["contention_ratio"] = lockWaitMsDelta / ObjectLockWaitMsDeltaThreshold
            }
        });
    }
    catch (Exception ex)
    {
        Logger.Error($"[SqlServerAnomalyDetector] Object contention anomaly detection failed: {ex.Message}");
    }
}