Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ linters:
- third_party$
- builtin$
- examples$
rules:
- path: 'internal/collector/cib.go'
linters:
- gocognit
- gocyclo
formatters:
enable:
- gofmt
Expand Down
236 changes: 194 additions & 42 deletions internal/collector/cib.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,51 +9,133 @@ import (
)

// Icinga2CIBCollector is a Prometheus collector for the Icinga 2 CIB status
// values. It holds the API client, a logger and one descriptor per exported
// metric.
//
// The descriptor field names match the keys looked up in the status payload
// by Collect (hence the underscores); the constructor, Describe and Collect
// all refer to the fields by these names. Every field is declared exactly
// once: a struct may not contain two fields with the same name.
type Icinga2CIBCollector struct {
	icingaClient *icinga.Client
	logger       *slog.Logger

	// Icinga Statistics
	uptime                    *prometheus.Desc
	avg_execution_time        *prometheus.Desc
	avg_latency               *prometheus.Desc
	max_execution_time        *prometheus.Desc
	max_latency               *prometheus.Desc
	min_execution_time        *prometheus.Desc
	min_latency               *prometheus.Desc
	current_concurrent_checks *prometheus.Desc
	current_pending_callbacks *prometheus.Desc
	remote_check_queue        *prometheus.Desc
	passive_host_checks       *prometheus.Desc
	passive_service_checks    *prometheus.Desc
	active_host_checks        *prometheus.Desc
	active_service_checks     *prometheus.Desc

	// Num Hosts
	num_hosts_up           *prometheus.Desc
	num_hosts_down         *prometheus.Desc
	num_hosts_acknowledged *prometheus.Desc
	num_hosts_flapping     *prometheus.Desc
	num_hosts_handled      *prometheus.Desc
	num_hosts_in_downtime  *prometheus.Desc
	num_hosts_pending      *prometheus.Desc
	num_hosts_problem      *prometheus.Desc
	num_hosts_unreachable  *prometheus.Desc

	// Num Services
	num_services_ok           *prometheus.Desc
	num_services_critical     *prometheus.Desc
	num_services_acknowledged *prometheus.Desc
	num_services_flapping     *prometheus.Desc
	num_services_handled      *prometheus.Desc
	num_services_in_downtime  *prometheus.Desc
	num_services_pending      *prometheus.Desc
	num_services_problem      *prometheus.Desc
	num_services_unknown      *prometheus.Desc
	num_services_unreachable  *prometheus.Desc
	num_services_warning      *prometheus.Desc
}

// NewIcinga2CIBCollector builds an Icinga2CIBCollector that uses client and
// logger, and creates one label-less descriptor per metric. Every metric name
// is the status key prefixed with "icinga2_".
//
// Each field is initialized exactly once: a composite literal may not name
// the same field twice, and a second initializer would also leave it unclear
// which help text is the intended one.
func NewIcinga2CIBCollector(client *icinga.Client, logger *slog.Logger) *Icinga2CIBCollector {
	return &Icinga2CIBCollector{
		icingaClient: client,
		logger:       logger,

		// Icinga Statistics
		uptime:                    prometheus.NewDesc("icinga2_uptime", "Uptime of the instance", nil, nil),
		avg_execution_time:        prometheus.NewDesc("icinga2_avg_execution_time", "Average execution time", nil, nil),
		avg_latency:               prometheus.NewDesc("icinga2_avg_latency", "Average latency", nil, nil),
		max_execution_time:        prometheus.NewDesc("icinga2_max_execution_time", "Maximum execution time", nil, nil),
		max_latency:               prometheus.NewDesc("icinga2_max_latency", "Maximum latency", nil, nil),
		min_execution_time:        prometheus.NewDesc("icinga2_min_execution_time", "Minimum execution time", nil, nil),
		min_latency:               prometheus.NewDesc("icinga2_min_latency", "Minimum latency", nil, nil),
		current_concurrent_checks: prometheus.NewDesc("icinga2_current_concurrent_checks", "Current concurrent checks", nil, nil),
		current_pending_callbacks: prometheus.NewDesc("icinga2_current_pending_callbacks", "Current pending callbacks", nil, nil),
		remote_check_queue:        prometheus.NewDesc("icinga2_remote_check_queue", "Remote check queue size", nil, nil),
		passive_host_checks:       prometheus.NewDesc("icinga2_passive_host_checks", "Passive host checks", nil, nil),
		passive_service_checks:    prometheus.NewDesc("icinga2_passive_service_checks", "Passive service checks", nil, nil),
		active_host_checks:        prometheus.NewDesc("icinga2_active_host_checks", "Active host checks", nil, nil),
		active_service_checks:     prometheus.NewDesc("icinga2_active_service_checks", "Active service checks", nil, nil),

		// Num Hosts
		num_hosts_up:           prometheus.NewDesc("icinga2_num_hosts_up", "Number of hosts Up", nil, nil),
		num_hosts_down:         prometheus.NewDesc("icinga2_num_hosts_down", "Number of hosts Down", nil, nil),
		num_hosts_acknowledged: prometheus.NewDesc("icinga2_num_hosts_acknowledged", "Number of hosts acknowledged", nil, nil),
		num_hosts_flapping:     prometheus.NewDesc("icinga2_num_hosts_flapping", "Number of hosts flapping", nil, nil),
		num_hosts_handled:      prometheus.NewDesc("icinga2_num_hosts_handled", "Number of hosts handled", nil, nil),
		num_hosts_in_downtime:  prometheus.NewDesc("icinga2_num_hosts_in_downtime", "Number of hosts in downtime", nil, nil),
		num_hosts_pending:      prometheus.NewDesc("icinga2_num_hosts_pending", "Number of hosts pending", nil, nil),
		num_hosts_problem:      prometheus.NewDesc("icinga2_num_hosts_problem", "Number of hosts with problem", nil, nil),
		num_hosts_unreachable:  prometheus.NewDesc("icinga2_num_hosts_unreachable", "Number of hosts unreachable", nil, nil),

		// Num Services
		num_services_ok:           prometheus.NewDesc("icinga2_num_services_ok", "Number of services OK", nil, nil),
		num_services_critical:     prometheus.NewDesc("icinga2_num_services_critical", "Number of services Critical", nil, nil),
		num_services_acknowledged: prometheus.NewDesc("icinga2_num_services_acknowledged", "Number of services acknowledged", nil, nil),
		num_services_flapping:     prometheus.NewDesc("icinga2_num_services_flapping", "Number of services flapping", nil, nil),
		num_services_handled:      prometheus.NewDesc("icinga2_num_services_handled", "Number of services handled", nil, nil),
		num_services_in_downtime:  prometheus.NewDesc("icinga2_num_services_in_downtime", "Number of services in downtime", nil, nil),
		num_services_pending:      prometheus.NewDesc("icinga2_num_services_pending", "Number of services pending", nil, nil),
		num_services_problem:      prometheus.NewDesc("icinga2_num_services_problem", "Number of services with problem", nil, nil),
		num_services_unknown:      prometheus.NewDesc("icinga2_num_services_unknown", "Number of services unknown", nil, nil),
		num_services_unreachable:  prometheus.NewDesc("icinga2_num_services_unreachable", "Number of services unreachable", nil, nil),
		num_services_warning:      prometheus.NewDesc("icinga2_num_services_warning", "Number of services warning", nil, nil),
	}
}

// Describe sends the descriptor of every metric held by the collector to ch.
// Each descriptor is sent exactly once, grouped the same way as the struct
// fields: Icinga statistics, host counts, service counts.
func (collector *Icinga2CIBCollector) Describe(ch chan<- *prometheus.Desc) {
	// Icinga Statistics
	ch <- collector.uptime
	ch <- collector.avg_execution_time
	ch <- collector.avg_latency
	ch <- collector.max_execution_time
	ch <- collector.max_latency
	ch <- collector.min_execution_time
	ch <- collector.min_latency
	ch <- collector.current_concurrent_checks
	ch <- collector.current_pending_callbacks
	ch <- collector.remote_check_queue
	ch <- collector.passive_host_checks
	ch <- collector.passive_service_checks
	ch <- collector.active_host_checks
	ch <- collector.active_service_checks

	// Num Hosts
	ch <- collector.num_hosts_up
	ch <- collector.num_hosts_down
	ch <- collector.num_hosts_acknowledged
	ch <- collector.num_hosts_flapping
	ch <- collector.num_hosts_handled
	ch <- collector.num_hosts_in_downtime
	ch <- collector.num_hosts_pending
	ch <- collector.num_hosts_problem
	ch <- collector.num_hosts_unreachable

	// Num Services
	ch <- collector.num_services_ok
	ch <- collector.num_services_critical
	ch <- collector.num_services_acknowledged
	ch <- collector.num_services_flapping
	ch <- collector.num_services_handled
	ch <- collector.num_services_in_downtime
	ch <- collector.num_services_pending
	ch <- collector.num_services_problem
	// NOTE(review): the visible part of Collect emits no metric for
	// num_services_unknown — confirm the "num_services_unknown" status value
	// is actually exported, otherwise this descriptor is never used.
	ch <- collector.num_services_unknown
	ch <- collector.num_services_unreachable
	ch <- collector.num_services_warning
}

func (collector *Icinga2CIBCollector) Collect(ch chan<- prometheus.Metric) {
Expand All @@ -74,18 +156,6 @@ func (collector *Icinga2CIBCollector) Collect(ch chan<- prometheus.Metric) {
if v, ok := r.Status["uptime"]; ok {
ch <- prometheus.MustNewConstMetric(collector.uptime, prometheus.CounterValue, v)
}
if v, ok := r.Status["num_hosts_up"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_up, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_down"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_down, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_ok"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_ok, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_critical"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_critical, prometheus.GaugeValue, v)
}
if v, ok := r.Status["avg_execution_time"]; ok {
ch <- prometheus.MustNewConstMetric(collector.avg_execution_time, prometheus.GaugeValue, v)
}
Expand All @@ -104,4 +174,86 @@ func (collector *Icinga2CIBCollector) Collect(ch chan<- prometheus.Metric) {
if v, ok := r.Status["min_latency"]; ok {
ch <- prometheus.MustNewConstMetric(collector.min_latency, prometheus.GaugeValue, v)
}
if v, ok := r.Status["current_concurrent_checks"]; ok {
ch <- prometheus.MustNewConstMetric(collector.current_concurrent_checks, prometheus.GaugeValue, v)
}
if v, ok := r.Status["current_pending_callbacks"]; ok {
ch <- prometheus.MustNewConstMetric(collector.current_pending_callbacks, prometheus.GaugeValue, v)
}
if v, ok := r.Status["remote_check_queue"]; ok {
ch <- prometheus.MustNewConstMetric(collector.remote_check_queue, prometheus.GaugeValue, v)
}
if v, ok := r.Status["passive_host_checks"]; ok {
ch <- prometheus.MustNewConstMetric(collector.passive_host_checks, prometheus.GaugeValue, v)
}
if v, ok := r.Status["passive_service_checks"]; ok {
ch <- prometheus.MustNewConstMetric(collector.passive_service_checks, prometheus.GaugeValue, v)
}
if v, ok := r.Status["active_host_checks"]; ok {
ch <- prometheus.MustNewConstMetric(collector.active_host_checks, prometheus.GaugeValue, v)
}
if v, ok := r.Status["active_service_checks"]; ok {
ch <- prometheus.MustNewConstMetric(collector.active_service_checks, prometheus.GaugeValue, v)
}

// Hosts
if v, ok := r.Status["num_hosts_up"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_up, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_down"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_down, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_acknowledged"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_acknowledged, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_flapping"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_flapping, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_handled"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_handled, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_in_downtime"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_in_downtime, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_pending"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_pending, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_problem"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_problem, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_hosts_unreachable"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_hosts_unreachable, prometheus.GaugeValue, v)
}

// Services
if v, ok := r.Status["num_services_ok"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_ok, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_critical"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_critical, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_acknowledged"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_acknowledged, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_flapping"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_flapping, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_handled"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_handled, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_in_downtime"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_in_downtime, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_pending"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_pending, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_problem"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_problem, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_unreachable"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_unreachable, prometheus.GaugeValue, v)
}
if v, ok := r.Status["num_services_warning"]; ok {
ch <- prometheus.MustNewConstMetric(collector.num_services_warning, prometheus.GaugeValue, v)
}
}