diff --git a/docs/checks/commands/check_drive_io.md b/docs/checks/commands/check_drive_io.md index 6688ba1e..62518eef 100644 --- a/docs/checks/commands/check_drive_io.md +++ b/docs/checks/commands/check_drive_io.md @@ -54,7 +54,7 @@ Naemon Config | empty-syntax | %(status) - No drives found | | top-syntax | %(status) - %(list) | | ok-syntax | %(status) - %(list) | -| detail-syntax | %(drive){{ IF label ne '' }} (%(label)){{ END }} >%(write_bytes_rate) <%(read_bytes_rate) %(utilization)% | +| detail-syntax | %(drive){{ IF label ne '' }} (%(label)){{ END }} >%(write_bytes_rate_humanized) <%(read_bytes_rate_humanized) %(utilization)% | ## Check Specific Arguments @@ -69,27 +69,29 @@ Naemon Config these can be used in filters and thresholds (along with the default attributes): -| Attribute | Description | -| ---------------- | -------------------------------------------------------------------------------------------------- | -| drive | Name(s) of the drives to check the io stats for. If left empty, it will check all drives. For Windows this is the drive letter. For UNIX it is the logical name of the drive. | -| lookback | Lookback period for which the value change rate and utilization is calculated. | -| read_count | Total number of read operations completed successfully | -| read_count_rate | Number of read operations per second during the lookback period | -| read_bytes | Total number of bytes read from the disk | -| read_bytes_rate | Average bytes read per second during the lookback period | -| read_time | Total time spent on read operations (milliseconds). | -| write_count | Total number of write operations completed successfully | -| write_count_rate | Number of write operations per second during the lookback period | -| write_bytes | Total number of bytes written to the disk | -| write_bytes_rate | Average bytes written per second during the lookback period | -| write_time | Total time spent on write operations (milliseconds). | -| label | Label of the drive. 
Windows does not report this. | -| io_time | Total time during which the disk had at least one active I/O (milliseconds). Windows does not report this. | -| io_time_rate | Change in I/O time per second. Windows does not report this. | -| weighted_io | Measure of both I/O completion time and the number of backlogged requests. Windows does not report this. | -| utilization | Percentage of time the disk was busy (0-100%). Windows does not report this. | -| iops_in_progress | Number of I/O operations currently in flight. Windows does not report this. | -| idle_time | Count of the 100 ns periods the disk was idle. Windows only | -| query_time | The time the performance query was sent. Count of 100 ns periods since the Win32 epoch of 01.01.1601. Windows only | -| queue_depth | The depth of the IO queue. Windows only. | -| split_count | The cumulative count of IOs that are associated IOs. Windows only. | +| Attribute | Description | +| -------------------------- | ---------------------------------------------------------------------------------------- | +| drive | Name(s) of the drives to check the io stats for. If left empty, it will check all drives. For Windows this is the drive letter. For UNIX it is the logical name of the drive. | +| lookback | Lookback period for which the value change rate and utilization is calculated. | +| read_count | Total number of read operations completed successfully | +| read_count_rate | Number of read operations per second during the lookback period | +| read_bytes | Total number of bytes read from the disk | +| read_bytes_rate | Average bytes read per second during the lookback period | +| read_bytes_rate_humanized | Average bytes read per second during the lookback period, written in humanized format | +| read_time | Total time spent on read operations (milliseconds). 
| +| write_count | Total number of write operations completed successfully | +| write_count_rate | Number of write operations per second during the lookback period | +| write_bytes | Total number of bytes written to the disk | +| write_bytes_rate | Average bytes written per second during the lookback period | +| write_bytes_rate_humanized | Average bytes written per second during the lookback period, written in humanized format | +| write_time | Total time spent on write operations (milliseconds). | +| label | Label of the drive. Windows does not report this. | +| io_time | Total time during which the disk had at least one active I/O (milliseconds). Windows does not report this. | +| io_time_rate | Change in I/O time per second. Windows does not report this. | +| weighted_io | Measure of both I/O completion time and the number of backlogged requests. Windows does not report this. | +| utilization | Percentage of time the disk was busy (0-100%). Windows does not report this. | +| iops_in_progress | Number of I/O operations currently in flight. Windows does not report this. | +| idle_time | Count of the 100 ns periods the disk was idle. Windows only | +| query_time | The time the performance query was sent. Count of 100 ns periods since the Win32 epoch of 01.01.1601. Windows only | +| queue_depth | The depth of the IO queue. Windows only. | +| split_count | The cumulative count of IOs that are associated IOs. Windows only. 
| diff --git a/pkg/snclient/check_drive_io.go b/pkg/snclient/check_drive_io.go index 8e6a135d..10510c5b 100644 --- a/pkg/snclient/check_drive_io.go +++ b/pkg/snclient/check_drive_io.go @@ -10,7 +10,6 @@ import ( "time" "github.com/consol-monitoring/snclient/pkg/convert" - "github.com/consol-monitoring/snclient/pkg/humanize" "github.com/shirou/gopsutil/v4/disk" ) @@ -53,7 +52,7 @@ func (l *CheckDriveIO) Build() *CheckData { defaultWarning: "utilization > 95", defaultCritical: "", okSyntax: "%(status) - %(list)", - detailSyntax: "%(drive){{ IF label ne '' }} (%(label)){{ END }} >%(write_bytes_rate) <%(read_bytes_rate) %(utilization)%", + detailSyntax: "%(drive){{ IF label ne '' }} (%(label)){{ END }} >%(write_bytes_rate_humanized) <%(read_bytes_rate_humanized) %(utilization)%", topSyntax: "%(status) - %(list)", emptyState: CheckExitUnknown, emptySyntax: "%(status) - No drives found", @@ -65,11 +64,13 @@ func (l *CheckDriveIO) Build() *CheckData { {name: "read_count_rate", description: "Number of read operations per second during the lookback period"}, {name: "read_bytes", description: "Total number of bytes read from the disk", unit: UByte}, {name: "read_bytes_rate", description: "Average bytes read per second during the lookback period", unit: UByte}, + {name: "read_bytes_rate_humanized", description: "Average bytes read per second during the lookback period, written in humanized format"}, {name: "read_time", description: "Total time spent on read operations (milliseconds)."}, {name: "write_count", description: "Total number of write operations completed successfully"}, {name: "write_count_rate", description: "Number of write operations per second during the lookback period"}, {name: "write_bytes", description: "Total number of bytes written to the disk", unit: UByte}, {name: "write_bytes_rate", description: "Average bytes written per second during the lookback period", unit: UByte}, + {name: "write_bytes_rate_humanized", description: "Average bytes read per second 
during the lookback period, written in humanized format"}, {name: "write_time", description: "Total time spent on write operations (milliseconds)."}, // Windows does not report these @@ -207,9 +208,7 @@ func (l *CheckDriveIO) addRateToEntry(snc *Agent, entry map[string]string, entry log.Debugf("Error when getting the counter rate, lookback: %d, counterCategory: %s, counterKey: %s, err: %s", l.lookback, counterCategory, counterKey, err.Error()) } - humanizedBytes := humanize.IBytesF(uint64(rate), 1) - - entry[entryKey] = fmt.Sprintf("%v/s", humanizedBytes) + entry[entryKey] = fmt.Sprintf("%f", rate) } func cleanupDriveName(drive string) (deviceLogicalNameOrLetter string) { diff --git a/pkg/snclient/check_drive_io_other.go b/pkg/snclient/check_drive_io_other.go index 69c281c6..3122d308 100644 --- a/pkg/snclient/check_drive_io_other.go +++ b/pkg/snclient/check_drive_io_other.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/consol-monitoring/snclient/pkg/convert" + "github.com/consol-monitoring/snclient/pkg/humanize" "github.com/shirou/gopsutil/v4/disk" ) @@ -50,12 +51,18 @@ func (l *CheckDriveIO) buildEntry(snc *Agent, diskIOCounters any, deviceLogicalN l.addRateToEntry(snc, entry, "read_count_rate", counterCategory, "read_count") entry["read_bytes"] = fmt.Sprintf("%d", counters.ReadBytes) l.addRateToEntry(snc, entry, "read_bytes_rate", counterCategory, "read_bytes") + readBytesRateFloat64 := convert.Float64(entry["read_bytes_rate"]) + humanizedReadBytesRate := humanize.IBytesF(uint64(readBytesRateFloat64), 1) + entry["read_bytes_rate_humanized"] = humanizedReadBytesRate + "/s" entry["read_time"] = fmt.Sprintf("%d", counters.ReadTime) entry["write_count"] = fmt.Sprintf("%d", counters.WriteCount) l.addRateToEntry(snc, entry, "write_count_rate", counterCategory, "write_count") entry["write_bytes"] = fmt.Sprintf("%d", counters.WriteBytes) l.addRateToEntry(snc, entry, "write_bytes_rate", counterCategory, "write_bytes") + writeBytesRateFloat64 := 
convert.Float64(entry["write_bytes_rate"]) + humanizedWriteBytesRate := humanize.IBytesF(uint64(writeBytesRateFloat64), 1) + entry["write_bytes_rate_humanized"] = humanizedWriteBytesRate + "/s" entry["write_time"] = fmt.Sprintf("%d", counters.WriteTime) entry["io_time"] = fmt.Sprintf("%d", counters.IoTime)