diff --git a/src/bio/bio_internal.h b/src/bio/bio_internal.h index dcdcf0f93c4..721309eef05 100644 --- a/src/bio/bio_internal.h +++ b/src/bio/bio_internal.h @@ -175,6 +175,9 @@ struct bio_dma_buffer { X(bdh_unsafe_shutdowns, "unsafe_shutdowns", \ "Number of unsafe shutdowns (no notification prior to power loss)", \ "shutdowns", D_TM_COUNTER) \ + X(bdh_percentage_used, "percentage_used", \ + "Percentage as canonical NAND-life indicator, hits 100 when the drive has consumed its rated endurance",\ + "percents", D_TM_GAUGE) \ X(bdh_temp, "temp/current", \ "Current SSD temperature", \ "kelvins", D_TM_GAUGE) \ diff --git a/src/bio/bio_monitor.c b/src/bio/bio_monitor.c index d00061f7d3f..bf6a7a70887 100644 --- a/src/bio/bio_monitor.c +++ b/src/bio/bio_monitor.c @@ -512,6 +512,8 @@ populate_health_stats(struct bio_dev_health *bdh) dev_state->unsafe_shutdowns = page->unsafe_shutdowns[0]; d_tm_set_counter(bdh->bdh_unsafe_shutdowns, page->unsafe_shutdowns[0]); + dev_state->percentage_used = page->percentage_used; + d_tm_set_gauge(bdh->bdh_percentage_used, page->percentage_used); /** temperature */ dev_state->warn_temp_time = page->warning_temp_time; diff --git a/src/control/lib/spdk/src/nvme_control_common.c b/src/control/lib/spdk/src/nvme_control_common.c index ac0d8eaea88..81af7cbd59f 100644 --- a/src/control/lib/spdk/src/nvme_control_common.c +++ b/src/control/lib/spdk/src/nvme_control_common.c @@ -352,6 +352,7 @@ populate_dev_health(struct nvme_stats *stats, stats->read_only_warn = cw.bits.read_only ? true : false; stats->volatile_mem_warn = cw.bits.volatile_memory_backup ? true : false; + stats->percentage_used = hp->percentage_used; /* Intel Smart Information Attributes */ if ((cdata == NULL) || (cdata->vid != SPDK_PCI_VID_INTEL)) diff --git a/src/include/daos_srv/control.h b/src/include/daos_srv/control.h index c1d8771b777..416f56ba109 100644 --- a/src/include/daos_srv/control.h +++ b/src/include/daos_srv/control.h @@ -105,6 +105,7 @@ struct nvme_stats { uint64_t unsafe_shutdowns; uint64_t media_errs; uint64_t err_log_entries; + uint8_t percentage_used; /* I/O error counters */ uint32_t bio_read_errs; uint32_t bio_write_errs; diff --git a/src/proto/ctl/smd.proto b/src/proto/ctl/smd.proto index 57cb414634a..f6aec631ad3 100644 --- a/src/proto/ctl/smd.proto +++ b/src/proto/ctl/smd.proto @@ -79,6 +79,7 @@ message BioHealthResp { uint32 link_max_width = 51; // maximum width (number of lanes) float link_neg_speed = 52; // negotiated speed in transactions per second uint32 link_neg_width = 53; // negotiated width (number of lanes) + uint32 percentage_used = 54; } enum NvmeDevState {