diff --git a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs index fc478cf6f..93b2b077c 100644 --- a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs +++ b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/cpu_util_table.rs @@ -1,14 +1,20 @@ use crate::common::format::human_size; +use crate::dashboard::data::DashboardData; +use itertools::Itertools; use ratatui::layout::{Constraint, Rect}; -use ratatui::style::Style; use ratatui::widgets::{Cell, Row, Table}; use std::cmp; +use tako::WorkerId; use tako::hwstats::MemoryStats; +use tako::resources::{ + CPU_RESOURCE_NAME, ResourceDescriptorItem, ResourceDescriptorKind, ResourceIndex, +}; +use tako::worker::WorkerConfiguration; -use crate::dashboard::ui::styles; +use crate::dashboard::ui::styles::{self, table_style_deselected}; use crate::dashboard::ui::terminal::DashboardFrame; use crate::dashboard::ui::widgets::progressbar::{ - ProgressPrintStyle, get_progress_bar_color, render_progress_bar_at, + ProgressPrintStyle, get_cpu_progress_bar_color, render_progress_bar_at, }; use crate::dashboard::utils::calculate_average; @@ -16,66 +22,331 @@ const CPU_METER_PROGRESSBAR_WIDTH: u8 = 18; // 4 characters for the label const CPU_METER_WIDTH: u8 = CPU_METER_PROGRESSBAR_WIDTH + 4; -pub fn render_cpu_util_table( - cpu_util_list: &[f64], - mem_util: &MemoryStats, - rect: Rect, - frame: &mut DashboardFrame, - table_style: Style, -) { - if cpu_util_list.is_empty() { - return; +#[derive(Default)] +pub struct CpuUtilTable { + utilization: Option, + cpu_view_mode: CpuViewMode, + cpu_scope: CpuScope, +} + +#[derive(Default, PartialEq)] +pub enum CpuViewMode { + Global, + #[default] + WorkerManaged, + WorkerAssigned, +} + +impl CpuViewMode { + pub fn get_visible_indices( + &self, + cpu_scope: &CpuScope, + total_cpus: usize, + used_cpus: &[ResourceIndex], + ) -> Vec { + match self { + CpuViewMode::Global => (0..total_cpus) + .map(|idx| ResourceIndex::new(idx as u32)) + .collect(), + CpuViewMode::WorkerManaged => match cpu_scope { + CpuScope::Node => (0..total_cpus) + .map(|idx| ResourceIndex::new(idx as u32)) + .collect(), + CpuScope::Subset(managed_cpus) => managed_cpus.clone(), + }, + CpuViewMode::WorkerAssigned => used_cpus.to_vec(), + } } - let constraints = get_column_constraints(rect, cpu_util_list.len()); +} - let width = constraints.len(); - let height = (cpu_util_list.len() as f64 / width as f64).ceil() as usize; +#[derive(Debug, Default)] +pub enum CpuScope { + #[default] + Node, + Subset(Vec), +} + +impl CpuScope { + fn estimate_scope( + detected_cpus: &mut Vec, + managed_cpus: Vec<&ResourceDescriptorItem>, + ) -> Option { + let mut all_managed_cpus = vec![]; + for resource in managed_cpus { + match &resource.kind { + ResourceDescriptorKind::List { values } => { + if let Ok(indices) = values + .iter() + .map(|s| s.parse::()) + .collect::, _>>() + { + all_managed_cpus.extend(indices); + } else { + return None; + } + } + ResourceDescriptorKind::Range { start, end } => { + for idx in (u32::from(*start))..=(u32::from(*end)) { + all_managed_cpus.push(ResourceIndex::new(idx)); + } + } + ResourceDescriptorKind::Groups { groups } => { + for group in groups { + if let Ok(indices) = group + .iter() + .map(|s| s.parse::()) + .collect::, _>>() + { + all_managed_cpus.extend(indices); + } else { + return None; + } + } + } + // Based on Resource kind `sum` cannot be used with CPUs. CPUs must have identity + ResourceDescriptorKind::Sum { .. } => unreachable!(), + } + } + + detected_cpus.sort(); + all_managed_cpus.sort(); + + if *detected_cpus == all_managed_cpus { + Some(CpuScope::Node) + } else { + Some(CpuScope::Subset(all_managed_cpus)) + } + } +} + +impl CpuViewMode { + fn next(&mut self, cpu_manager_state: &CpuScope) { + match cpu_manager_state { + CpuScope::Node => { + *self = match self { + CpuViewMode::WorkerManaged => CpuViewMode::WorkerAssigned, + CpuViewMode::WorkerAssigned => CpuViewMode::WorkerManaged, + CpuViewMode::Global => CpuViewMode::WorkerManaged, // To skip out of the global in case the state changes + } + } + CpuScope::Subset(_items) => { + *self = match self { + CpuViewMode::Global => CpuViewMode::WorkerManaged, + CpuViewMode::WorkerManaged => CpuViewMode::WorkerAssigned, + CpuViewMode::WorkerAssigned => CpuViewMode::Global, + } + } + } + } - let mut rows: Vec> = vec![vec![]; height]; - for (position, &cpu_util) in cpu_util_list.iter().enumerate() { - let row = position % height; - rows[row].push((cpu_util, position)); + fn next_text(&self, cpu_manager_state: &CpuScope) -> &str { + match cpu_manager_state { + CpuScope::Node => { + match self { + CpuViewMode::WorkerManaged => "Show worker assigned CPU utilization", + CpuViewMode::WorkerAssigned => "Show worker managed CPU utilization", + CpuViewMode::Global => "Show worker managed CPU utilization", // To skip out of the global in case the state changes + } + } + CpuScope::Subset(_items) => match self { + CpuViewMode::Global => "Show worker managed CPU utilization", + CpuViewMode::WorkerManaged => "Show worker assigned CPU utilization", + CpuViewMode::WorkerAssigned => "Show global CPU utilization", + }, + } } - let rows: Vec = rows - .into_iter() - .map(|targets| { - let columns: Vec = targets + fn set_default(&mut self) { + *self = CpuViewMode::WorkerManaged; + } +} + +struct Utilization { + cpu: Vec, + memory: MemoryStats, + used_cpus: Vec, +} + +impl CpuUtilTable { + pub fn update( + &mut self, + data: &DashboardData, + worker_id: WorkerId, + worker_config: Option<&WorkerConfiguration>, + ) { + if let Some(overview) = data + .workers() + .query_worker_overview_at(worker_id, data.current_time()) + { + let worker_used_cpus: Vec = match self.cpu_view_mode { + CpuViewMode::WorkerManaged | CpuViewMode::WorkerAssigned => overview + .item + .running_tasks + .iter() + .flat_map(|(_id, task_resource_alloc)| { + task_resource_alloc + .resources + .iter() + .filter_map(|resource_alloc| { + if resource_alloc.resource == CPU_RESOURCE_NAME { + Some(resource_alloc.indices.iter().map(|(index, _)| *index)) + } else { + None + } + }) + }) + .flatten() + .collect(), + CpuViewMode::Global => vec![], + }; + + if let Some(hw_state) = overview.item.hw_state.as_ref() { + self.utilization = Some(Utilization { + cpu: hw_state + .state + .cpu_usage + .cpu_per_core_percent_usage + .iter() + .map(|&v| v as f64) + .collect(), + memory: hw_state.state.memory_usage.clone(), + used_cpus: worker_used_cpus, + }) + } + + if let Some(configuration) = worker_config { + let managed_cpus: Vec<&ResourceDescriptorItem> = configuration + .resources + .resources + .iter() + .filter(|resource| resource.name == CPU_RESOURCE_NAME) + .collect(); + if let Some(hw_state) = overview.item.hw_state.as_ref() { + let mut detected_cpus: Vec = hw_state + .state + .cpu_usage + .cpu_per_core_percent_usage + .iter() + .enumerate() + .map(|(idx, _)| ResourceIndex::new(idx as u32)) + .collect(); + let cpu_scope = CpuScope::estimate_scope(&mut detected_cpus, managed_cpus); + if let Some(cpu_scope) = cpu_scope { + self.cpu_scope = cpu_scope; + } + } + } + } + } + + pub fn draw(&mut self, rect: Rect, frame: &mut DashboardFrame) { + if let Some(util) = &self.utilization { + if util.cpu.is_empty() { + return; + } + + let visible_indices = self.cpu_view_mode.get_visible_indices( + &self.cpu_scope, + util.cpu.len(), + &util.used_cpus, + ); + + let cell_data: Vec<(u32, f64, bool)> = visible_indices .into_iter() - .map(|(cpu_util, position)| { - let progress = cpu_util / 100.00; - Cell::from(render_progress_bar_at( - Some(format!("{position:>3} ")), - progress, - CPU_METER_PROGRESSBAR_WIDTH, - ProgressPrintStyle::default(), - )) - .style(get_progress_bar_color(progress)) + .map(|idx| { + let val = util + .cpu + .get(idx.as_num() as usize) + .copied() + .unwrap_or_default(); + let is_used = util.used_cpus.contains(&idx); + (idx.as_num(), val, is_used) }) + .sorted_by_key(|&(idx, _, used)| (std::cmp::Reverse(used), idx)) .collect(); - Row::new(columns) - }) - .collect(); - let avg_cpu = calculate_average(cpu_util_list); - - let mem_used = mem_util.total - mem_util.free; - let title = styles::table_title(format!( - "Worker Utilization ({} CPUs), Avg CPU = {:.0}%, Mem = {:.0}% ({}/{})", - cpu_util_list.len(), - avg_cpu, - (mem_used as f64 / mem_util.total as f64) * 100.0, - human_size(mem_used), - human_size(mem_util.total) - )); - let body_block = styles::table_block_with_title(title); - - let table = Table::new(rows, constraints) - .block(body_block) - .row_highlight_style(styles::style_table_highlight()) - .style(table_style); - - frame.render_widget(table, rect); + let constraints = get_column_constraints(rect, cell_data.len()); + + let width = constraints.len(); + + let rows: Vec = if width > 0 { + cell_data + .chunks(width) + .map(|chunk| { + let cells: Vec = chunk + .iter() + .map(|(id, cpu_util, used)| { + let progress = cpu_util / 100.0; + let style = get_cpu_progress_bar_color( + progress, + *used, + &self.cpu_view_mode, + ); + + Cell::from(render_progress_bar_at( + Some(format!("{id:>3} ")), + progress, + CPU_METER_PROGRESSBAR_WIDTH, + ProgressPrintStyle::default(), + )) + .style(style) + }) + .collect(); + Row::new(cells) + }) + .collect() + } else { + vec![] + }; + + let mem_used = util.memory.total - util.memory.free; + let (which_util, num_cpus, avg_cpu) = create_title_info( + &util.cpu, + &util.used_cpus, + &self.cpu_view_mode, + &self.cpu_scope, + ); + + let title = styles::table_title(format!( + "{} Utilization ({} CPUs), Avg CPU = {:.0}%, Mem = {:.0}% ({}/{})", + which_util, + num_cpus, + avg_cpu, + (mem_used as f64 / util.memory.total as f64) * 100.0, + human_size(mem_used), + human_size(util.memory.total) + )); + let body_block = styles::table_block_with_title(title); + + let table = Table::new(rows, constraints) + .block(body_block) + .row_highlight_style(styles::style_table_highlight()) + .style(table_style_deselected()); + + frame.render_widget(table, rect); + } + } + + pub fn next_view(&mut self) { + self.cpu_view_mode.next(&self.cpu_scope); + } + + pub fn next_text(&mut self) -> &str { + self.cpu_view_mode.next_text(&self.cpu_scope) + } + + pub fn clear_table(&mut self) { + self.clear_util(); + self.set_default_view(); + } + + fn clear_util(&mut self) { + self.utilization = None; + } + + fn set_default_view(&mut self) { + self.cpu_view_mode.set_default(); + } } /// Creates the column sizes for the cpu_util_table, each column divides the row equally. @@ -83,9 +354,49 @@ fn get_column_constraints(rect: Rect, num_cpus: usize) -> Vec { let max_columns = (rect.width / CPU_METER_WIDTH as u16) as usize; let num_columns = cmp::min(max_columns, num_cpus); - std::iter::repeat_n( - Constraint::Percentage((100 / num_columns) as u16), - num_columns, - ) - .collect() + if num_columns > 0 { + std::iter::repeat_n( + Constraint::Percentage((100 / num_columns) as u16), + num_columns, + ) + .collect() + } else { + vec![] + } +} + +fn create_title_info( + cpu_util_list: &[f64], + used_cpus: &[ResourceIndex], + util_render_mode: &CpuViewMode, + cpu_manager_state: &CpuScope, +) -> (String, usize, f64) { + let which_util = match util_render_mode { + CpuViewMode::Global => "Node", + CpuViewMode::WorkerManaged => "Worker Managed", + CpuViewMode::WorkerAssigned => "Worker Assigned", + } + .to_string(); + + let filtered_utils: Vec = cpu_util_list + .iter() + .enumerate() + .filter(|(idx, _util)| { + let res_idx = ResourceIndex::new(*idx as u32); + match util_render_mode { + CpuViewMode::Global => true, + CpuViewMode::WorkerAssigned => used_cpus.contains(&res_idx), + CpuViewMode::WorkerManaged => match cpu_manager_state { + CpuScope::Node => true, + CpuScope::Subset(managed) => managed.contains(&res_idx), + }, + } + }) + .map(|(_, &util)| util) + .collect(); + + let num_cpus = filtered_utils.len(); + let avg_usage = calculate_average(&filtered_utils); + + (which_util, num_cpus, avg_usage) } diff --git a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs index d5e608e54..012dc0b4d 100644 --- a/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs +++ b/crates/hyperqueue/src/dashboard/ui/screens/cluster/worker/mod.rs @@ -1,20 +1,17 @@ use crate::dashboard::data::DashboardData; use crate::dashboard::data::timelines::job_timeline::TaskInfo; -use crate::dashboard::ui::screens::cluster::worker::cpu_util_table::render_cpu_util_table; +use crate::dashboard::ui::screens::cluster::worker::cpu_util_table::CpuUtilTable; use crate::dashboard::ui::screens::cluster::worker::worker_config_table::WorkerConfigTable; use crate::dashboard::ui::screens::cluster::worker::worker_utilization_chart::WorkerUtilizationChart; -use crate::dashboard::ui::styles::{ - style_footer, style_header_text, table_style_deselected, table_style_selected, -}; +use crate::dashboard::ui::styles::{style_footer, style_header_text, table_style_selected}; use crate::dashboard::ui::terminal::DashboardFrame; use crate::dashboard::ui::widgets::tasks_table::TasksTable; use crate::dashboard::ui::widgets::text::draw_text; use crossterm::event::{KeyCode, KeyEvent}; use ratatui::layout::{Constraint, Direction, Layout, Rect}; -use tako::hwstats::MemoryStats; use tako::{JobTaskId, WorkerId}; -mod cpu_util_table; +pub mod cpu_util_table; mod worker_config_table; mod worker_utilization_chart; @@ -24,8 +21,7 @@ pub struct WorkerDetail { utilization_history: WorkerUtilizationChart, worker_config_table: WorkerConfigTable, worker_tasks_table: TasksTable, - - utilization: Option, + cpu_util_table: CpuUtilTable, } impl Default for WorkerDetail { @@ -35,20 +31,15 @@ impl Default for WorkerDetail { utilization_history: Default::default(), worker_config_table: Default::default(), worker_tasks_table: TasksTable::non_interactive(), - utilization: None, + cpu_util_table: Default::default(), } } } -struct Utilization { - cpu: Vec, - memory: MemoryStats, -} - impl WorkerDetail { pub fn clear_worker_id(&mut self) { self.worker_id = None; - self.utilization = None; + self.cpu_util_table.clear_table(); } pub fn set_worker_id(&mut self, worker_id: WorkerId) { @@ -66,17 +57,19 @@ impl WorkerDetail { frame, style_header_text(), ); - draw_text(": Back", layout.footer, frame, style_footer()); - - if let Some(util) = &self.utilization { - render_cpu_util_table( - &util.cpu, - &util.memory, - layout.current_utilization, - frame, - table_style_deselected(), - ); - } + + draw_text( + format!( + ": Back, : {}", + self.cpu_util_table.next_text() + ) + .as_str(), + layout.footer, + frame, + style_footer(), + ); + + self.cpu_util_table.draw(layout.current_utilization, frame); self.utilization_history .draw(layout.utilization_history, frame); @@ -94,32 +87,19 @@ impl WorkerDetail { pub fn update(&mut self, data: &DashboardData) { if let Some(worker_id) = self.worker_id { self.utilization_history.update(data, worker_id); + let mut worker_config = None; - if let Some((cpu_util, mem_util)) = data - .workers() - .query_worker_overview_at(worker_id, data.current_time()) - .and_then(|overview| overview.item.hw_state.as_ref()) - .map(|hw_state| { - ( - &hw_state.state.cpu_usage.cpu_per_core_percent_usage, - &hw_state.state.memory_usage, - ) - }) - { - self.utilization = Some(Utilization { - cpu: cpu_util.iter().map(|&v| v as f64).collect(), - memory: mem_util.clone(), - }); + if let Some(configuration) = data.workers().query_worker_config_for(worker_id) { + self.worker_config_table.update(configuration); + worker_config = Some(configuration); } + self.cpu_util_table.update(data, worker_id, worker_config); + let tasks_info: Vec<(JobTaskId, &TaskInfo)> = data.query_task_history_for_worker(worker_id).collect(); self.worker_tasks_table .update(tasks_info, data.current_time()); - - if let Some(configuration) = data.workers().query_worker_config_for(worker_id) { - self.worker_config_table.update(configuration); - } } } @@ -127,6 +107,7 @@ impl WorkerDetail { pub fn handle_key(&mut self, key: KeyEvent) { match key.code { KeyCode::Backspace => self.worker_tasks_table.clear_selection(), + KeyCode::Char('c') => self.cpu_util_table.next_view(), _ => self.worker_tasks_table.handle_key(key), } } diff --git a/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs b/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs index 4d97126f4..dc3f56dd3 100644 --- a/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs +++ b/crates/hyperqueue/src/dashboard/ui/widgets/progressbar.rs @@ -1,6 +1,8 @@ use ratatui::style::{Color, Modifier, Style}; use unicode_width::UnicodeWidthStr; +use crate::dashboard::ui::screens::cluster::worker::cpu_util_table::CpuViewMode; + const GREEN_THRESHOLD: f64 = 0.5; const YELLOW_THRESHOLD: f64 = 0.7; @@ -30,6 +32,39 @@ pub fn get_progress_bar_color(progress: f64) -> Style { } } +pub fn get_cpu_progress_bar_color( + progress: f64, + used: bool, + util_render_mode: &CpuViewMode, +) -> Style { + let color = match util_render_mode { + CpuViewMode::Global | CpuViewMode::WorkerAssigned => { + if progress <= GREEN_THRESHOLD { + Color::Green + } else if progress <= YELLOW_THRESHOLD { + Color::Yellow + } else { + Color::Red + } + } + CpuViewMode::WorkerManaged => match (progress, used) { + (progress, true) if progress <= GREEN_THRESHOLD => Color::Green, + (progress, true) if progress <= YELLOW_THRESHOLD => Color::Yellow, + (_, true) => Color::Red, + (progress, false) if progress <= GREEN_THRESHOLD => Color::LightBlue, + (progress, false) if progress <= YELLOW_THRESHOLD => Color::Cyan, + (_, false) => Color::Magenta, + }, + }; + + Style { + fg: Some(color), + bg: None, + add_modifier: Modifier::empty(), + sub_modifier: Modifier::empty(), + } +} + /** * Creates a string progress bar for 0 < progress < 1 */