diff --git a/e2e-tests/src/lib.rs b/e2e-tests/src/lib.rs index 083c08e..ec2af0b 100644 --- a/e2e-tests/src/lib.rs +++ b/e2e-tests/src/lib.rs @@ -140,6 +140,10 @@ max_client_to_self_delay = 1024 min_payment_size_msat = 0 max_payment_size_msat = 1000000000 client_trusts_lsp = true + +[metrics] +enabled = true +poll_metrics_interval = 1 "#, storage_dir = storage_dir.display(), ); diff --git a/e2e-tests/tests/e2e.rs b/e2e-tests/tests/e2e.rs index 577b74c..0407c54 100644 --- a/e2e-tests/tests/e2e.rs +++ b/e2e-tests/tests/e2e.rs @@ -827,3 +827,95 @@ async fn test_hodl_invoice_fail() { events_a.iter().map(|e| &e.event).collect::>() ); } + +#[tokio::test] +async fn test_metrics_endpoint() { + let bitcoind = TestBitcoind::new(); + + // Test with metrics enabled + let server_a = LdkServerHandle::start(&bitcoind).await; + let server_b = LdkServerHandle::start(&bitcoind).await; + + let client = server_a.client(); + let metrics_result = client.get_metrics().await; + + assert!(metrics_result.is_ok(), "Expected metrics to succeed when enabled"); + let metrics = metrics_result.unwrap(); + + // Verify initial state + assert!(metrics.contains("ldk_server_total_peers_count 0")); + assert!(metrics.contains("ldk_server_total_payments_count 0")); + assert!(metrics.contains("ldk_server_total_successful_payments_count 0")); + assert!(metrics.contains("ldk_server_total_pending_payments_count 0")); + assert!(metrics.contains("ldk_server_total_failed_payments_count 0")); + assert!(metrics.contains("ldk_server_total_channels_count 0")); + assert!(metrics.contains("ldk_server_total_public_channels_count 0")); + assert!(metrics.contains("ldk_server_total_private_channels_count 0")); + assert!(metrics.contains("ldk_server_total_onchain_balance_sats 0")); + assert!(metrics.contains("ldk_server_spendable_onchain_balance_sats 0")); + assert!(metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0")); + assert!(metrics.contains("ldk_server_total_lightning_balance_sats 0")); + + // Set up 
channel and make a payment to trigger metrics update + setup_funded_channel(&bitcoind, &server_a, &server_b, 100_000).await; + + // Poll for channel, peer and balance metrics. + let timeout = Duration::from_secs(10); + let start = std::time::Instant::now(); + loop { + let metrics = client.get_metrics().await.unwrap(); + if metrics.contains("ldk_server_total_peers_count 1") + && metrics.contains("ldk_server_total_channels_count 1") + && metrics.contains("ldk_server_total_public_channels_count 1") + && metrics.contains("ldk_server_total_payments_count 2") + && !metrics.contains("ldk_server_total_lightning_balance_sats 0") + && !metrics.contains("ldk_server_total_onchain_balance_sats 0") + && !metrics.contains("ldk_server_spendable_onchain_balance_sats 0") + && !metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0") + { + break; + } + + if start.elapsed() > timeout { + let current_metrics = client.get_metrics().await.unwrap(); + panic!( + "Timed out waiting for channel, peer and balance metrics to update. 
Current metrics:\n{}", + current_metrics + ); + } + tokio::time::sleep(Duration::from_secs(1)).await; + } + + let invoice_resp = server_b + .client() + .bolt11_receive(Bolt11ReceiveRequest { + amount_msat: Some(10_000_000), + description: Some(Bolt11InvoiceDescription { + kind: Some(bolt11_invoice_description::Kind::Direct("metrics test".to_string())), + }), + expiry_secs: 3600, + }) + .await + .unwrap(); + + run_cli(&server_a, &["bolt11-send", &invoice_resp.invoice]); + + // Wait to receive the PaymentSuccessful event and update metrics + let timeout = Duration::from_secs(30); + let start = std::time::Instant::now(); + loop { + let metrics = client.get_metrics().await.unwrap(); + if metrics.contains("ldk_server_total_successful_payments_count 1") + && !metrics.contains("ldk_server_total_lightning_balance_sats 0") + && !metrics.contains("ldk_server_total_onchain_balance_sats 0") + && !metrics.contains("ldk_server_spendable_onchain_balance_sats 0") + && !metrics.contains("ldk_server_total_anchor_channels_reserve_sats 0") + { + break; + } + if start.elapsed() > timeout { + panic!("Timed out waiting for payment metrics to update"); + } + tokio::time::sleep(Duration::from_millis(500)).await; + } +} diff --git a/ldk-server-client/src/client.rs b/ldk-server-client/src/client.rs index 75459a4..a03a24a 100644 --- a/ldk-server-client/src/client.rs +++ b/ldk-server-client/src/client.rs @@ -38,14 +38,15 @@ use ldk_server_protos::endpoints::{ BOLT11_RECEIVE_PATH, BOLT11_RECEIVE_VARIABLE_AMOUNT_VIA_JIT_CHANNEL_PATH, BOLT11_RECEIVE_VIA_JIT_CHANNEL_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH, CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH, - FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, - GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, - LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, - 
ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, - SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, - VERIFY_SIGNATURE_PATH, + FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH, + GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, + GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH, + LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH, + ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH, + SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH, }; use ldk_server_protos::error::{ErrorCode, ErrorResponse}; +use prost::bytes::Bytes; use prost::Message; use reqwest::header::CONTENT_TYPE; use reqwest::{Certificate, Client}; @@ -69,6 +70,11 @@ pub struct LdkServerClient { api_key: String, } +enum RequestType { + Get, + Post, +} + impl LdkServerClient { /// Constructs a [`LdkServerClient`] using `base_url` as the ldk-server endpoint. /// @@ -114,6 +120,18 @@ impl LdkServerClient { self.post_request(&request, &url).await } + /// Retrieve the node metrics in Prometheus format. + pub async fn get_metrics(&self) -> Result { + let url = format!("https://{}/{GET_METRICS_PATH}", self.base_url); + let payload = self.make_request(&url, RequestType::Get, None, false).await?; + String::from_utf8(payload.to_vec()).map_err(|e| { + LdkServerError::new( + InternalError, + format!("Failed to decode metrics response as string: {}", e), + ) + }) + } + /// Retrieves an overview of all known balances. /// For API contract/usage, refer to docs for [`GetBalancesRequest`] and [`GetBalancesResponse`]. 
pub async fn get_balances( @@ -431,18 +449,37 @@ impl LdkServerClient { &self, request: &Rq, url: &str, ) -> Result { let request_body = request.encode_to_vec(); - let auth_header = self.compute_auth_header(&request_body); - let response_raw = self - .client - .post(url) - .header(CONTENT_TYPE, APPLICATION_OCTET_STREAM) - .header("X-Auth", auth_header) - .body(request_body) - .send() - .await - .map_err(|e| { - LdkServerError::new(InternalError, format!("HTTP request failed: {}", e)) - })?; + let payload = self.make_request(url, RequestType::Post, Some(request_body), true).await?; + Rs::decode(&payload[..]).map_err(|e| { + LdkServerError::new(InternalError, format!("Failed to decode success response: {}", e)) + }) + } + + async fn make_request( + &self, url: &str, request_type: RequestType, body: Option>, authenticated: bool, + ) -> Result { + let builder = match request_type { + RequestType::Get => self.client.get(url), + RequestType::Post => self.client.post(url), + }; + + let builder = if authenticated { + let body_for_auth = body.as_deref().unwrap_or(&[]); + let auth_header = self.compute_auth_header(body_for_auth); + builder.header("X-Auth", auth_header) + } else { + builder + }; + + let builder = if let Some(body_content) = body { + builder.header(CONTENT_TYPE, APPLICATION_OCTET_STREAM).body(body_content) + } else { + builder + }; + + let response_raw = builder.send().await.map_err(|e| { + LdkServerError::new(InternalError, format!("HTTP request failed: {}", e)) + })?; let status = response_raw.status(); let payload = response_raw.bytes().await.map_err(|e| { @@ -450,12 +487,7 @@ impl LdkServerClient { })?; if status.is_success() { - Ok(Rs::decode(&payload[..]).map_err(|e| { - LdkServerError::new( - InternalError, - format!("Failed to decode success response: {}", e), - ) - })?) 
+ Ok(payload) } else { let error_response = ErrorResponse::decode(&payload[..]).map_err(|e| { LdkServerError::new( diff --git a/ldk-server-protos/src/endpoints.rs b/ldk-server-protos/src/endpoints.rs index c6818de..836be5d 100644 --- a/ldk-server-protos/src/endpoints.rs +++ b/ldk-server-protos/src/endpoints.rs @@ -43,3 +43,4 @@ pub const GRAPH_LIST_CHANNELS_PATH: &str = "GraphListChannels"; pub const GRAPH_GET_CHANNEL_PATH: &str = "GraphGetChannel"; pub const GRAPH_LIST_NODES_PATH: &str = "GraphListNodes"; pub const GRAPH_GET_NODE_PATH: &str = "GraphGetNode"; +pub const GET_METRICS_PATH: &str = "metrics"; diff --git a/ldk-server/ldk-server-config.toml b/ldk-server/ldk-server-config.toml index d4345bc..152f409 100644 --- a/ldk-server/ldk-server-config.toml +++ b/ldk-server/ldk-server-config.toml @@ -88,3 +88,8 @@ client_trusts_lsp = false ## A token we may require to be sent by the clients. ## If set, only requests matching this token will be accepted. (uncomment and set if required) # require_token = "" + +# Metrics settings +[metrics] +enabled = false +poll_metrics_interval = 60 # The polling interval for metrics in seconds. Defaults to 60secs if unset and metrics enabled. 
diff --git a/ldk-server/src/main.rs b/ldk-server/src/main.rs index 3de2a40..8bbf663 100644 --- a/ldk-server/src/main.rs +++ b/ldk-server/src/main.rs @@ -16,7 +16,7 @@ use std::fs; use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use clap::Parser; use hex::DisplayHex; @@ -50,6 +50,7 @@ use crate::io::persist::{ use crate::service::NodeService; use crate::util::config::{load_config, ArgsConfig, ChainSource}; use crate::util::logger::ServerLogger; +use crate::util::metrics::Metrics; use crate::util::proto_adapter::{forwarded_payment_to_proto, payment_to_proto}; use crate::util::systemd; use crate::util::tls::get_or_generate_tls_config; @@ -273,6 +274,28 @@ fn main() { } }; let event_node = Arc::clone(&node); + + let metrics: Option> = if config_file.metrics_enabled { + let poll_metrics_interval = Duration::from_secs(config_file.poll_metrics_interval.unwrap_or(60)); + let metrics_node = Arc::clone(&node); + let mut interval = tokio::time::interval(poll_metrics_interval); + let metrics = Arc::new(Metrics::new()); + let metrics_bg = Arc::clone(&metrics); + + // Initialize metrics that are event-driven to ensure they start with correct values from persistence + metrics.initialize_payment_metrics(&metrics_node); + + runtime.spawn(async move { + loop { + interval.tick().await; + metrics_bg.update_all_pollable_metrics(&metrics_node); + } + }); + Some(metrics) + } else { + None + }; + let rest_svc_listener = TcpListener::bind(config_file.rest_service_addr) .await .expect("Failed to bind listening port"); @@ -313,7 +336,24 @@ fn main() { if let Err(e) = event_node.event_handled() { error!("Failed to mark event as handled: {e}"); } + + if let Some(metrics) = &metrics { + metrics.update_channels_count(false); + } }, + Event::ChannelClosed { channel_id, counterparty_node_id, .. 
} => { + info!( + "CHANNEL_CLOSED: {} from counterparty {:?}", + channel_id, counterparty_node_id + ); + if let Err(e) = event_node.event_handled() { + error!("Failed to mark event as handled: {e}"); + } + + if let Some(metrics) = &metrics { + metrics.update_channels_count(true); + } + } Event::PaymentReceived { payment_id, payment_hash, amount_msat, .. } => { info!( "PAYMENT_RECEIVED: with id {:?}, hash {}, amount_msat {}", @@ -328,6 +368,10 @@ fn main() { &event_node, Arc::clone(&event_publisher), Arc::clone(&paginated_store)).await; + + if let Some(metrics) = &metrics { + metrics.update_all_balances(&event_node); + } }, Event::PaymentSuccessful {payment_id, ..} => { let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1"); @@ -339,6 +383,11 @@ fn main() { &event_node, Arc::clone(&event_publisher), Arc::clone(&paginated_store)).await; + + if let Some(metrics) = &metrics { + metrics.update_payments_count(true); + metrics.update_all_balances(&event_node); + } }, Event::PaymentFailed {payment_id, ..} => { let payment_id = payment_id.expect("PaymentId expected for ldk-server >=0.1"); @@ -350,6 +399,10 @@ fn main() { &event_node, Arc::clone(&event_publisher), Arc::clone(&paginated_store)).await; + + if let Some(metrics) = &metrics { + metrics.update_payments_count(false); + } }, Event::PaymentClaimable {payment_id, ..} => { publish_event_and_upsert_payment(&payment_id, @@ -435,7 +488,7 @@ fn main() { res = rest_svc_listener.accept() => { match res { Ok((stream, _)) => { - let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone()); + let node_service = NodeService::new(Arc::clone(&node), Arc::clone(&paginated_store), api_key.clone(), metrics.clone()); let acceptor = tls_acceptor.clone(); runtime.spawn(async move { match acceptor.accept(stream).await { diff --git a/ldk-server/src/service.rs b/ldk-server/src/service.rs index 05004ae..03e7d8d 100644 --- a/ldk-server/src/service.rs +++ 
b/ldk-server/src/service.rs @@ -23,12 +23,12 @@ use ldk_server_protos::endpoints::{ BOLT11_RECEIVE_PATH, BOLT11_RECEIVE_VARIABLE_AMOUNT_VIA_JIT_CHANNEL_PATH, BOLT11_RECEIVE_VIA_JIT_CHANNEL_PATH, BOLT11_SEND_PATH, BOLT12_RECEIVE_PATH, BOLT12_SEND_PATH, CLOSE_CHANNEL_PATH, CONNECT_PEER_PATH, DISCONNECT_PEER_PATH, EXPORT_PATHFINDING_SCORES_PATH, - FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_NODE_INFO_PATH, GET_PAYMENT_DETAILS_PATH, - GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, - LIST_CHANNELS_PATH, LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, - ONCHAIN_RECEIVE_PATH, ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, - SPLICE_OUT_PATH, SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, - VERIFY_SIGNATURE_PATH, + FORCE_CLOSE_CHANNEL_PATH, GET_BALANCES_PATH, GET_METRICS_PATH, GET_NODE_INFO_PATH, + GET_PAYMENT_DETAILS_PATH, GRAPH_GET_CHANNEL_PATH, GRAPH_GET_NODE_PATH, + GRAPH_LIST_CHANNELS_PATH, GRAPH_LIST_NODES_PATH, LIST_CHANNELS_PATH, + LIST_FORWARDED_PAYMENTS_PATH, LIST_PAYMENTS_PATH, LIST_PEERS_PATH, ONCHAIN_RECEIVE_PATH, + ONCHAIN_SEND_PATH, OPEN_CHANNEL_PATH, SIGN_MESSAGE_PATH, SPLICE_IN_PATH, SPLICE_OUT_PATH, + SPONTANEOUS_SEND_PATH, UNIFIED_SEND_PATH, UPDATE_CHANNEL_CONFIG_PATH, VERIFY_SIGNATURE_PATH, }; use prost::Message; @@ -70,6 +70,7 @@ use crate::api::unified_send::handle_unified_send_request; use crate::api::update_channel_config::handle_update_channel_config_request; use crate::api::verify_signature::handle_verify_signature_request; use crate::io::persist::paginated_kv_store::PaginatedKVStore; +use crate::util::metrics::Metrics; use crate::util::proto_adapter::to_error_response; // Maximum request body size: 10 MB @@ -81,13 +82,15 @@ pub struct NodeService { node: Arc, paginated_kv_store: Arc, api_key: String, + metrics: Option>, } impl NodeService { pub(crate) fn new( node: Arc, paginated_kv_store: Arc, api_key: String, + metrics: 
Option>, ) -> Self { - Self { node, paginated_kv_store, api_key } + Self { node, paginated_kv_store, api_key, metrics } } } @@ -171,6 +174,29 @@ impl Service> for NodeService { type Future = Pin> + Send>>; fn call(&self, req: Request) -> Self::Future { + // Handle metrics endpoint separately to bypass auth and return plain text + if req.method() == hyper::Method::GET + && req.uri().path().len() > 1 + && &req.uri().path()[1..] == GET_METRICS_PATH + { + if let Some(metrics) = &self.metrics { + let metrics = Arc::clone(metrics); + return Box::pin(async move { + Ok(Response::builder() + .header("Content-Type", "text/plain") + .body(Full::new(Bytes::from(metrics.gather_metrics()))) + .unwrap()) + }); + } else { + return Box::pin(async move { + Ok(Response::builder() + .status(StatusCode::NOT_FOUND) + .body(Full::new(Bytes::from("Not Found"))) + .unwrap()) + }); + } + } + // Extract auth params from headers (validation happens after body is read) let auth_params = match extract_auth_params(&req) { Ok(params) => params, diff --git a/ldk-server/src/util/config.rs b/ldk-server/src/util/config.rs index c950987..ac0054c 100644 --- a/ldk-server/src/util/config.rs +++ b/ldk-server/src/util/config.rs @@ -58,6 +58,8 @@ pub struct Config { pub log_level: LevelFilter, pub log_file_path: Option, pub pathfinding_scores_source_url: Option, + pub metrics_enabled: bool, + pub poll_metrics_interval: Option, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -103,6 +105,8 @@ struct ConfigBuilder { log_level: Option, log_file_path: Option, pathfinding_scores_source_url: Option, + metrics_enabled: Option, + poll_metrics_interval: Option, } impl ConfigBuilder { @@ -162,6 +166,12 @@ impl ConfigBuilder { hosts: tls.hosts.unwrap_or_default(), }); } + + if let Some(metrics) = toml.metrics { + self.metrics_enabled = metrics.enabled.or(self.metrics_enabled); + self.poll_metrics_interval = + metrics.poll_metrics_interval.or(self.poll_metrics_interval); + } } fn merge_args(&mut self, args: &ArgsConfig) { 
@@ -204,6 +214,14 @@ impl ConfigBuilder { if let Some(pathfinding_scores_source_url) = &args.pathfinding_scores_source_url { self.pathfinding_scores_source_url = Some(pathfinding_scores_source_url.clone()); } + + if args.metrics_enabled { + self.metrics_enabled = Some(true); + } + + if let Some(poll_metrics_interval) = &args.poll_metrics_interval { + self.poll_metrics_interval = Some(*poll_metrics_interval); + } } fn build(self) -> io::Result { @@ -362,6 +380,10 @@ impl ConfigBuilder { let pathfinding_scores_source_url = self.pathfinding_scores_source_url; + let metrics_enabled = self.metrics_enabled.unwrap_or(false); + + let poll_metrics_interval = self.poll_metrics_interval; + Ok(Config { network, listening_addrs, @@ -379,6 +401,8 @@ impl ConfigBuilder { log_level, log_file_path: self.log_file_path, pathfinding_scores_source_url, + metrics_enabled, + poll_metrics_interval, }) } } @@ -395,6 +419,7 @@ pub struct TomlConfig { liquidity: Option, log: Option, tls: Option, + metrics: Option, } #[derive(Deserialize, Serialize)] @@ -454,6 +479,12 @@ struct TomlTlsConfig { hosts: Option>, } +#[derive(Deserialize, Serialize)] +struct MetricsTomlConfig { + enabled: Option, + poll_metrics_interval: Option, +} + #[derive(Deserialize, Serialize)] struct LiquidityConfig { lsps2_client: Option, @@ -612,6 +643,21 @@ pub struct ArgsConfig { help = "The external scores source that is merged into the local scoring system to improve routing." )] pathfinding_scores_source_url: Option, + + #[arg( + long, + env = "LDK_SERVER_METRICS_ENABLED", + help = "The option to enable the metrics endpoint. WARNING: This endpoint is unauthenticated." + )] + metrics_enabled: bool, + + #[arg( + long, + env = "LDK_SERVER_POLL_METRICS_INTERVAL", + help = "The polling interval for metrics in seconds. Required when + metrics is enabled, but defaults to 60secs if unset." 
+ )] + poll_metrics_interval: Option, } pub fn load_config(args: &ArgsConfig) -> io::Result { @@ -745,6 +791,8 @@ mod tests { storage_dir_path: Some(String::from("/tmp_cli")), node_alias: Some(String::from("LDK Server CLI")), pathfinding_scores_source_url: Some(String::from("https://example.com/")), + metrics_enabled: false, + poll_metrics_interval: None, } } @@ -761,6 +809,8 @@ mod tests { bitcoind_rpc_password: None, storage_dir_path: None, pathfinding_scores_source_url: None, + metrics_enabled: false, + poll_metrics_interval: None, } } @@ -837,6 +887,8 @@ mod tests { log_level: LevelFilter::Trace, log_file_path: Some("/var/log/ldk-server.log".to_string()), pathfinding_scores_source_url: None, + metrics_enabled: false, + poll_metrics_interval: None, }; assert_eq!(config.listening_addrs, expected.listening_addrs); @@ -855,6 +907,7 @@ mod tests { assert_eq!(config.log_level, expected.log_level); assert_eq!(config.log_file_path, expected.log_file_path); assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url); + assert_eq!(config.metrics_enabled, expected.metrics_enabled); // Test case where only electrum is set @@ -1161,6 +1214,8 @@ mod tests { log_level: LevelFilter::Trace, log_file_path: Some("/var/log/ldk-server.log".to_string()), pathfinding_scores_source_url: Some("https://example.com/".to_string()), + metrics_enabled: false, + poll_metrics_interval: None, }; assert_eq!(config.listening_addrs, expected.listening_addrs); @@ -1174,6 +1229,7 @@ mod tests { assert_eq!(config.rabbitmq_exchange_name, expected.rabbitmq_exchange_name); assert!(config.lsps2_service_config.is_none()); assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url); + assert_eq!(config.metrics_enabled, expected.metrics_enabled); } #[test] @@ -1273,6 +1329,8 @@ mod tests { log_level: LevelFilter::Trace, log_file_path: Some("/var/log/ldk-server.log".to_string()), pathfinding_scores_source_url: 
Some("https://example.com/".to_string()), + metrics_enabled: false, + poll_metrics_interval: None, }; assert_eq!(config.listening_addrs, expected.listening_addrs); @@ -1288,6 +1346,7 @@ mod tests { #[cfg(feature = "experimental-lsps2-support")] assert_eq!(config.lsps2_service_config.is_some(), expected.lsps2_service_config.is_some()); assert_eq!(config.pathfinding_scores_source_url, expected.pathfinding_scores_source_url); + assert_eq!(config.metrics_enabled, expected.metrics_enabled); } #[test] @@ -1309,4 +1368,47 @@ mod tests { let err = result.unwrap_err(); assert_eq!(err.kind(), io::ErrorKind::InvalidInput); } + + #[test] + fn test_metrics_enabled_config() { + let storage_path = std::env::temp_dir(); + let config_file_name = "test_metrics_enabled.toml"; + + let toml_config = r#" + [node] + network = "regtest" + rest_service_address = "127.0.0.1:3002" + + [bitcoind] + rpc_address = "127.0.0.1:8332" + rpc_user = "user" + rpc_password = "password" + + [metrics] + enabled = true + + [rabbitmq] + connection_string = "rabbitmq_connection_string" + exchange_name = "rabbitmq_exchange_name" + + [liquidity.lsps2_service] + advertise_service = false + channel_opening_fee_ppm = 1000 # 0.1% fee + channel_over_provisioning_ppm = 500000 # 50% extra capacity + min_channel_opening_fee_msat = 10000000 # 10,000 satoshis + min_channel_lifetime = 4320 # ~30 days + max_client_to_self_delay = 1440 # ~10 days + min_payment_size_msat = 10000000 # 10,000 satoshis + max_payment_size_msat = 25000000000 # 0.25 BTC + client_trusts_lsp = true + "#; + + fs::write(storage_path.join(config_file_name), toml_config).unwrap(); + let mut args_config = empty_args_config(); + args_config.config_file = + Some(storage_path.join(config_file_name).to_string_lossy().to_string()); + + let config = load_config(&args_config).unwrap(); + assert!(config.metrics_enabled); + } } diff --git a/ldk-server/src/util/metrics.rs b/ldk-server/src/util/metrics.rs new file mode 100644 index 0000000..40424d3 --- /dev/null 
+++ b/ldk-server/src/util/metrics.rs @@ -0,0 +1,352 @@ +// This file is Copyright its original authors, visible in version control +// history. +// +// This file is licensed under the Apache License, Version 2.0 or the MIT license +// , at your option. +// You may not use this file except in accordance with one or both of these +// licenses. + +//! This module provides metrics for monitoring the LDK Server node in a Prometheus-compatible format. +//! +//! The `Metrics` struct holds atomic counters and gauges for various aspects of the node's +//! operation, such as peer connections, channels and payments statuses, and balances. +//! +//! The metrics are updated through two main mechanisms: +//! 1. **Periodic Polling**: The `update_all_pollable_metrics` function is called at a regular +//! interval (`poll_metrics_interval`) configurable via the config file but defaults to 60secs if unset, to perform a full recount of metrics like peer count, +//! payments count, and channels metrics. +//! 2. **Event-Driven Updates**: For metrics that can change frequently and where a full recount +//! would be inefficient (e.g., total_successful_payments_count, balances), a hybrid approach is used. +//! - `initialize_payment_metrics` is called once at startup to get the accurate persisted state. +//! - `update_payments_count` is called incrementally whenever a relevant event (like +//! `PaymentSuccessful` or `PaymentFailed`) occurs. +//! - `update_all_balances` is called when we receive a `PaymentSuccessful` event to update all balance metrics. +//! - `update_channels_count` is called when we receive a `ChannelReady` or `ChannelClosed` event to update the channels metrics. +//! +//! The `gather_metrics` function collects all current metric values and formats them into the +//! plain-text format that Prometheus scrapers expect. This output is exposed via an +//! unauthenticated `/metrics` HTTP endpoint on the rest service address. 
+ +use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; + +use ldk_node::payment::PaymentStatus; +use ldk_node::Node; + +/// Holds all the metrics that are tracked for LDK Server. +/// +/// These metrics are exposed in a Prometheus-compatible format. The values are stored +/// in atomic types to allow for safe concurrent access. +pub struct Metrics { + pub total_peers_count: AtomicI64, + pub total_payments_count: AtomicI64, + pub total_successful_payments_count: AtomicI64, + pub total_pending_payments_count: AtomicI64, + pub total_failed_payments_count: AtomicI64, + pub total_channels_count: AtomicI64, + pub total_public_channels_count: AtomicI64, + pub total_private_channels_count: AtomicI64, + pub total_onchain_balance_sats: AtomicU64, + pub spendable_onchain_balance_sats: AtomicU64, + pub total_anchor_channels_reserve_sats: AtomicU64, + pub total_lightning_balance_sats: AtomicU64, +} + +impl Metrics { + pub fn new() -> Self { + Self { + total_peers_count: AtomicI64::new(0), + total_payments_count: AtomicI64::new(0), + total_successful_payments_count: AtomicI64::new(0), + total_pending_payments_count: AtomicI64::new(0), + total_failed_payments_count: AtomicI64::new(0), + total_channels_count: AtomicI64::new(0), + total_public_channels_count: AtomicI64::new(0), + total_private_channels_count: AtomicI64::new(0), + total_onchain_balance_sats: AtomicU64::new(0), + spendable_onchain_balance_sats: AtomicU64::new(0), + total_anchor_channels_reserve_sats: AtomicU64::new(0), + total_lightning_balance_sats: AtomicU64::new(0), + } + } + + fn update_peer_count(&self, node: &Node) { + let total_peers_count = node.list_peers().len() as i64; + self.total_peers_count.store(total_peers_count, Ordering::Relaxed); + } + + pub fn update_payments_count(&self, is_successful: bool) { + if is_successful { + self.total_successful_payments_count.fetch_add(1, Ordering::Relaxed); + } else { + self.total_failed_payments_count.fetch_add(1, Ordering::Relaxed); + } + } + + pub fn 
update_channels_count(&self, is_closed: bool) { + if is_closed { + self.total_channels_count.fetch_sub(1, Ordering::Relaxed); + } else { + self.total_channels_count.fetch_add(1, Ordering::Relaxed); + } + } + + pub fn initialize_payment_metrics(&self, node: &Node) { + let mut successful_payments_count = 0; + let mut failed_payments_count = 0; + let mut pending_payments_count = 0; + + for payment_details in node.list_payments() { + match payment_details.status { + PaymentStatus::Succeeded => successful_payments_count += 1, + PaymentStatus::Failed => failed_payments_count += 1, + PaymentStatus::Pending => pending_payments_count += 1, + } + } + self.total_successful_payments_count.store(successful_payments_count, Ordering::Relaxed); + self.total_failed_payments_count.store(failed_payments_count, Ordering::Relaxed); + self.total_pending_payments_count.store(pending_payments_count, Ordering::Relaxed); + + let channels_count = node.list_channels().len() as i64; + self.total_channels_count.store(channels_count, Ordering::Relaxed); + + self.update_all_balances(node); + } + + pub fn update_all_balances(&self, node: &Node) { + let all_balances = node.list_balances(); + self.total_onchain_balance_sats + .store(all_balances.total_onchain_balance_sats, Ordering::Relaxed); + + self.spendable_onchain_balance_sats + .store(all_balances.spendable_onchain_balance_sats, Ordering::Relaxed); + + self.total_anchor_channels_reserve_sats + .store(all_balances.total_anchor_channels_reserve_sats, Ordering::Relaxed); + + self.total_lightning_balance_sats + .store(all_balances.total_lightning_balance_sats, Ordering::Relaxed); + } + + pub fn update_all_pollable_metrics(&self, node: &Node) { + let all_payments = node.list_payments(); + let all_channels = node.list_channels(); + + let payments_count = all_payments.len() as i64; + self.total_payments_count.store(payments_count, Ordering::Relaxed); + + let pending_payments_count = all_payments + .iter() + .filter(|payment_details| 
payment_details.status == PaymentStatus::Pending) + .count() as i64; + self.total_pending_payments_count.store(pending_payments_count, Ordering::Relaxed); + + let public_channels_count = + all_channels.iter().filter(|channel_details| channel_details.is_announced).count() + as i64; + self.total_public_channels_count.store(public_channels_count, Ordering::Relaxed); + + let private_channels_count = + all_channels.iter().filter(|channel_details| !channel_details.is_announced).count() + as i64; + self.total_private_channels_count.store(private_channels_count, Ordering::Relaxed); + + self.update_peer_count(node); + self.update_all_balances(node); + } + + /// Gathers all metrics and formats them into the Prometheus text-based format. + /// + /// This function is called by the `/metrics` endpoint to provide the current state + /// of all tracked metrics to a Prometheus scraper. The format is a series of lines, + /// each containing a metric name, and its value, preceded by + /// HELP and TYPE lines as per the Prometheus exposition format specification. 
+    pub fn gather_metrics(&self) -> String {
+        // Approximate size of the rendered output (12 metrics, three short lines each).
+        // Reserving it up front avoids repeated reallocation while the buffer is filled.
+        const ESTIMATED_OUTPUT_BYTES: usize = 3 * 1024;
+
+        // Each value below is read with its own `Ordering::Relaxed` load, so the rendered
+        // text is not a single consistent snapshot across all metrics.
+        let mut buffer = String::with_capacity(ESTIMATED_OUTPUT_BYTES);
+
+        // Appends one metric to `buffer`: a `# HELP` line, a `# TYPE` line and one
+        // `<name> <value>` sample line, each terminated by a newline.
+        fn format_metric(
+            buffer: &mut String, name: &str, help: &str, metric_type: &str,
+            value: impl std::fmt::Display,
+        ) {
+            use std::fmt::Write;
+            // The results are discarded on purpose: the `fmt::Write` implementation for
+            // `String` only appends to the string and does not report errors.
+            let _ = writeln!(buffer, "# HELP {} {}", name, help);
+            let _ = writeln!(buffer, "# TYPE {} {}", name, metric_type);
+            let _ = writeln!(buffer, "{} {}", name, value);
+        }
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_peers_count",
+            "Total number of peers",
+            "gauge",
+            self.total_peers_count.load(Ordering::Relaxed),
+        );
+
+        // NOTE(review): the three payment totals below are exposed as `counter`. Prometheus
+        // expects counters to never decrease; confirm that holds for how these fields are
+        // written, otherwise `gauge` would be the appropriate type.
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_payments_count",
+            "Total number of payments",
+            "counter",
+            self.total_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_pending_payments_count",
+            "Total number of pending payments",
+            "gauge",
+            self.total_pending_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_successful_payments_count",
+            "Total number of successful payments",
+            "counter",
+            self.total_successful_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_failed_payments_count",
+            "Total number of failed payments",
+            "counter",
+            self.total_failed_payments_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_channels_count",
+            "Total number of channels",
+            "gauge",
+            self.total_channels_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_public_channels_count",
+            "Total number of public channels",
+            "gauge",
+            self.total_public_channels_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_private_channels_count",
+            "Total number of private channels",
+            "gauge",
+            self.total_private_channels_count.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_onchain_balance_sats",
+            "Total onchain balance in sats",
+            "gauge",
+            self.total_onchain_balance_sats.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_spendable_onchain_balance_sats",
+            "Spendable onchain balance in sats",
+            "gauge",
+            self.spendable_onchain_balance_sats.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_anchor_channels_reserve_sats",
+            "Total anchor channels reserve in sats",
+            "gauge",
+            self.total_anchor_channels_reserve_sats.load(Ordering::Relaxed),
+        );
+
+        format_metric(
+            &mut buffer,
+            "ldk_server_total_lightning_balance_sats",
+            "Total lightning balance in sats",
+            "gauge",
+            self.total_lightning_balance_sats.load(Ordering::Relaxed),
+        );
+
+        buffer
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Names of all metrics that `gather_metrics` is expected to render.
+    const METRIC_NAMES: [&str; 12] = [
+        "ldk_server_total_peers_count",
+        "ldk_server_total_payments_count",
+        "ldk_server_total_pending_payments_count",
+        "ldk_server_total_successful_payments_count",
+        "ldk_server_total_failed_payments_count",
+        "ldk_server_total_channels_count",
+        "ldk_server_total_public_channels_count",
+        "ldk_server_total_private_channels_count",
+        "ldk_server_total_onchain_balance_sats",
+        "ldk_server_spendable_onchain_balance_sats",
+        "ldk_server_total_anchor_channels_reserve_sats",
+        "ldk_server_total_lightning_balance_sats",
+    ];
+
+    /// Returns `true` when `output` contains a line that is exactly `expected`.
+    ///
+    /// Whole lines are compared on purpose: a substring check such as
+    /// `contains("ldk_server_total_peers_count 1")` would also accept a value of `10`.
+    fn has_line(output: &str, expected: &str) -> bool {
+        output.lines().any(|line| line == expected)
+    }
+
+    /// Asserts that `output` contains the sample line `<name> <value>`.
+    #[track_caller]
+    fn assert_sample(output: &str, name: &str, value: i64) {
+        let expected = format!("{} {}", name, value);
+        assert!(
+            has_line(output, &expected),
+            "missing sample line `{}` in:\n{}",
+            expected,
+            output
+        );
+    }
+
+    #[test]
+    fn test_initial_metrics_values() {
+        let metrics = Metrics::new();
+        let result = metrics.gather_metrics();
+
+        // Check that all metrics are present and report zero.
+        for name in METRIC_NAMES.iter() {
+            assert_sample(&result, name, 0);
+        }
+    }
+
+    #[test]
+    fn test_gather_metrics_layout() {
+        let result = Metrics::new().gather_metrics();
+        let lines: Vec<&str> = result.lines().collect();
+
+        // Every metric contributes exactly three lines: HELP, TYPE, then the sample.
+        assert_eq!(lines.len(), 3 * METRIC_NAMES.len());
+        for triple in lines.chunks_exact(3) {
+            let name = triple[2].split(' ').next().unwrap_or("");
+            assert!(METRIC_NAMES.contains(&name), "unexpected metric `{}`", name);
+            assert!(
+                triple[0].starts_with(&format!("# HELP {} ", name)),
+                "bad HELP line for `{}`: {}",
+                name,
+                triple[0]
+            );
+            assert!(
+                triple[1].starts_with(&format!("# TYPE {} ", name)),
+                "bad TYPE line for `{}`: {}",
+                name,
+                triple[1]
+            );
+        }
+    }
+
+    #[test]
+    fn test_update_payments_count() {
+        let metrics = Metrics::new();
+
+        metrics.total_successful_payments_count.store(10, Ordering::Relaxed);
+        metrics.total_failed_payments_count.store(5, Ordering::Relaxed);
+
+        metrics.update_payments_count(true);
+        metrics.update_payments_count(false);
+
+        assert_eq!(metrics.total_successful_payments_count.load(Ordering::Relaxed), 11);
+        assert_eq!(metrics.total_failed_payments_count.load(Ordering::Relaxed), 6);
+    }
+
+    #[test]
+    fn test_metrics_update_and_gather() {
+        let metrics = Metrics::new();
+
+        // Manually update metrics to simulate node activity
+        metrics.total_peers_count.store(5, Ordering::Relaxed);
+        metrics.total_payments_count.store(10, Ordering::Relaxed);
+        metrics.total_pending_payments_count.store(1, Ordering::Relaxed);
+        metrics.total_successful_payments_count.store(8, Ordering::Relaxed);
+        metrics.total_failed_payments_count.store(2, Ordering::Relaxed);
+        metrics.total_channels_count.store(3, Ordering::Relaxed);
+        metrics.total_public_channels_count.store(1, Ordering::Relaxed);
+        metrics.total_private_channels_count.store(2, Ordering::Relaxed);
+        metrics.total_onchain_balance_sats.store(100_000, Ordering::Relaxed);
+        metrics.spendable_onchain_balance_sats.store(50_000, Ordering::Relaxed);
+        metrics.total_anchor_channels_reserve_sats.store(1_000, Ordering::Relaxed);
+        metrics.total_lightning_balance_sats.store(250_000, Ordering::Relaxed);
+
+        let result = metrics.gather_metrics();
+
+        // Check that output contains updated values and correct Prometheus format
+        assert!(has_line(&result, "# HELP ldk_server_total_peers_count Total number of peers"));
+        assert!(has_line(&result, "# TYPE ldk_server_total_peers_count gauge"));
+        assert_sample(&result, "ldk_server_total_peers_count", 5);
+
+        assert_sample(&result, "ldk_server_total_payments_count", 10);
+        assert_sample(&result, "ldk_server_total_pending_payments_count", 1);
+        assert_sample(&result, "ldk_server_total_successful_payments_count", 8);
+        assert_sample(&result, "ldk_server_total_failed_payments_count", 2);
+        assert_sample(&result, "ldk_server_total_channels_count", 3);
+        assert_sample(&result, "ldk_server_total_public_channels_count", 1);
+        assert_sample(&result, "ldk_server_total_private_channels_count", 2);
+        assert_sample(&result, "ldk_server_total_onchain_balance_sats", 100_000);
+        assert_sample(&result, "ldk_server_spendable_onchain_balance_sats", 50_000);
+        assert_sample(&result, "ldk_server_total_anchor_channels_reserve_sats", 1_000);
+        assert_sample(&result, "ldk_server_total_lightning_balance_sats", 250_000);
+    }
+}
diff --git a/ldk-server/src/util/mod.rs b/ldk-server/src/util/mod.rs
index 5d74de4..a57dbd0 100644
--- a/ldk-server/src/util/mod.rs
+++ b/ldk-server/src/util/mod.rs
@@ -9,6 +9,7 @@
 
 pub(crate) mod config;
 pub(crate) mod logger;
+pub(crate) mod metrics;
 pub(crate) mod proto_adapter;
 pub(crate) mod systemd;
 pub(crate) mod tls;