diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ac4d8c69..96097d25 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,7 +46,7 @@ jobs: - name: Build Binary run: | - cargo build --release --locked --target=${{ matrix.target }} --bin objectstore + cargo build --release --locked --target=${{ matrix.target }} --bin objectstore --features profiling cp target/${{ matrix.target }}/release/objectstore ./objectstore - name: Set up Docker Buildx diff --git a/Cargo.lock b/Cargo.lock index cc944985..e4be1837 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -672,6 +672,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "critical-section" version = "1.2.0" @@ -1078,6 +1087,16 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1907,6 +1926,23 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "jemalloc_pprof" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb2dfa20a68824f4c8c1aa271617d0ee0d9b707a866d56b7d7ee39c60c235a8" +dependencies = [ + "anyhow", + "libc", + "mappings", + "once_cell", + "pprof_util", + "tempfile", + "tikv-jemalloc-ctl", + "tokio", + "tracing", +] + [[package]] name = "jni" version = "0.21.1" @@ 
-2094,6 +2130,19 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "mappings" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bab1e61a4b76757edb59cd81fcaa7f3ba9018d43b527d9abfad877b4c6c60f2" +dependencies = [ + "anyhow", + "libc", + "once_cell", + "pprof_util", + "tracing", +] + [[package]] name = "matchers" version = "0.2.0" @@ -2189,6 +2238,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -2308,6 +2358,20 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -2334,6 +2398,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.2.0" @@ -2360,6 +2433,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -2614,6 +2698,7 @@ dependencies = [ "http 1.4.2", "humantime", "humantime-serde", + "jemalloc_pprof", 
"jsonwebtoken", "nix 0.31.3", "num_cpus", @@ -2980,6 +3065,20 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "pprof_util" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eea0cc524de808a6d98d192a3d99fe95617031ad4a52ec0a0f987ef4432e8fe1" +dependencies = [ + "anyhow", + "backtrace", + "flate2", + "num", + "paste", + "prost", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -4089,6 +4188,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "simd_cesu8" version = "1.1.1" diff --git a/Cargo.toml b/Cargo.toml index e311da67..38130f7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ serde_yaml = "0.9.34-deprecated" sketches-ddsketch = "0.3.1" tempfile = "3.27.0" thiserror = "2.0.18" +jemalloc_pprof = { version = "0.9.0", features = ["symbolize"] } tikv-jemallocator = { version = "0.7.0", features = ["background_threads", "override_allocator_on_supported_platforms"] } tikv-jemalloc-ctl = { version = "0.7.0", features = ["stats"] } tokio = "1.52.3" diff --git a/README.md b/README.md index 371d1a9c..5b7dbbd9 100644 --- a/README.md +++ b/README.md @@ -243,6 +243,36 @@ cargo run -- run You can copy and save additional config files next to the examples in `objectstore-server/config`. All other files are ignored by git. +### Heap Profiling + +Production release builds include on-demand heap profiling via jemalloc. It can +be enabled and disabled through HTTP endpoints that are only reachable from +loopback. 
+ +To capture a heap profile from a running server, for example on localhost with +default port: + +```sh +# Enable sampling, let the workload run, then dump a profile +curl -X POST http://localhost:8888/debug/pprof/enable +curl http://localhost:8888/debug/pprof/heap > heap.pb.gz + +# Disable sampling when done +curl -X POST http://localhost:8888/debug/pprof/disable +``` + +Analyze the profile dump with `go tool pprof`: + +```sh +go tool pprof heap.pb.gz + +# To isolate growth between two snapshots, use the -base flag: +go tool pprof -base before.pb.gz after.pb.gz +``` + +The sampling overhead is expected to be low, so profiling can be left enabled +for an extended capture window safely. + ### Tests To run tests: diff --git a/objectstore-server/Cargo.toml b/objectstore-server/Cargo.toml index 5dc9088b..67b61394 100644 --- a/objectstore-server/Cargo.toml +++ b/objectstore-server/Cargo.toml @@ -45,9 +45,13 @@ tower = { workspace = true } tower-http = { workspace = true, features = ["catch-panic", "metrics", "set-header", "trace"] } [target.'cfg(target_os = "linux")'.dependencies] +jemalloc_pprof = { workspace = true, optional = true } tikv-jemallocator = { workspace = true } tikv-jemalloc-ctl = { workspace = true } +[features] +profiling = ["dep:jemalloc_pprof", "tikv-jemallocator/profiling"] + [dev-dependencies] nix = { workspace = true, features = ["signal"] } objectstore-test = { workspace = true } diff --git a/objectstore-server/src/endpoints/mod.rs b/objectstore-server/src/endpoints/mod.rs index a29f5060..ab460b69 100644 --- a/objectstore-server/src/endpoints/mod.rs +++ b/objectstore-server/src/endpoints/mod.rs @@ -12,10 +12,12 @@ pub mod health; mod keda; mod multipart; mod objects; +#[cfg(all(target_os = "linux", feature = "profiling"))] +mod profiling; /// Returns `true` for internal endpoints that are exempt from metrics and concurrency limits. 
pub fn is_internal_route(route: &str) -> bool { - matches!(route, "/health" | "/ready" | "/keda") + matches!(route, "/health" | "/ready" | "/keda") || route.starts_with("/debug/") } /// Returns a router with all objectstore HTTP endpoints mounted. @@ -28,8 +30,15 @@ pub fn routes() -> Router { .merge(batch::router()) .merge(multipart::router()); - Router::new() + let router = Router::new() .merge(health::router()) .merge(keda::router()) - .nest("/v1/", routes_v1) + .nest("/v1/", routes_v1); + + std::cfg_select! { + all(target_os = "linux", feature = "profiling") => { + router.merge(profiling::router()) + } + _ => { router } + } } diff --git a/objectstore-server/src/endpoints/profiling.rs b/objectstore-server/src/endpoints/profiling.rs new file mode 100644 index 00000000..abdf583b --- /dev/null +++ b/objectstore-server/src/endpoints/profiling.rs @@ -0,0 +1,181 @@ +//! On-demand heap profiling endpoints. +//! +//! Exposes three routes under `/debug/pprof/`: +//! +//! | Method | Path | Description | +//! |--------|------|-------------| +//! | `POST` | `/debug/pprof/enable` | Activate heap sampling | +//! | `POST` | `/debug/pprof/disable` | Deactivate heap sampling | +//! | `GET` | `/debug/pprof/heap` | Dump a symbolized gzipped pprof profile | +//! +//! **Requires the `profiling` feature.** When included, profiling is available +//! but disabled at startup. Enable it on demand to start capturing samples. +//! With `GET /debug/pprof/heap`, you can then download a pprof snapshot with +//! all allocations since profiling was enabled. +//! +//! **Note**: Due to their sensitive nature, these routes are only reachable via +//! the loopback interface. 
+ +use std::net::SocketAddr; + +use axum::extract::{ConnectInfo, Request}; +use axum::http::{HeaderValue, StatusCode, header}; +use axum::middleware::Next; +use axum::response::{IntoResponse, Response}; +use axum::{Router, routing}; +use jemalloc_pprof::PROF_CTL; + +use crate::state::ServiceState; + +const HEAP_DISPOSITION: HeaderValue = + HeaderValue::from_static("attachment; filename=\"heap.pb.gz\""); + +/// Returns a router for all `/debug/pprof/*` endpoints, protected by the loopback gate. +pub fn router() -> Router<ServiceState> { + Router::new() + .route("/debug/pprof/enable", routing::post(enable)) + .route("/debug/pprof/disable", routing::post(disable)) + .route("/debug/pprof/heap", routing::get(heap)) + .route_layer(axum::middleware::from_fn(require_loopback)) +} + +/// Middleware that rejects any request whose TCP peer is not a loopback address. +/// +/// Fails closed: if `ConnectInfo` is absent from the request extensions, the request is denied. +async fn require_loopback(request: Request, next: Next) -> Response { + let is_loopback = request + .extensions() + .get::<ConnectInfo<SocketAddr>>() + .is_some_and(|ConnectInfo(addr)| addr.ip().is_loopback()); + + if is_loopback { + next.run(request).await + } else { + StatusCode::NOT_FOUND.into_response() + } +} + +/// Activates jemalloc heap sampling. +/// +/// Sampling runs until `POST /debug/pprof/disable` is called. The sampling interval is set +/// at startup via `malloc_conf` (`lg_prof_sample:19`, i.e. 512 KiB mean interval). +async fn enable() -> Response { + let Some(ctl) = &*PROF_CTL else { + return unavailable(); + }; + + match ctl.lock().await.activate() { + Ok(()) => { + objectstore_log::info!("Heap profiling enabled"); + StatusCode::OK.into_response() + } + Err(err) => (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response(), + } +} + +/// Deactivates jemalloc heap sampling. 
+async fn disable() -> Response { + let Some(ctl) = &*PROF_CTL else { + return unavailable(); + }; + + match ctl.lock().await.deactivate() { + Ok(()) => { + objectstore_log::info!("Heap profiling disabled"); + StatusCode::OK.into_response() + } + Err(err) => (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response(), + } +} + +/// Dumps a symbolized, gzipped pprof heap profile. +/// +/// Returns `409 Conflict` if profiling is not currently active. Activate first with +/// `POST /debug/pprof/enable`. +/// +/// The response body is a gzipped pprof protobuf (`heap.pb.gz`), ready for +/// `go tool pprof` or Speedscope without shipping a separate binary. +async fn heap() -> Response { + let Some(ctl) = &*PROF_CTL else { + return unavailable(); + }; + + let mut guard = ctl.lock().await; + if !guard.activated() { + return (StatusCode::CONFLICT, "profiling not enabled").into_response(); + } + + objectstore_log::info!("Heap profile requested"); + match guard.dump_pprof() { + Ok(bytes) => ([(header::CONTENT_DISPOSITION, HEAP_DISPOSITION)], bytes).into_response(), + Err(err) => (StatusCode::INTERNAL_SERVER_ERROR, err.to_string()).into_response(), + } +} + +fn unavailable() -> Response { + (StatusCode::SERVICE_UNAVAILABLE, "profiling unavailable").into_response() +} + +#[cfg(test)] +mod tests { + use std::net::SocketAddr; + + use axum::Router; + use axum::body::Body; + use axum::extract::ConnectInfo; + use axum::http::{Request, StatusCode}; + use axum::middleware::from_fn; + use axum::routing::get; + use tower::ServiceExt; + + use super::require_loopback; + + fn make_loopback_app() -> Router { + Router::new() + .route("/test", get(|| async { StatusCode::OK })) + .route_layer(from_fn(require_loopback)) + } + + fn request_with_peer(peer: &str) -> Request<Body> { + let addr: SocketAddr = peer.parse().unwrap(); + Request::builder() + .uri("/test") + .extension(ConnectInfo(addr)) + .body(Body::empty()) + .unwrap() + } + + #[tokio::test] + async fn loopback_ipv4_allowed() { + 
let resp = make_loopback_app() + .oneshot(request_with_peer("127.0.0.1:1234")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + } + + #[tokio::test] + async fn loopback_ipv6_allowed() { + let resp = make_loopback_app() + .oneshot(request_with_peer("[::1]:1234")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::OK); + } + + #[tokio::test] + async fn non_loopback_rejected() { + let resp = make_loopback_app() + .oneshot(request_with_peer("203.0.113.1:1234")) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } + + #[tokio::test] + async fn no_connect_info_rejected() { + let req = Request::builder().uri("/test").body(Body::empty()).unwrap(); + let resp = make_loopback_app().oneshot(req).await.unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } +} diff --git a/objectstore-server/src/main.rs b/objectstore-server/src/main.rs index b1108f12..7f701788 100644 --- a/objectstore-server/src/main.rs +++ b/objectstore-server/src/main.rs @@ -12,6 +12,14 @@ use objectstore_server::cli; #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +// Prof enabled at compile time; sampling starts dormant and is activated on demand via the +// /debug/pprof/enable endpoint. lg_prof_sample:19 means one sample per 2^19 = 512 KiB allocated +// on average (jemalloc's default). The env var MALLOC_CONF still overrides this if set. 
+#[cfg(all(target_os = "linux", feature = "profiling"))] +#[allow(non_upper_case_globals)] +#[unsafe(export_name = "malloc_conf")] +static malloc_conf: &[u8] = b"prof:true,prof_active:false,lg_prof_sample:19\0"; + fn main() { match cli::execute() { Ok(()) => std::process::exit(0), diff --git a/scripts/build-cross.sh b/scripts/build-cross.sh index ee56d977..34a2f0eb 100755 --- a/scripts/build-cross.sh +++ b/scripts/build-cross.sh @@ -28,7 +28,7 @@ docker run --rm \ -v "$HOME/.cargo/registry":/usr/local/cargo/registry \ -v "$HOME/.cargo/git":/usr/local/cargo/git \ objectstore-build \ - -p "$PACKAGE" + -p "$PACKAGE" --features profiling docker build \ --platform linux/amd64 \