diff --git a/nix/docker/README.md b/nix/docker/README.md index 902fe87ac80..bd84c2fe790 100644 --- a/nix/docker/README.md +++ b/nix/docker/README.md @@ -120,6 +120,14 @@ docker run \ ghcr.io/intersectmbo/cardano-node:dev ``` +The resulting merged config and topology are written to +`/tmp/cardano-{{,tracer-}config,topology}-merged.json` +and used as the runtime configuration. +Relative file references are rewritten to absolute paths +anchored at `/opt/cardano/config/$NETWORK/` +so they resolve from the new location. + + ## CLI Mode To run cardano-cli, leave the `NETWORK` env variable unset and provide entrypoint args starting with `cli` followed by cardano-cli command args. @@ -149,6 +157,49 @@ respectively. This makes bind mounting easier when switching between default state directory locations, `/{data,ipc,logs}`, will work for both modes. +## Read-Only Root Filesystem +The image is compatible with `--read-only` (Docker/Podman) and +`securityContext.readOnlyRootFilesystem: true` (Kubernetes), provided the +runtime supplies writable storage for the state directories described above +(`/data`, `/ipc`, `/logs`) and a writable `/tmp` (tmpfs or `emptyDir`). + +A resolved-configuration snapshot is written at runtime to `/tmp/cardano-env` +and can be `source`d for an interactive debug shell inside the container. +The legacy path `/usr/local/bin/env` is preserved as a symlink to +`/tmp/cardano-env` for backwards compatibility. + +In "scripts" mode, GHC RTS profiling output (`cardano-node.stats`, +`cardano-node.prof`, `cardano-node.hp`, etc.) is directed to `/logs/` so the +image keeps working when profiling is enabled under a read-only root. +In "custom" mode the operator chooses the RTS flags, so any profiling output +must similarly be directed to a writable mount, for example: +``` +... run \ + --config /opt/cardano/config/mainnet/config.json \ + ... 
\ + +RTS --machine-readable -t/logs/cardano-node.stats -po/logs/cardano-node -p -RTS +``` + + +## Non-Root User +The image can run as any non-root user (`docker run --user <uid>` / +Kubernetes `securityContext.runAsUser`). None of the entrypoint or +`run-node` startup logic touches image-content directories at runtime; +all generated artifacts live under `/tmp`. + +The mount-point directories (`/data`, `/ipc`, `/logs`) are owned by +GID 0 and group-writable in the image, so non-root containers can write +to freshly-created Docker or Kubernetes volumes mounted at those paths +without an init container or pre-chown. To inherit the group-writable +perm, the non-root user needs to run with primary group 0 (the Kubernetes +default for `runAsUser`) or with supplementary group 0. In Kubernetes +you can also set `securityContext.fsGroup: 0` to have the kubelet chown +the volume on mount. For Docker, `--user <uid>:0` is the equivalent. + +The image defaults to running as root; specify a UID explicitly +to opt into a non-root run. + + ## Cardano-node Socket Sharing To share a cardano-node socket with a different container, a volume can be made for establishing cross-container communication: diff --git a/nix/docker/context/node/bin/entrypoint b/nix/docker/context/node/bin/entrypoint index a160f641cb0..88062d4b5e6 100755 --- a/nix/docker/context/node/bin/entrypoint +++ b/nix/docker/context/node/bin/entrypoint @@ -3,6 +3,15 @@ set -euo pipefail [[ -n ${DEBUG:-} ]] && set -x +# The image writes a resolved-config env snapshot and any merge-mode +# artifacts to /tmp. Catch the common operator mistake of running +# with a read-only filesystem without mounting a writable /tmp. +if ! [[ -w /tmp ]]; then + echo "ERROR: /tmp is not writable." >&2 + echo "With a read-only filesystem, mount a tmpfs or emptyDir at /tmp." >&2 + exit 1 +fi + # If the NETWORK env var is set to a valid cardano network, pre-defined # configuration will be used. 
if [[ -n ${NETWORK:-} ]]; then @@ -30,24 +39,44 @@ if [[ -n ${NETWORK:-} ]]; then # full replacement and null values persist. # # jq -S sorts output keys alphabetically for deterministic diffs. + # + # Merged files are written to /tmp so that the image can run as a + # non-root user ($CFG is image content and only writable by root) + # and under read-only root filesystems. + # Relative file references are rewritten to absolute paths + # anchored at $CFG/$NETWORK/ so they resolve from the new location. if [[ -n ${CARDANO_CONFIG_JSON_MERGE:-} ]]; then jq -S \ + --arg cfgDir "$CFG/$NETWORK" \ --argjson deepMerge "$CARDANO_CONFIG_JSON_MERGE" \ - '. * $deepMerge' \ + '. * $deepMerge + | with_entries( + if ((.key | test("GenesisFile$|^CheckpointsFile$")) + and (.value | type == "string") + and (.value | startswith("/") | not)) + then .value = "\($cfgDir)/\(.value)" + else . + end + )' \ < "$CFG/$NETWORK/config.json" \ - > "$CFG/$NETWORK/config-merged.json" - export CARDANO_CONFIG="$CFG/$NETWORK/config-merged.json" + > /tmp/cardano-config-merged.json + export CARDANO_CONFIG=/tmp/cardano-config-merged.json else export CARDANO_CONFIG="$CFG/$NETWORK/config.json" fi if [[ -n ${CARDANO_TOPOLOGY_JSON_MERGE:-} ]]; then jq -S \ + --arg cfgDir "$CFG/$NETWORK" \ --argjson deepMerge "$CARDANO_TOPOLOGY_JSON_MERGE" \ - '. * $deepMerge' \ + '. * $deepMerge + | if (.peerSnapshotFile? | type) == "string" and (.peerSnapshotFile | startswith("/") | not) + then .peerSnapshotFile = "\($cfgDir)/\(.peerSnapshotFile)" + else . 
+ end' \ < "$CFG/$NETWORK/topology.json" \ - > "$CFG/$NETWORK/topology-merged.json" - export CARDANO_TOPOLOGY="$CFG/$NETWORK/topology-merged.json" + > /tmp/cardano-topology-merged.json + export CARDANO_TOPOLOGY=/tmp/cardano-topology-merged.json else export CARDANO_TOPOLOGY="$CFG/$NETWORK/topology.json" fi diff --git a/nix/docker/context/node/bin/run-node b/nix/docker/context/node/bin/run-node index 1229631f801..607dc670e61 100755 --- a/nix/docker/context/node/bin/run-node +++ b/nix/docker/context/node/bin/run-node @@ -98,7 +98,7 @@ printRunEnv () { # writeRootEnv () { -cat << EOF > /usr/local/bin/env +cat << EOF > /tmp/cardano-env #!/usr/bin/env bash # Docker run ENV vars @@ -106,30 +106,30 @@ EOF if [[ -n ${CARDANO_SHELLEY_KES_AGENT_SOCKET:-} ]]; then echo "CARDANO_SHELLEY_KES_AGENT_SOCKET=\"$CARDANO_SHELLEY_KES_AGENT_SOCKET\"" \ - >> /usr/local/bin/env + >> /tmp/cardano-env fi if [[ -n ${CARDANO_TRACER_SOCKET_NETWORK_ACCEPT:-} ]]; then echo "CARDANO_TRACER_SOCKET_NETWORK_ACCEPT=\"$CARDANO_TRACER_SOCKET_NETWORK_ACCEPT\"" \ - >> /usr/local/bin/env + >> /tmp/cardano-env fi if [[ -n ${CARDANO_TRACER_SOCKET_NETWORK_CONNECT:-} ]]; then echo "CARDANO_TRACER_SOCKET_NETWORK_CONNECT=\"$CARDANO_TRACER_SOCKET_NETWORK_CONNECT\"" \ - >> /usr/local/bin/env + >> /tmp/cardano-env fi if [[ -n ${CARDANO_TRACER_SOCKET_PATH_ACCEPT:-} ]]; then echo "CARDANO_TRACER_SOCKET_PATH_ACCEPT=\"$CARDANO_TRACER_SOCKET_PATH_ACCEPT\"" \ - >> /usr/local/bin/env + >> /tmp/cardano-env fi if [[ -n ${CARDANO_TRACER_SOCKET_PATH_CONNECT:-} ]]; then echo "CARDANO_TRACER_SOCKET_PATH_CONNECT=\"$CARDANO_TRACER_SOCKET_PATH_CONNECT\"" \ - >> /usr/local/bin/env + >> /tmp/cardano-env fi -cat << EOF >> /usr/local/bin/env +cat << EOF >> /tmp/cardano-env CARDANO_BIND_ADDR="$CARDANO_BIND_ADDR" CARDANO_BLOCK_PRODUCER=$CARDANO_BLOCK_PRODUCER CARDANO_CONFIG="$CARDANO_CONFIG" @@ -138,20 +138,6 @@ CARDANO_LOG_DIR="$CARDANO_LOG_DIR" CARDANO_PORT=$CARDANO_PORT CARDANO_SOCKET_PATH="$CARDANO_SOCKET_PATH" 
CARDANO_TOPOLOGY="$CARDANO_TOPOLOGY" - -CARDANO_PUBLIC_IP="${CARDANO_PUBLIC_IP:-}" -CARDANO_CUSTOM_PEERS="${CARDANO_CUSTOM_PEERS:-}" - -# Mapping for topologyUpdater -CNODE_HOSTNAME="${CARDANO_PUBLIC_IP:-}" -CNODE_PORT=$CARDANO_PORT -CUSTOM_PEERS="${CARDANO_CUSTOM_PEERS:-}" - -# Derived from CARDANO_CONFIG to support non-mainnet deployments -GENESIS_JSON="$(dirname "$CARDANO_CONFIG")/shelley-genesis.json" - -TOPOLOGY="$CARDANO_TOPOLOGY" -LOG_DIR="$CARDANO_LOG_DIR" EOF } diff --git a/nix/docker/context/tracer/bin/entrypoint b/nix/docker/context/tracer/bin/entrypoint index d6f9ff1cfc8..193fbe61a2f 100755 --- a/nix/docker/context/tracer/bin/entrypoint +++ b/nix/docker/context/tracer/bin/entrypoint @@ -3,6 +3,15 @@ set -euo pipefail [[ -n ${DEBUG:-} ]] && set -x +# The image writes a resolved-config env snapshot and any merge-mode +# artifacts to /tmp. Catch the common operator mistake of running +# with a read-only filesystem without mounting a writable /tmp. +if ! [[ -w /tmp ]]; then + echo "ERROR: /tmp is not writable." >&2 + echo "With a read-only filesystem, mount a tmpfs or emptyDir at /tmp." >&2 + exit 1 +fi + # If the NETWORK env var is set to a valid cardano network, pre-defined # configuration will be used. if [[ -n ${NETWORK:-} ]]; then @@ -30,13 +39,19 @@ if [[ -n ${NETWORK:-} ]]; then # full replacement and null values persist. # # jq -S sorts output keys alphabetically for deterministic diffs. + # + # Merged files are written to /tmp so that the image can run as a + # non-root user ($CFG is image content and only writable by root) + # and under read-only root filesystems. + # The base tracer config has no relative file + # references, so no path rewriting is needed. if [[ -n ${CARDANO_CONFIG_JSON_MERGE:-} ]]; then jq -S \ --argjson deepMerge "$CARDANO_CONFIG_JSON_MERGE" \ '. 
* $deepMerge' \ < "$CFG/$NETWORK/tracer-config.json" \ - > "$CFG/$NETWORK/tracer-config-merged.json" - export CARDANO_CONFIG="$CFG/$NETWORK/tracer-config-merged.json" + > /tmp/cardano-tracer-config-merged.json + export CARDANO_CONFIG=/tmp/cardano-tracer-config-merged.json else export CARDANO_CONFIG="$CFG/$NETWORK/tracer-config.json" fi diff --git a/nix/docker/context/tracer/bin/run-tracer b/nix/docker/context/tracer/bin/run-tracer index 3c0bf3f48f7..b8fc44018e4 100755 --- a/nix/docker/context/tracer/bin/run-tracer +++ b/nix/docker/context/tracer/bin/run-tracer @@ -34,7 +34,7 @@ printRunEnv () { # writeRootEnv () { -cat << EOF > /usr/local/bin/env +cat << EOF > /tmp/cardano-env #!/usr/bin/env bash # Docker run ENV vars @@ -44,7 +44,7 @@ EOF if [[ -n ${CARDANO_MIN_LOG_SEVERITY:-} ]]; then echo "CARDANO_MIN_LOG_SEVERITY=\"$CARDANO_MIN_LOG_SEVERITY\"" \ - >> /usr/local/bin/env + >> /tmp/cardano-env fi } diff --git a/nix/docker/default.nix b/nix/docker/default.nix index 3805b4f85ce..c4c3a6f0c16 100644 --- a/nix/docker/default.nix +++ b/nix/docker/default.nix @@ -152,6 +152,15 @@ in # Similarly, make a root level dir for logs: mkdir -p logs + # Make the mount-point directories group-writable. Group is already + # 0 (the build env writes files as 0:0). When a fresh Docker volume + # is first mounted at one of these paths, the perms propagate from + # the image, so non-root containers (running as a UID in group 0 — + # the K8s default for runAsUser — or with explicit fsGroup) can + # write to a freshly-created volume without an init container or + # pre-chown. + chmod g+w data ipc logs + # The "custom" operation mode of this image, when the NETWORK env is # unset and "run" is provided as an entrypoint arg, will use the # following default directories. 
To reduce confusion caused by default @@ -176,6 +185,11 @@ in ln -sv ${snapshot-converter}/bin/snapshot-converter usr/local/bin/snapshot-converter ln -sv ${jq}/bin/jq usr/local/bin/jq + # Backwards-compatible alias for the resolved-config env snapshot + # written by run-node. The runtime writer targets /tmp/cardano-env so + # the image remains compatible with a read-only root filesystem. + ln -sv /tmp/cardano-env usr/local/bin/env + # Create iohk-nix network configs, organized by network directory. SRC="${genCfgs}" DST="opt/cardano" diff --git a/nix/docker/tracer.nix b/nix/docker/tracer.nix index db18fb0731f..fb767efeede 100644 --- a/nix/docker/tracer.nix +++ b/nix/docker/tracer.nix @@ -130,11 +130,21 @@ in # The "scripts" operation mode of this image, when the NETWORK env var is # set to a valid network, will use the following default directories # mounted at /: + mkdir -p data mkdir -p ipc # Similarly, make a root level dir for logs: mkdir -p logs + # Make the mount-point directories group-writable. Group is already + # 0 (the build env writes files as 0:0). When a fresh Docker volume + # is first mounted at one of these paths, the perms propagate from + # the image, so non-root containers (running as a UID in group 0 — + # the K8s default for runAsUser — or with explicit fsGroup) can + # write to a freshly-created volume without an init container or + # pre-chown. + chmod g+w data ipc logs + # The "custom" operation mode of this image, when the NETWORK env is # unset and "run" is provided as an entrypoint arg, will use the # following default directories. To reduce confusion caused by default @@ -143,6 +153,7 @@ in # permit use of volume mounts at the root directory location regardless # of which mode the image is operating in. 
mkdir -p opt/cardano + ln -sv /data opt/cardano/data ln -sv /ipc opt/cardano/ipc ln -sv /logs opt/cardano/logs @@ -153,6 +164,11 @@ in ln -sv ${cardano-tracer}/bin/cardano-tracer usr/local/bin/cardano-tracer ln -sv ${jq}/bin/jq usr/local/bin/jq + # Backwards-compatible alias for the resolved-config env snapshot + # written by run-tracer. The runtime writer targets /tmp/cardano-env so + # the image remains compatible with a read-only root filesystem. + ln -sv /tmp/cardano-env usr/local/bin/env + # Create iohk-nix network configs, organized by network directory. SRC="${genCfgs}" DST="opt/cardano" diff --git a/nix/nixos/cardano-node-service.nix b/nix/nixos/cardano-node-service.nix index 86843a617b0..faca5693685 100644 --- a/nix/nixos/cardano-node-service.nix +++ b/nix/nixos/cardano-node-service.nix @@ -862,12 +862,27 @@ in { description = ''Extra CLI args for cardano-node, to be surrounded by "+RTS"/"-RTS"''; }; + profilingOutputDir = mkOption { + type = nullOrStr; + default = null; + description = '' + Optional directory prefix for GHC RTS profiling output files + (cardano-node.stats, cardano-node.prof, cardano-node.hp, etc.). + When null, files are written relative to the working directory + (the systemd unit's WorkingDirectory for NixOS deployments, which + is cfg.stateDir). 
+ ''; + }; + profilingArgs = mkOption { type = listOf str; - default = - [ "--machine-readable" - "-tcardano-node.stats" - "-pocardano-node" + default = let + prefix = if cfg.profilingOutputDir == null then "" else "${cfg.profilingOutputDir}/"; + in + optionals (cfg.profiling != "none" || cfg.eventlog) [ + "--machine-readable" + "-t${prefix}cardano-node.stats" + "-po${prefix}cardano-node" ] ++ optional (cfg.eventlog) "-l" ++ ( diff --git a/nix/nixos/cardano-tracer-service.nix b/nix/nixos/cardano-tracer-service.nix index 581dd90e342..4cfa71be2f9 100644 --- a/nix/nixos/cardano-tracer-service.nix +++ b/nix/nixos/cardano-tracer-service.nix @@ -501,12 +501,27 @@ in { ''; }; + profilingOutputDir = mkOption { + type = nullOr str; + default = null; + description = '' + Optional directory prefix for GHC RTS profiling output files + (cardano-node.stats, cardano-node.prof, cardano-node.hp, etc.). + When null, files are written relative to the working directory + (the systemd unit's WorkingDirectory for NixOS deployments, which + is cfg.stateDir). + ''; + }; + profilingArgs = mkOption { type = listOf str; - default = - [ "--machine-readable" - "-tcardano-node.stats" - "-pocardano-node" + default = let + prefix = if cfg.profilingOutputDir == null then "" else "${cfg.profilingOutputDir}/"; + in + optionals (cfg.profiling != "none" || cfg.eventlog) [ + "--machine-readable" + "-t${prefix}cardano-node.stats" + "-po${prefix}cardano-node" ] ++ optional (cfg.eventlog) "-l" ++ ( diff --git a/nix/scripts.nix b/nix/scripts.nix index 421d09f6530..f6443da273d 100644 --- a/nix/scripts.nix +++ b/nix/scripts.nix @@ -16,6 +16,10 @@ let nodeConfig = cfg.environments.${cfg.environment}.nodeConfig; stateDir = mkDefault "state-node-${cfg.environment}"; runtimeDir = mkDefault null; + # When profiling is enabled, direct GHC RTS output + # (stats, prof, hp, ...) to /logs so the OCI image works + # with a read-only root filesystem. No effect when profiling = "none". 
+ profilingOutputDir = mkDefault "/logs"; } // optionalAttrs (envConfig ? topology) { topology = mkDefault envConfig.topology; };