diff --git a/devops/build/packaging/docker/Dockerfile.rocky9 b/devops/build/packaging/docker/Dockerfile.rocky9
new file mode 100644
index 00000000000..1633d67aba6
--- /dev/null
+++ b/devops/build/packaging/docker/Dockerfile.rocky9
@@ -0,0 +1,154 @@
+# --------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------
+# Dockerfile for Apache Cloudberry (Incubating) - Production Build
+# --------------------------------------------------------------------
+# Multi-stage build optimized for production deployment
+# Builds Cloudberry from source using official development environment
+# --------------------------------------------------------------------
+
+# --------------------------------------------------------------------
+# Build stage: Use official Cloudberry development image
+# --------------------------------------------------------------------
+ARG BUILDER_IMAGE=apache/incubator-cloudberry:cbdb-build-rocky9-latest
+FROM ${BUILDER_IMAGE} AS builder
+
+# Install extra build dependencies as root before the source tree is copied,
+# so this layer stays cached when only the source changes.
+USER root
+RUN dnf install -y --enablerepo=crb liburing-devel && \
+    dnf clean all
+
+# Copy source code
+COPY --chown=gpadmin:gpadmin . /home/gpadmin/cloudberry
+
+# Build Cloudberry using official build scripts
+USER gpadmin
+WORKDIR /home/gpadmin/cloudberry
+
+RUN export SRC_DIR=/home/gpadmin/cloudberry && \
+    export BUILD_DESTINATION=/usr/local/cloudberry-db && \
+    mkdir -p "${SRC_DIR}/build-logs" && \
+    ./devops/build/automation/cloudberry/scripts/configure-cloudberry.sh && \
+    ./devops/build/automation/cloudberry/scripts/build-cloudberry.sh
+
+# --------------------------------------------------------------------
+# Runtime stage: Minimal production image
+# --------------------------------------------------------------------
+FROM rockylinux/rockylinux:9.6
+
+# Set locale environment and timezone
+ENV TZ=UTC
+ENV LANG=en_US.UTF-8
+ENV LC_ALL=en_US.UTF-8
+
+# Cloudberry environment variables
+ENV GPHOME=/usr/local/cloudberry-db
+ENV PATH=$GPHOME/bin:$PATH
+ENV LD_LIBRARY_PATH=$GPHOME/lib
+ENV COORDINATOR_DATA_DIRECTORY=/data0/database/coordinator/gpseg-1
+
+# Runtime dependencies (keep aligned with devops/sandbox/Dockerfile.*.rockylinux9 where possible)
+# Note: do NOT install libcurl here to avoid rocky9 libcurl-minimal conflicts.
+RUN dnf -y install --setopt=install_weak_deps=False \
+        apr \
+        bash \
+        bzip2-libs \
+        ca-certificates \
+        glibc-langpack-en \
+        iproute \
+        keyutils \
+        krb5-libs \
+        libevent \
+        libicu \
+        libstdc++ \
+        liburing \
+        libuuid \
+        libuv \
+        libxml2 \
+        libyaml \
+        libzstd \
+        lz4 \
+        ncurses \
+        net-tools \
+        openldap \
+        openssh-clients \
+        openssh-server \
+        openssl \
+        pam \
+        pcre2 \
+        perl \
+        procps-ng \
+        protobuf \
+        python3 \
+        readline \
+        rsync \
+        shadow-utils \
+        sudo \
+        which \
+        zlib && \
+    dnf clean all && rm -rf /var/cache/dnf
+
+# Set locale, create gpadmin user, and setup directories & SSH config
+RUN echo "LANG=en_US.UTF-8" > /etc/locale.conf && \
+    /usr/sbin/groupadd -r gpadmin && \
+    /usr/sbin/useradd -m -r -g gpadmin gpadmin && \
+    printf "Defaults:gpadmin !requiretty\ngpadmin ALL=(ALL) NOPASSWD: ALL\n" > /etc/sudoers.d/90-gpadmin && \
+    chmod 440 /etc/sudoers.d/90-gpadmin && \
+    printf '\n# Add Cloudberry entries\nif [ -f /usr/local/cloudberry-db/cloudberry-env.sh ]; then\n  source /usr/local/cloudberry-db/cloudberry-env.sh\nfi\n' >> /home/gpadmin/.bashrc && \
+    mkdir -p /data0/database/coordinator /data0/database/primary /data0/database/mirror && \
+    mkdir -p /home/gpadmin/.ssh && \
+    mkdir -p /run/sshd && \
+    chown -R gpadmin:gpadmin /data0 /home/gpadmin/.ssh && \
+    chmod 700 /home/gpadmin/.ssh && \
+    printf 'Host *\n    StrictHostKeyChecking no\n    UserKnownHostsFile ~/.ssh/known_hosts\n    ServerAliveInterval 60\n' > /home/gpadmin/.ssh/config && \
+    chown gpadmin:gpadmin /home/gpadmin/.ssh/config && \
+    chmod 600 /home/gpadmin/.ssh/config
+
+# Copy configuration files from sandbox (reusable components)
+COPY --chown=gpadmin:gpadmin devops/sandbox/configs/gpinitsystem_singlenode /tmp/gpinitsystem_singlenode
+
+# Reuse sandbox tuning configs (note: sysctls require privileged/sysctl support at runtime)
+COPY devops/sandbox/configs/90-cbdb-limits.conf /etc/security/limits.d/90-cbdb-limits.conf
+COPY devops/sandbox/configs/90-cbdb-sysctl.conf /etc/sysctl.d/90-cbdb-sysctl.conf
+
+# Copy custom scripts
+COPY --chown=gpadmin:gpadmin devops/build/packaging/docker/cloudberry-entrypoint.sh /usr/local/bin/cloudberry-entrypoint.sh
+
+# Set executable permissions
+RUN chmod 755 /usr/local/bin/cloudberry-entrypoint.sh /tmp/gpinitsystem_singlenode
+
+# Copy built Cloudberry from builder stage
+COPY --from=builder --chown=gpadmin:gpadmin /usr/local/cloudberry-db /usr/local/cloudberry-db
+COPY --from=builder --chown=gpadmin:gpadmin /usr/local/xerces-c/lib/libxerces-c.so /usr/local/cloudberry-db/lib/
+COPY --from=builder --chown=gpadmin:gpadmin /usr/local/xerces-c/lib/libxerces-c-3.*.so /usr/local/cloudberry-db/lib/
+
+# Expose coordinator port
+EXPOSE 5432
+
+# Healthcheck: coordinator readiness (initialization can take a while)
+HEALTHCHECK --interval=10s --timeout=5s --start-period=5m --retries=6 \
+    CMD /usr/local/cloudberry-db/bin/pg_isready -h localhost -p 5432 || exit 1
+
+# Volume for persistent data
+VOLUME ["/data0"]
+
+# Set default user
+USER gpadmin
+
+# Entrypoint and default command
+ENTRYPOINT ["/usr/local/bin/cloudberry-entrypoint.sh"]
+CMD ["cloudberry"]
diff --git a/devops/build/packaging/docker/README.md b/devops/build/packaging/docker/README.md
new file mode 100644
index 00000000000..f7a0a7293fd
--- /dev/null
+++ b/devops/build/packaging/docker/README.md
@@ -0,0 +1,92 @@
+# Apache Cloudberry (Incubating) Docker image (Rocky Linux 9)
+
+This directory contains Docker build definitions for a single-node Apache Cloudberry container image.
+
+## Build
+
+Build from the current source tree (multi-stage build using a pre-built builder image):
+
+```bash
+docker build \
+  -f devops/build/packaging/docker/Dockerfile.rocky9 \
+  -t apache/cloudberry:dev .
+```
+
+Override the builder image (for example, to pin it to a digest/tag or to use a locally built builder):
+
+```bash
+docker build \
+  -f devops/build/packaging/docker/Dockerfile.rocky9 \
+  --build-arg BUILDER_IMAGE=apache/incubator-cloudberry:cbdb-build-rocky9-latest \
+  -t apache/cloudberry:dev .
+```
+
+## Run
+
+On first startup the container initializes a single-node cluster under `/data0` and starts it. By default, host connections use `trust` authentication (no password), which is convenient for development and testing but should not be used in production.
+
+```bash
+docker volume create cloudberry_data
+
+docker run --rm -it \
+  --name cloudberry-db \
+  -p 5432:5432 \
+  -v cloudberry_data:/data0 \
+  apache/cloudberry:dev
+```
+
+When run interactively (with `-it` and without `-d`), the container initializes the cluster and then drops you into a `psql` prompt.
+
+To run it in the background (detached), use the `-d` flag. **Important**: Do not combine `-d` with `-t` or `-it`; otherwise the container will attempt to start the interactive SQL prompt and exit immediately.
+
+```bash
+docker run --rm -d \
+  --name cloudberry-db \
+  -p 5432:5432 \
+  -v cloudberry_data:/data0 \
+  apache/cloudberry:dev
+```
+
+To enforce password authentication, run the container with `-e POSTGRES_HOST_AUTH_METHOD=md5` (or `scram-sha-256`) and provide a `POSTGRES_PASSWORD`:
+
+```bash
+docker run --rm \
+  -d \
+  --name cloudberry-db \
+  -e POSTGRES_HOST_AUTH_METHOD=md5 \
+  -e POSTGRES_PASSWORD=your_secure_password \
+  -p 5432:5432 \
+  -v cloudberry_data:/data0 \
+  apache/cloudberry:dev
+```
+
+## Connect / Inspect
+
+From the host (assuming the default `trust` method, or supplying the configured password):
+
+```bash
+psql -h localhost -p 5432 -U gpadmin -d gpadmin
+```
+
+From inside the container (the image already sets the Cloudberry environment variables):
+
+```bash
+docker exec -it cloudberry-db psql -d postgres
+```
+
+Cluster status and logs:
+
+```bash
+docker exec cloudberry-db gpstate -s
+docker logs cloudberry-db
+```
+
+## Notes
+
+- **Timezone:** The container defaults to `UTC` (`TZ=UTC`). To use a different timezone, pass the `TZ` environment variable at run time (e.g., `-e TZ=Asia/Shanghai`).
+- **Graceful Shutdown:** The entrypoint traps `SIGTERM`, `SIGINT`, and `SIGQUIT` and runs `gpstop -a -M fast`, so `docker stop cloudberry-db` shuts the cluster down cleanly. Pass `-t <seconds>` if shutdown needs longer than Docker's default 10-second grace period.
+- **Internal SSH:** `sshd` is started only for internal cluster communication. Port 22 is not exposed by default.
+- **System Limits:** For best performance, consider raising container limits at runtime:
+  `--ulimit nofile=524288:524288 --ulimit nproc=131072:131072`.
+- **Tuning Configs:** The system tuning files are reused from `devops/sandbox/configs/`:
+  `/etc/security/limits.d/90-cbdb-limits.conf` and `/etc/sysctl.d/90-cbdb-sysctl.conf`.
diff --git a/devops/build/packaging/docker/cloudberry-entrypoint.sh b/devops/build/packaging/docker/cloudberry-entrypoint.sh
new file mode 100755
index 00000000000..7a4a54e4142
--- /dev/null
+++ b/devops/build/packaging/docker/cloudberry-entrypoint.sh
@@ -0,0 +1,338 @@
+#!/usr/bin/env bash
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# --------------------------------------------------------------------
+# Cloudberry Docker Entrypoint Script
+# --------------------------------------------------------------------
+# Standardized entrypoint for Apache Cloudberry Docker containers
+# Provides consistent behavior across different deployment scenarios
+# --------------------------------------------------------------------
+
+set -euo pipefail
+
+# Print a prefixed message on stderr.
+log() {
+    echo "cloudberry-entrypoint: $*" >&2
+}
+
+# Log an error and abort the entrypoint with exit status 1.
+die() {
+    log "ERROR: $*"
+    exit 1
+}
+
+# --------------------------------------------------------------------
+# Command Handler
+# --------------------------------------------------------------------
+# Handle different command types for Docker standard behavior
+# --------------------------------------------------------------------
+if [[ $# -gt 0 ]]; then
+    case "$1" in
+        cloudberry)
+            # Default behavior - start Cloudberry
+            ;;
+        -*)
+            echo "Error: unsupported options: $*" >&2
+            echo "Hint: run an interactive shell with: docker run -it <image> bash" >&2
+            exit 2
+            ;;
+        *)
+            # Any other command (bash, sh, ...) replaces the entrypoint
+            exec "$@"
+            ;;
+    esac
+fi
+
+# --------------------------------------------------------------------
+# Environment Setup
+# --------------------------------------------------------------------
+# Set up Cloudberry environment variables and directories
+# --------------------------------------------------------------------
+GPHOME="${GPHOME:-/usr/local/cloudberry-db}"
+COORDINATOR_DATA_DIRECTORY="${COORDINATOR_DATA_DIRECTORY:-/data0/database/coordinator/gpseg-1}"
+export GPHOME COORDINATOR_DATA_DIRECTORY
+
+if [[ ! -f "${GPHOME}/cloudberry-env.sh" ]]; then
+    die "Missing ${GPHOME}/cloudberry-env.sh (GPHOME=${GPHOME})"
+fi
+
+# Optional: Postgres-compatible env vars controlling host authentication.
+# - The default host auth method is trust: no password is required
+#   (NOT recommended for production).
+# - Set POSTGRES_HOST_AUTH_METHOD to md5 or scram-sha-256 together with
+#   POSTGRES_PASSWORD to enforce password authentication.
+HOST_AUTH_METHOD="${POSTGRES_HOST_AUTH_METHOD:-${CLOUDBERRY_HOST_AUTH_METHOD:-trust}}"
+DB_PASSWORD="${POSTGRES_PASSWORD:-${GPADMIN_PASSWORD:-${CLOUDBERRY_PASSWORD:-}}}"
+
+case "${HOST_AUTH_METHOD}" in
+    trust|md5|scram-sha-256) ;;
+    *) die "Unsupported host auth method: ${HOST_AUTH_METHOD}. Use trust|md5|scram-sha-256." ;;
+esac
+
+# Track whether auth settings were passed explicitly (even if empty), so that
+# restarts only rewrite the access config when the operator asked for it.
+host_auth_method_explicit=false
+if [[ "${POSTGRES_HOST_AUTH_METHOD+x}" == "x" || "${CLOUDBERRY_HOST_AUTH_METHOD+x}" == "x" ]]; then
+    host_auth_method_explicit=true
+fi
+
+db_password_explicit=false
+if [[ "${POSTGRES_PASSWORD+x}" == "x" || "${GPADMIN_PASSWORD+x}" == "x" || "${CLOUDBERRY_PASSWORD+x}" == "x" ]]; then
+    db_password_explicit=true
+fi
+
+# Source Cloudberry environment
+source "${GPHOME}/cloudberry-env.sh"
+
+# Create hostfile for cluster initialization (must match coordinator hostname)
+CBDB_HOSTNAME="$(hostname)"
+echo "${CBDB_HOSTNAME}" > /tmp/gpdb-hosts
+chown gpadmin:gpadmin /tmp/gpdb-hosts
+
+# --------------------------------------------------------------------
+# SSH Setup
+# --------------------------------------------------------------------
+# Generate SSH keys at runtime for security (production-safe)
+# --------------------------------------------------------------------
+if [[ ! -f /home/gpadmin/.ssh/id_rsa ]]; then
+    ssh-keygen -q -t rsa -b 4096 -N '' -C 'gpadmin@cloudberry' -f /home/gpadmin/.ssh/id_rsa >/dev/null
+fi
+
+touch /home/gpadmin/.ssh/authorized_keys
+chmod 600 /home/gpadmin/.ssh/authorized_keys
+if ! grep -qF "$(cat /home/gpadmin/.ssh/id_rsa.pub)" /home/gpadmin/.ssh/authorized_keys; then
+    cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys
+fi
+
+# --------------------------------------------------------------------
+# SSH Daemon Startup
+# --------------------------------------------------------------------
+# Start SSH daemon for cluster communication
+# --------------------------------------------------------------------
+mkdir -p /run/sshd 2>/dev/null || true
+# Host keys are generated at runtime (do not bake into image)
+sudo ssh-keygen -A >/dev/null 2>&1 || true
+if ! sudo /usr/sbin/sshd \
+        -o PasswordAuthentication=no \
+        -o PermitRootLogin=no \
+        -o ChallengeResponseAuthentication=no \
+        -o AllowUsers=gpadmin; then
+    die "Failed to start SSH daemon"
+fi
+sleep 2
+sudo rm -f /run/nologin 2>/dev/null || true
+
+# Setup SSH known_hosts (optional; StrictHostKeyChecking is disabled in the image)
+ssh-keyscan -t rsa "${CBDB_HOSTNAME}" localhost 127.0.0.1 > /home/gpadmin/.ssh/known_hosts 2>/dev/null || true
+chmod 600 /home/gpadmin/.ssh/known_hosts
+chown gpadmin:gpadmin /home/gpadmin/.ssh/known_hosts
+
+# Prepare an effective gpinitsystem config with the runtime hostname.
+GPINIT_CONF="/tmp/gpinitsystem_singlenode.runtime"
+cp /tmp/gpinitsystem_singlenode "${GPINIT_CONF}"
+sed -i -E "s/^COORDINATOR_HOSTNAME=.*/COORDINATOR_HOSTNAME=${CBDB_HOSTNAME}/" "${GPINIT_CONF}"
+
+# --------------------------------------------------------------------
+# Cluster Initialization
+# --------------------------------------------------------------------
+# Initialize Cloudberry cluster if not already initialized
+# --------------------------------------------------------------------
+echo "Starting Cloudberry initialization..."
+
+# Apply listen_addresses, the managed pg_hba.conf block and the gpadmin
+# password, then reload the configuration.
+#   $1 - "true" on first initialization; on later startups nothing is
+#        changed unless an auth method or password was passed explicitly.
+ensure_access_config() {
+    local is_first_init="${1:-false}"
+    local coordinator_conf="${COORDINATOR_DATA_DIRECTORY}/postgresql.conf"
+    local hba_file="${COORDINATOR_DATA_DIRECTORY}/pg_hba.conf"
+
+    # On subsequent startups, only manage access config if explicitly requested.
+    if [[ "${is_first_init}" != "true" && "${host_auth_method_explicit}" != "true" && "${db_password_explicit}" != "true" ]]; then
+        return 0
+    fi
+
+    # Ensure coordinator listens on all interfaces for container port-mapping use cases.
+    if [[ -f "${coordinator_conf}" ]]; then
+        if grep -Eq '^[#[:space:]]*listen_addresses[[:space:]]*=' "${coordinator_conf}"; then
+            sed -i -E "s/^[#[:space:]]*(listen_addresses)[[:space:]]*=.*/\\1 = '*'/" "${coordinator_conf}"
+        else
+            echo "listen_addresses = '*'" >> "${coordinator_conf}"
+        fi
+    fi
+
+    # Manage a dedicated docker auth block idempotently.
+    if [[ -f "${hba_file}" ]]; then
+        if grep -Eq '^# BEGIN CLOUDBERRY DOCKER AUTH$' "${hba_file}"; then
+            sed -i '/^# BEGIN CLOUDBERRY DOCKER AUTH$/,/^# END CLOUDBERRY DOCKER AUTH$/d' "${hba_file}"
+        fi
+
+        {
+            echo ""
+            echo "# BEGIN CLOUDBERRY DOCKER AUTH"
+            echo "# Managed by devops/build/packaging/docker/cloudberry-entrypoint.sh"
+            echo "host all all 0.0.0.0/0 ${HOST_AUTH_METHOD}"
+            echo "host all all ::/0 ${HOST_AUTH_METHOD}"
+            echo "# END CLOUDBERRY DOCKER AUTH"
+        } >> "${hba_file}"
+    fi
+
+    if [[ "${HOST_AUTH_METHOD}" != "trust" ]]; then
+        if [[ -z "${DB_PASSWORD}" ]]; then
+            if [[ "${is_first_init}" == "true" ]]; then
+                die "Database is uninitialized and POSTGRES_PASSWORD is not set (auth=${HOST_AUTH_METHOD}). Set POSTGRES_PASSWORD, or set POSTGRES_HOST_AUTH_METHOD=trust."
+            fi
+            log "POSTGRES_PASSWORD is not set; skipping password/auth reconfiguration."
+            return 0
+        fi
+
+        log "Setting gpadmin password..."
+        # Feed the SQL on stdin: psql does not interpolate variables such as
+        # :'gpadmin_pass' in commands passed with -c.
+        {
+            if [[ "${HOST_AUTH_METHOD}" == "scram-sha-256" ]]; then
+                # SCRAM auth needs a SCRAM verifier; an md5 hash would not work.
+                echo "SET password_encryption = 'scram-sha-256';"
+            fi
+            echo "ALTER USER gpadmin PASSWORD :'gpadmin_pass';"
+        } | psql -v ON_ERROR_STOP=1 -d template1 -v gpadmin_pass="${DB_PASSWORD}"
+    else
+        log "WARNING: POSTGRES_HOST_AUTH_METHOD=trust disables password authentication. Do NOT use this in production."
+    fi
+
+    # Reload configuration (pg_hba.conf / postgresql.conf).
+    gpstop -u || true
+}
+
+if [[ ! -f "${COORDINATOR_DATA_DIRECTORY}/PG_VERSION" ]]; then
+    echo "Initializing Cloudberry cluster (first startup)..."
+
+    if [[ "${HOST_AUTH_METHOD}" != "trust" && -z "${DB_PASSWORD}" ]]; then
+        die "Database is uninitialized and POSTGRES_PASSWORD is not set (auth=${HOST_AUTH_METHOD}). Set POSTGRES_PASSWORD, or set POSTGRES_HOST_AUTH_METHOD=trust."
+    fi
+
+    # Clean up any existing data directories
+    rm -rf /data0/database/coordinator/* /data0/database/primary/* /data0/database/mirror/* 2>/dev/null || true
+
+    # Ensure database directories exist with proper permissions
+    sudo mkdir -p /data0/database/coordinator /data0/database/primary /data0/database/mirror
+    sudo chown -R gpadmin:gpadmin /data0/database
+    chmod -R 700 /data0/database
+
+    # Initialize cluster using standard configuration
+    gpinitsystem -a \
+        -c "${GPINIT_CONF}" \
+        -h /tmp/gpdb-hosts \
+        --max_connections=100
+
+    ensure_access_config true
+
+    echo "Cluster initialization completed successfully!"
+else
+    echo "Cluster already initialized, starting..."
+    gpstart -a
+    ensure_access_config false
+fi
+
+# --------------------------------------------------------------------
+# Deployment Success Message
+# --------------------------------------------------------------------
+cat <<-'EOF'
+
+======================================================================
+  ____ _                 _ _
+ / ___| | ___  _   _  __| | |__   ___ _ __ _ __ _   _
+| |   | |/ _ \| | | |/ _` | '_ \ / _ \ '__| '__| | | |
+| |___| | (_) | |_| | (_| | |_) |  __/ |  | |  | |_| |
+ \____|_|\___/ \__,_|\__,_|_.__/ \___|_|  |_|   \__, |
+                                                |___/
+======================================================================
+=                       DEPLOYMENT SUCCESSFUL                        =
+======================================================================
+
+EOF
+
+# --------------------------------------------------------------------
+# Container Lifecycle Management
+# --------------------------------------------------------------------
+# Follow logs (users can `docker exec` for interactive shells)
+# --------------------------------------------------------------------
+tail_pid=""
+
+# Stop the cluster and the background log follower (best effort).
+stop_cluster() {
+    log "Stopping Cloudberry..."
+    gpstop -a -M fast >/dev/null 2>&1 || true
+    if [[ -n "${tail_pid}" ]]; then
+        kill "${tail_pid}" 2>/dev/null || true
+    fi
+}
+
+# Signal handler: shut down cleanly, then exit so the container really
+# terminates instead of resuming the keep-alive wait below.
+handle_stop_signal() {
+    trap - SIGTERM SIGINT SIGQUIT
+    stop_cluster
+    exit 0
+}
+
+trap handle_stop_signal SIGTERM SIGINT SIGQUIT
+
+log_dir="${COORDINATOR_DATA_DIRECTORY}/log"
+log "Following coordinator logs in: ${log_dir}"
+
+shopt -s nullglob
+while [[ ! -d "${log_dir}" ]]; do
+    sleep 1
+done
+
+while :; do
+    log_files=( "${log_dir}"/*.log )
+    if (( ${#log_files[@]} > 0 )); then
+        break
+    fi
+    sleep 1
+done
+
+tail -n 0 -F "${log_files[@]}" &
+tail_pid=$!
+
+# Drop into psql if running interactively attached, otherwise wait indefinitely
+# Note: If running detached but allocating a TTY (`docker run -d -t`), this
+# will still trigger and exit. Please use `docker run -d` without `-t`/`-it`
+# to keep the container running in the background.
+if [ -t 0 ]; then
+    log "Interactive terminal detected. Dropping into psql..."
+    # Sleep briefly to ensure the cluster is ready
+    sleep 2
+    # Run psql as a child (not via exec) so the cluster can still be stopped
+    # cleanly once the session ends.
+    psql_status=0
+    psql -d postgres || psql_status=$?
+    trap - SIGTERM SIGINT SIGQUIT
+    stop_cluster
+    exit "${psql_status}"
+else
+    log "Running in detached mode. Waiting indefinitely..."
+    wait "${tail_pid}" || true
+    # The log follower ended on its own. Keep the container alive with a
+    # background sleep: bash runs traps promptly only while blocked in the
+    # `wait` builtin, not while a foreground command is running.
+    sleep infinity &
+    wait $! || true
+fi