From d0d61938fa6fcadb2608c4dae9a1c243e483e3f5 Mon Sep 17 00:00:00 2001 From: Aleksandar Grbic Date: Sun, 21 Dec 2025 13:22:06 +0100 Subject: [PATCH 1/3] Update documentation and configuration files for full-stack example - Enhanced README with additional sections for clarity and organization. - Added a new resources document to share community insights and inspirations. - Removed the terminology document to streamline content. - Updated mini PCs setup guide with detailed hardware preparation and configuration steps. - Improved K3S backup documentation with Cloudflare R2 setup instructions and Velero integration. - Added new Kubernetes configurations for Node.js API, including PostgreSQL and Redis deployments. - Adjusted Ingress settings for Node.js API to reflect updated host names and service ports. --- .vscode/settings.json | 6 +- README.md | 4 + .../mini-pcs-setup.md | 115 +++++++- docusaurus/docs/kubernetes/k3s-backup.md | 263 ++++++++++++++---- docusaurus/docs/resources.md | 15 + docusaurus/docs/terminology.md | 17 -- docusaurus/sidebars.ts | 6 - exercises/full-stack-example/.drawio | 58 ++-- .../nodejs-api-configmap.yaml | 6 +- .../nodejs-api-deployment.yaml | 5 + .../nodejs-api-ingress.yaml | 6 +- .../nodejs-api-postgres.yaml | 16 ++ .../nodejs-api-redis-deployment.yaml | 35 +++ .../nodejs-api-redis-pvc.yaml | 12 + .../nodejs-api-redis-service.yaml | 14 + 15 files changed, 459 insertions(+), 119 deletions(-) create mode 100644 docusaurus/docs/resources.md delete mode 100644 docusaurus/docs/terminology.md create mode 100644 exercises/full-stack-example/nodejs-api-postgres.yaml create mode 100644 exercises/full-stack-example/nodejs-api-redis-deployment.yaml create mode 100644 exercises/full-stack-example/nodejs-api-redis-pvc.yaml create mode 100644 exercises/full-stack-example/nodejs-api-redis-service.yaml diff --git a/.vscode/settings.json b/.vscode/settings.json index 7893d48..07da026 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,5 +3,9 @@ "https://i.ibb.co/xYkFskM/space-colorful-waves-abstract-4k-36.jpg" ], "background.autoInstall": true, - "background.smoothImageRendering": true + "background.smoothImageRendering": true, + "twitch.channel": "programmer_network", + "twitch.oauth": "oauth:xgqnvk20qflyufpd33kcibqp74a869", + "twitch.username": "programmer_network", + "ansible.python.interpreterPath": "/bin/python3" } \ No newline at end of file diff --git a/README.md b/README.md index d1aefcf..f536b7a 100644 --- a/README.md +++ b/README.md @@ -11,10 +11,12 @@ **Goal**: By the end of this journey, aim to have the capability to rapidly instantiate new development and production environments and expose them to the external world with equal ease. 
## Table of Contents + - [Hardware](./docs/hardware-components.md#hardware) - [Hardware Components](./docs/hardware-components.md#hardware) - [Why These Choices?](./docs/hardware-components.md#why-these-choices) - [Raspberry Pi's Setup](./docs/raspberry-pi-setup.md#raspberry-pis-setup) + - [Flash SD Cards with Raspberry Pi OS](./docs/raspberry-pi-setup.md#flash-sd-cards-with-raspberry-pi-os-using-pi-imager) - [Initial Boot and Setup](./docs/raspberry-pi-setup.md#initial-boot-and-setup) - [Update and Upgrade](./docs/raspberry-pi-setup.md#update-and-upgrade---ansible-playbook) @@ -27,12 +29,14 @@ - [Automation with Ansible](./docs/getting-started-with-ansible.md) - [K3S Setup](./docs/k3s-setup.md#k3s-setup) + - [Enable Memory CGroups](./docs/k3s-setup.md#enable-memory-cgroups-ansible-playbook) - [Master Node](./docs/k3s-setup.md#setup-the-master-node) - [Worker Nodes](./docs/k3s-setup.md#setup-worker-nodes) - [Kubectl on local machine](./docs/k3s-setup.md#setup-kubectl-on-your-local-machine) - [Kubernetes Theory](./docs/kubernetes-theory.md#kubernetes) + - [What is Kubernetes](./docs/kubernetes-theory.md#1-what-is-kubernetes-) - [Kubernetes Components Explained](./docs/kubernetes-theory.md#kubernetes-components-explained) - [Control Plane Components](./docs/kubernetes-theory.md#control-plane-components) diff --git a/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md b/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md index 62a96b4..c0797af 100644 --- a/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md +++ b/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md @@ -2,28 +2,113 @@ title: Mini PCs Setup --- -Setting up mini PCs is a bit different than setting up Raspberry Pi's. That is mainly because you might not have the same hardware available. Due to this, what I write here might not match your setup completely, but regardless of the hardware, worst case, it should be nearly identical. +Setting up mini PCs differs from setting up Raspberry Pis, mainly due to hardware variations. Your experience may vary depending on your specific mini PC, but the general process should be similar. The main difference is the architecture, x86 vs ARM, and the operating systems. Raspberry Pis run on Raspbian, while our mini PCs use Ubuntu Server. Although both are Linux distributions and Debian-based, they differ slightly in configuration and the services they use. -## Setup +### Goals for Mini PC Servers -Typically, when thinking about PC's, we often think about the computer that one uses to surf the web, watch videos, play games, and do other things. Due to that, the way we setup the PC is a bit different. We focus on e.g. maximum performance, while paying the price of high power consumption, and high heat. +When configuring mini PCs, we prioritize: -In the context of servers, especially our home "mini data center", we want to focus on low power consumption, low noise, and low heat. All of these requirements (or goals) equal low cost. Since our computers will be running 24/7, we have to do all we can to reduce the cost of running them. Additionally, we want to extend the life of the hardware, and reduce the amount of maintenance we have to do. +- **Low power consumption** (reduces electricity costs) +- **Low noise** (quieter operation) +- **Low heat output** (extends hardware lifespan, less cooling required) -With that little "preface", let's get started. 
Whenever you get one of the used mini PCs (as I showed under [Hardware](../hardware-raspberry-pi-setup/hardware.mdx#mini-pcs)), you'll have to do some maintenance: +These factors help keep costs down and ensure stable, long-term operation. -- Clean the hardware, e.g. dust, -- Clean the computer -- Install a new OS -- Install a new OS +### Initial Hardware Preparation -### BIOS +**Clean the hardware:** -### Ubuntu Server +- Open the case and remove dust using compressed air. +- Check for any obvious damage or worn-out components. +- Clean the CPU using isopropyl alcohol and a microfiber cloth. +- Apply new thermal paste to the CPU. Every single Mini PC that I bought had a very worn out thermal paste. This will keep the CPU cool, and prevent it from overheating. -For our Mini PCs, we'll be using [Ubuntu Server](https://ubuntu.com/download/server) as the OS. More specifically, throughout the setup process, we'll select the minimal (minimized) version of the OS. +**Upgrade RAM (if possible):** -> Ubuntu Server Minimal is a version of Ubuntu Server with a reduced set of pre-installed packages, designed for cloud deployments and situations where a smaller footprint and faster boot times are desired. It provides the bare minimum to get to the command line, making it ideal for users who know what they're doing and prefer to install only necessary software. +While not mandatory, and higly dependent on your needs and usecase, I recommend to upgrade the RAM to the maximum supported by your mini PC. You can also do this later, especially by looking for used RAM on eBay, or other platforms. -If you've never setup Ubuntu Server before, I recommend you to read the [How to install Ubuntu Server 22.04 -](https://systemwatchers.com/index.php/blog/ubuntu-server/how-to-install-ubuntu-server-22-04/). +**Check storage:** + +Use an SSD, preferably an NVMe, for better performance and faster boot times. Additionally, ensure the drive is healthy (consider running a SMART test). + +### BIOS Settings + +- Update the BIOS to the latest version (if available). +- Set the system to auto power-on after power loss. This is critical for servers, as we want to ensure that the server is always on, and that it's always available. If there is a power outage, or any other setback, we want to ensure that the server gets back online as soon as possible. +- Disable any hardware components that are not needed, e.g. Bluetooth, WiFi, etc. +- Ensure that the system is set to boot from the SSD. We want to ensure that the system doesn't have to wait for any other boot devices, e.g. USB, CD-ROM, etc. + +### Ubuntu Server Installation + +We'll use [Ubuntu Server](https://ubuntu.com/download/server) as the operating system. + +**Recommended steps:** + +- Download the Ubuntu Server ISO (choose the minimal installation option). +- Create a bootable USB drive with the ISO (e.g., using [Rufus](https://rufus.ie/) or `dd`). +- Boot the mini PC from the USB drive. +- Follow the installation prompts: + + - Select the minimal (minimized) version of Ubuntu Server. In simple words, we want to install the bare minimum to get to the command line. We won't be using any GUI, and we'll be using the command line to manage our servers. + - Set up your user account and hostname. While hostname can be anything, I recommend setting it to something that will easily identify the server. As you will add more servers over time, you'll want to be able to easily identify them. E.g. you could have something like `lenovo-m920q-mini-pc-1`. 
+ - **Enable SSH during installation** (critical for remote management). + - Partition the disk as needed (guided partitioning is fine for most users). + +- Complete the installation and reboot. + +### Optimize our Mini PC's + +At this point, we want to do several steps that we've done under [Raspberry Pi Setup](../hardware-raspberry-pi-setup/raspberry-pi-setup.md). Essentially, we need to disable some default services, and ensure minimal power consumption. As our PC's will be running 24/7, and in many cases, will remain idle most of the time, we want to ensure that we're not wasting any resources. + +Connect to your mini PC via SSH from another computer: + +```sh +ssh @ +``` + +Update the system: + +```sh +sudo apt update && sudo apt upgrade -y +``` + +#### Disable Swap + +As with the Raspberry Pis, we want to disable swap. I have also included an [Ansible playbook](../../static/ansible/playbooks/disable-swap-ubuntu-server.yml) to automate this process. However, for the sake of learning, especially if you have multiple machines, I recommend doing the steps manually at least once to understand what’s happening. Once you’re comfortable, you can use the playbook for convenience. + +To permanently disable swap on Ubuntu Server: + +**Turn off swap immediately:** + +```sh +sudo swapoff -a +``` + +**Edit /etc/fstab to prevent swap from enabling on boot:** + +```sh +sudo nano /etc/fstab +``` + +Find any lines referencing a swap partition or swap file (they usually contain the word 'swap'). Comment out those lines by adding a `#` at the beginning, or delete them entirely. + +For example: + +``` +#/swapfile none swap sw 0 0 +``` + +Save and exit (`Ctrl+O`, `Enter`, then `Ctrl+X` in nano). + +#### Install btop, sensors, and powertop + +```sh +sudo apt install btop sensors powertop +``` + +You can now use `btop` to monitor the system and `sensors` to check CPU temperature. This is typically what I do when setting up a new server, it gives confidence that the thermal paste is applied correctly and the CPU is not overheating. Additionally, you can see how much memory and CPU are being used, and spot any services you may have forgotten to disable. + +For `powertop`, I recommend running it once and then configuring it to run automatically at boot. This helps us understand our server's power usage and find optimization opportunities. + +TODO: https://github.com/hubblo-org/scaphandre +TODO: Watch YouTube for some power consumption tips diff --git a/docusaurus/docs/kubernetes/k3s-backup.md b/docusaurus/docs/kubernetes/k3s-backup.md index 81d2521..f8ebc3d 100644 --- a/docusaurus/docs/kubernetes/k3s-backup.md +++ b/docusaurus/docs/kubernetes/k3s-backup.md @@ -2,93 +2,252 @@ title: K3S Backup --- -## Backup and Restore for Single-Node K3s Cluster Using SQLite +### **Part 1: Prerequisites , Cloudflare R2 Setup** -[Ansible Playbook](/ansible/playbooks/backup-k3s.yml) +Before we touch the cluster, let's prepare our backup destination. -When working with a single-node K3s cluster, the default datastore is [SQLite](https://docs.k3s.io/datastore/backup-restore#backup-and-restore-with-sqlite), which is a lightweight, file-based database. Unfortunately, K3s does not provide specialized tools for backing up SQLite in single-node configurations. +1. **Create an R2 Bucket:** -In contrast, if you're running a multi-node (High Availability) cluster using etcd as the datastore, K3s offers a convenient [`k3s etcd-snapshot`](https://docs.k3s.io/cli/etcd-snapshot) command for backups and recovery. 
However, this tool is not applicable for single-node clusters where SQLite is the default datastore. + - In your Cloudflare dashboard, go to **R2** and click **Create bucket**. + - Give it a unique name (e.g., `k3s-backup-repository`). Note this name. + - Note your **S3 Endpoint URL** from the bucket's main page. It looks like: `https://.r2.cloudflarestorage.com`. -### Why Manually Back Up? +2. **Create R2 API Credentials:** -SQLite backups in K3s require manual steps because: + - On the main R2 page, click **Manage R2 API Tokens**. + - Click **Create API Token**. + - Give it a name (e.g., `k3s-backup-token`) and grant it **Object Read & Write** permissions. + - Click **Create API Token** and securely copy the **Access Key ID** and the **Secret Access Key**. -* SQLite is a simple, file-based database, so backing it up is as easy as copying key directories. -* K3s doesn't provide automatic backup utilities for this. +You now have four critical pieces of information: -The good news is that manual backups are not too complicated. In this guide, we'll walk you through how to perform a manual backup and restore of K3s data using simple tools. +- Bucket Name +- S3 Endpoint URL +- Access Key ID +- Secret Access Key -## Backup and Restore for Single-Node K3s (SQLite) +### **Part 2: The Foundation , K3s Installation** -### Backup Process: +Install K3s on your server node. Using the default installation script is straightforward. -1. **Identify Critical Files**: - -- SQLite Database: `/var/lib/rancher/k3s/server/db/` -- TLS Certificates: `/var/lib/rancher/k3s/server/tls/` -- Join Token: `/var/lib/rancher/k3s/server/token` +```bash +curl -sfL https://get.k3s.io | sh - +# Wait a moment for it to start +sudo k3s kubectl get nodes +``` -2. Create Backup Folder on Local Machine: +### **Part 3: The Storage Layer , Longhorn Setup** + +We will install Longhorn using Helm, the standard package manager for Kubernetes. + +1. **Add the Longhorn Helm Repository:** -```bash -mkdir -p ~/k3s-backups/ + ```bash + helm repo add longhorn https://charts.longhorn.io + helm repo update + ``` + +2. **Install Longhorn:** + + ```bash + helm install longhorn longhorn/longhorn \ + --namespace longhorn-system \ + --create-namespace \ + --set persistence.defaultClass=true + ``` + + - `persistence.defaultClass=true`: This is crucial. It makes Longhorn the default storage provider for any `PersistentVolumeClaim` (PVC). + +3. **Verify the Installation:** + + ```bash + kubectl get pods -n longhorn-system --watch + # Wait until all pods are Running. This can take several minutes. + ``` + +4. **Configure Longhorn's Native Backup (Secondary Protection):** + + - Access the Longhorn UI. You can do this via port-forwarding: + ```bash + kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80 + ``` + Now open `http://localhost:8080` in your browser. + - Navigate to **Settings \> Backup**. + - Set the **Backup Target** to your R2 endpoint and bucket: `s3://@/` (for R2, region can be `auto`). For example: `s3://k3s-backup-repository@auto/longhorn` + - Create a Kubernetes secret containing your R2 credentials: + ```bash + kubectl create secret generic r2-longhorn-secret -n longhorn-system \ + --from-literal=AWS_ACCESS_KEY_ID='YOUR_R2_ACCESS_KEY_ID' \ + --from-literal=AWS_SECRET_ACCESS_KEY='YOUR_R2_SECRET_ACCESS_KEY' + ``` + - Set the **Backup Target Credential Secret** in the Longhorn UI to `r2-longhorn-secret`. + - Click **Save**. 
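If you prefer to keep this configuration out of the UI, the same two settings can also be supplied as Helm values when installing or upgrading Longhorn. This is a sketch under the assumption that your chart version exposes the `defaultSettings.backupTarget` and `defaultSettings.backupTargetCredentialSecret` values; the bucket path and secret name are the ones created in the steps above.

```bash
# Declarative alternative to the UI steps above: point Longhorn at the R2
# bucket and the credential secret through the chart's defaultSettings.
helm upgrade --install longhorn longhorn/longhorn \
  --namespace longhorn-system \
  --create-namespace \
  --set persistence.defaultClass=true \
  --set defaultSettings.backupTarget="s3://k3s-backup-repository@auto/longhorn" \
  --set defaultSettings.backupTargetCredentialSecret="r2-longhorn-secret"
```

Either way, the **Backup** page in the Longhorn UI should load without errors once the target is reachable, which is a quick way to confirm the credentials work.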
+ +### **Part 4: The Primary Backup Layer , Velero Setup** + +This is the core of our application recovery strategy. + +1. **Create a Credentials File for Velero:** + Create a file named `credentials-velero`: + + ```ini + [default] + aws_access_key_id = YOUR_R2_ACCESS_KEY_ID + aws_secret_access_key = YOUR_R2_SECRET_ACCESS_KEY + ``` + +2. **Install Velero with Helm:** + This command will install Velero and configure it to use R2 as the backup destination and enable the crucial CSI plugin for Longhorn snapshots. + + ```bash + helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts + helm repo update + + helm install velero vmware-tanzu/velero \ + --namespace velero \ + --create-namespace \ + --set-file credentials.secretContents.cloud=credentials-velero \ + --set configuration.provider=aws \ + --set configuration.backupStorageLocation.name=default \ + --set configuration.backupStorageLocation.bucket= \ + --set configuration.backupStorageLocation.config.region=auto \ + --set configuration.backupStorageLocation.config.s3Url= \ + --set-string snapshotsEnabled=true \ + --set-string deployRestic=false \ + --set initContainers[0].name=velero-plugin-for-aws \ + --set initContainers[0].image=velero/velero-plugin-for-aws:v1.10.0 \ + --set initContainers[0].volumeMounts[0].mountPath=/target \ + --set initContainers[0].volumeMounts[0].name=plugins \ + --set initContainers[1].name=velero-plugin-for-csi \ + --set initContainers[1].image=velero/velero-plugin-for-csi:v0.6.2 \ + --set initContainers[1].volumeMounts[0].mountPath=/target \ + --set initContainers[1].volumeMounts[0].name=plugins + ``` + +3. **Verify the Velero Installation:** + + ```bash + kubectl get pods -n velero --watch + # Wait for the velero pod to be Running. + ``` + + You have now installed Velero and given it access to your R2 bucket. + +### **Part 5: The Test , Break and Rebuild** + +Now for the fun part. Let's prove the system works. + +**Step 1: Deploy a Stateful Application** + +Create a file `my-app.yaml`: + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-app-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: longhorn + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 +kind: Pod +metadata: + name: my-app-pod +spec: + containers: + - name: my-app + image: busybox + command: ["/bin/sh", "-c"] + args: + - while true; do + echo "$(date)" >> /data/test.log; + sleep 5; + done + volumeMounts: + - name: data + mountPath: /data + volumes: + - name: data + persistentVolumeClaim: + claimName: my-app-pvc ``` -3. Copy Files from K3s Server to Local Machine: +Deploy it: ```bash -scp -r user@master_node:/var/lib/rancher/k3s/server/db ~/k3s-backups/ -scp -r user@master_node:/var/lib/rancher/k3s/server/tls ~/k3s-backups/ -scp user@master_node:/var/lib/rancher/k3s/server/token ~/k3s-backups/ +kubectl apply -f my-app.yaml ``` -4. (Optional) Compress the Backup: +**Step 2: Create a Backup with Velero** ```bash -tar -czf ~/k3s-backups/k3s-backup-$(date +%F_%T).tar.gz -C ~/k3s-backups db tls token +velero backup create my-first-backup --include-namespaces default ``` -### Restore Process: +This command tells Velero to back up all resources in the `default` namespace. Because you enabled the CSI plugin, Velero automatically finds the PVC and triggers Longhorn to create a volume snapshot, which is then backed up alongside the Pod and PVC definitions. -1. Stop K3s: +**Step 3: The Disaster , Destroy the Cluster** + +Let's simulate a total cluster failure. We will completely remove K3s. 
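Before wiping the node, it is worth a quick sanity check that the backup actually completed and reached R2. The `velero backup describe` command is standard; the `aws s3 ls` check is optional and assumes you have the AWS CLI installed and configured with the same R2 credentials, which is not part of the setup above.

```bash
# Confirm the backup finished (look for "Phase: Completed" in the output).
velero backup describe my-first-backup --details

# Optional: list what Velero wrote to the bucket. Replace <ACCOUNT_ID> with
# your Cloudflare account ID; the AWS CLI itself is an assumption here.
aws s3 ls s3://k3s-backup-repository/backups/my-first-backup/ \
  --endpoint-url https://<ACCOUNT_ID>.r2.cloudflarestorage.com
```

Once the backup reports `Completed`, go ahead with the teardown: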
```bash -sudo systemctl stop k3s +# First, delete the application to simulate data loss +kubectl delete -f my-app.yaml + +# Now, obliterate the cluster +/usr/local/bin/k3s-uninstall.sh ``` -2. Upload Backup from Local Machine to K3s Node: +Your cluster is now gone. All that remains is your R2 bucket. -```bash -scp -r ~/k3s-backups/db user@master_node:/var/lib/rancher/k3s/server/ -scp -r ~/k3s-backups/tls user@master_node:/var/lib/rancher/k3s/server/ -scp ~/k3s-backups/token user@master_node:/var/lib/rancher/k3s/server/ -``` +**Step 4: The Recovery , Rebuild and Restore** -3. Ensure Correct Permissions: +1. **Re-install a Clean K3s Cluster:** -```bash -sudo chown -R root:root /var/lib/rancher/k3s/server/db /var/lib/rancher/k3s/server/tls -sudo chown root:root /var/lib/rancher/k3s/server/token -sudo chmod 0600 /var/lib/rancher/k3s/server/token -``` + ```bash + curl -sfL https://get.k3s.io | sh - + sudo k3s kubectl get nodes + ``` -4. Start K3s: +2. **Re-install Longhorn:** You must have the storage provider available before you can restore data to it. -```bash -sudo systemctl start k3s -``` + ```bash + helm repo add longhorn https://charts.longhorn.io + helm repo update + helm install longhorn longhorn/longhorn --namespace longhorn-system --create-namespace --set persistence.defaultClass=true + # Wait for Longhorn pods to be running + kubectl get pods -n longhorn-system --watch + ``` -5. Verify Cluster Health: +3. **Re-install Velero with the EXACT same configuration:** Run the same Helm install command from Part 4 again. This is critical, as it reconnects Velero to your R2 bucket where the backups live. -```bash -kubectl get nodes -kubectl get pods --all-namespaces -``` +4. **Verify Velero Sees Your Backup:** + + ```bash + # It may take a minute for Velero to sync. + velero backup get + # You should see 'my-first-backup' in the list! + ``` + +5. **Restore Everything:** + + ```bash + velero restore create --from-backup my-first-backup + ``` -### Summary: +6. **Verify the Restore:** -- Backup: Copy `db/`, `tls/`, and `token` from `/var/lib/rancher/k3s/server/` to your local machine. + ```bash + kubectl get pods --watch + # You will see 'my-app-pod' get created. -- Restore: Stop K3s, upload those files back to the node, ensure permissions, and start K3s again. + # Check the data that was restored + kubectl exec my-app-pod -- cat /data/test.log + ``` +You will see the log file with the timestamps from before you destroyed the cluster. You have successfully recovered your application and its persistent state from nothing but a backup file in Cloudflare R2. diff --git a/docusaurus/docs/resources.md b/docusaurus/docs/resources.md new file mode 100644 index 0000000..ab74ed6 --- /dev/null +++ b/docusaurus/docs/resources.md @@ -0,0 +1,15 @@ +--- +title: Resources +--- + +I've been in this industry for over 20 years, and I can with certainty say that there's no better community than the one in this industry. Everytyhing that we build, all the new things we come up with, are the combination of the work of many people. In simple terms, we are all sharing knowledge with each other, and building up on top of each other's work. + +This very guide is no exception of that. I came up with many ideas, but also, got inspired by the work of others. + +While I haven't been using the things from these resources directly, e.g. some might have only challenged me to think differently, I'll still share them for the sake of being transparent, aka, being a good citizen of the community. 
+ +The order is random, and I'll write them down as I go through my notes. + +- [Pi Kubernetes Cluster](https://picluster.ricsanfre.com/) +- [k3s.rocks](https://k3s.rocks/) +- [RPI4Cluster](https://rpi4cluster.com/) diff --git a/docusaurus/docs/terminology.md b/docusaurus/docs/terminology.md deleted file mode 100644 index a6033bf..0000000 --- a/docusaurus/docs/terminology.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -title: Terminology ---- - -This course covers much more than Kubernetes, touching on storage, networking, security, and many other topics. To fully understand the material, it's important to learn a wide range of concepts. While this might seem challenging, it also makes the process satisfying and rewarding. - -In my experience as a teacher and professional, a common mistake people make when learning is not taking the time to pause and understand new terms or concepts. Instead, they rush ahead, which can weaken their foundation and make it harder to build a deep understanding later. - -As you go through this course, give yourself the time and patience to pause whenever you come across a new protocol or term. At the very least, take a moment to understand it on a basic level so you can confidently move forward. - -In today’s world, it’s easy to copy and paste solutions, especially with tools like AI. However, this can lead us to skip the important step of going back to understand the basics. - ---- - -### Swap Memory - -[Swap memory](https://serverfault.com/questions/48486/what-is-swap-memory), also known as swap space, is a portion of a computer's hard drive or SSD that acts as an extension of RAM. When RAM is full, the operating system moves inactive data from RAM to the swap space, freeing up RAM for active tasks and improving system performance. Accessing data in swap is slower than accessing data in RAM, but it prevents the system from crashing when it runs out of RAM. 
diff --git a/docusaurus/sidebars.ts b/docusaurus/sidebars.ts index 5f0fb50..95cc01c 100644 --- a/docusaurus/sidebars.ts +++ b/docusaurus/sidebars.ts @@ -275,12 +275,6 @@ const sidebars: SidebarsConfig = { }, ], }, - - { - id: "terminology", - type: "doc", - label: "Terminology", - }, ], }; diff --git a/exercises/full-stack-example/.drawio b/exercises/full-stack-example/.drawio index 8cc5c81..0306294 100644 --- a/exercises/full-stack-example/.drawio +++ b/exercises/full-stack-example/.drawio @@ -1,63 +1,75 @@ - + - - - - - - - + - - + + - + - + - + - + - + - + - + - + - + - + - + - + - + + + + + + + + + + + + + + + + + + + diff --git a/exercises/full-stack-example/nodejs-api-configmap.yaml b/exercises/full-stack-example/nodejs-api-configmap.yaml index c3c77f7..9d56f7f 100644 --- a/exercises/full-stack-example/nodejs-api-configmap.yaml +++ b/exercises/full-stack-example/nodejs-api-configmap.yaml @@ -7,5 +7,7 @@ data: POSTGRES_USER: appuser POSTGRES_PASSWORD: appuser_password POSTGRES_DB: app - POSTGRES_HOST: my-postgres-cluster-rw - POSTGRES_PORT: "5432" \ No newline at end of file + POSTGRES_HOST: my-postgres-cluster-rw.postgres-db.svc.cluster.local + POSTGRES_PORT: "5432" + + \ No newline at end of file diff --git a/exercises/full-stack-example/nodejs-api-deployment.yaml b/exercises/full-stack-example/nodejs-api-deployment.yaml index 4a6098a..514726e 100644 --- a/exercises/full-stack-example/nodejs-api-deployment.yaml +++ b/exercises/full-stack-example/nodejs-api-deployment.yaml @@ -21,6 +21,8 @@ spec: volumeMounts: - name: data mountPath: /app/data + - name: config-volume + mountPath: /app/config envFrom: - configMapRef: name: nodejs-api-configmap @@ -35,3 +37,6 @@ spec: - name: data persistentVolumeClaim: claimName: nodejs-api-pvc + - name: config-volume + configMap: + name: nodejs-api-configmap diff --git a/exercises/full-stack-example/nodejs-api-ingress.yaml b/exercises/full-stack-example/nodejs-api-ingress.yaml index 587c936..d22d2ae 100644 --- a/exercises/full-stack-example/nodejs-api-ingress.yaml +++ b/exercises/full-stack-example/nodejs-api-ingress.yaml @@ -1,12 +1,12 @@ apiVersion: networking.k8s.io/v1 kind: Ingress metadata: - name: traefik-ingress + name: nodejs-ingress namespace: nodejs spec: ingressClassName: traefik rules: - - host: node-api.local.host + - host: nodejs.local.host http: paths: - path: / @@ -15,4 +15,4 @@ spec: service: name: nodejs-api port: - number: 3000 \ No newline at end of file + number: 80 \ No newline at end of file diff --git a/exercises/full-stack-example/nodejs-api-postgres.yaml b/exercises/full-stack-example/nodejs-api-postgres.yaml new file mode 100644 index 0000000..0a26eb3 --- /dev/null +++ b/exercises/full-stack-example/nodejs-api-postgres.yaml @@ -0,0 +1,16 @@ +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: my-postgres-cluster + namespace: postgres-db +spec: + instances: 3 + primaryUpdateMethod: switchover + storage: + size: 1Gi + storageClass: my-first-storage-class + bootstrap: + initdb: + # Avoid creating the default app database + postInitSQL: + - CREATE USER appuser WITH PASSWORD 'appuser_password'; diff --git a/exercises/full-stack-example/nodejs-api-redis-deployment.yaml b/exercises/full-stack-example/nodejs-api-redis-deployment.yaml new file mode 100644 index 0000000..482d2f4 --- /dev/null +++ b/exercises/full-stack-example/nodejs-api-redis-deployment.yaml @@ -0,0 +1,35 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: redis + labels: + app: redis +spec: + replicas: 1 + selector: + matchLabels: + app: 
redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:7.0 + ports: + - containerPort: 6379 + name: redis_port + resources: + limits: + memory: "256Mi" + cpu: "250m" + requests: + memory: "128Mi" + cpu: "100m" + volumeMounts: + - name: redis-data + mountPath: /data + volumes: + - name: redis-data + emptyDir: {} diff --git a/exercises/full-stack-example/nodejs-api-redis-pvc.yaml b/exercises/full-stack-example/nodejs-api-redis-pvc.yaml new file mode 100644 index 0000000..80a66b0 --- /dev/null +++ b/exercises/full-stack-example/nodejs-api-redis-pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: nodejs-redis-pvc + namespace: nodejs +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: longhorn diff --git a/exercises/full-stack-example/nodejs-api-redis-service.yaml b/exercises/full-stack-example/nodejs-api-redis-service.yaml new file mode 100644 index 0000000..2803706 --- /dev/null +++ b/exercises/full-stack-example/nodejs-api-redis-service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + selector: + app: redis + ports: + - protocol: TCP + port: 6379 + targetPort: redis_port + type: ClusterIP From 779bb7e0166f548c013d1d2f57a9c132592ce124 Mon Sep 17 00:00:00 2001 From: Aleksandar Grbic Date: Mon, 12 Jan 2026 22:56:25 +0100 Subject: [PATCH 2/3] Add Prettier configuration and update Docusaurus settings - Introduced .prettierignore and .prettierrc files to standardize code formatting. - Updated Docusaurus configuration to change Prism themes for improved syntax highlighting. - Adjusted package.json to update dependencies for Prettier, TailwindCSS, and TypeScript. - Enhanced documentation with clearer language and formatting improvements across various files. 
--- docusaurus/.prettierignore | 13 + docusaurus/.prettierrc | 11 + .../docs/ansible/automation-with-ansible.md | 63 +- .../databases/databases-within-kubernetes.md | 134 +++- .../docs/databases/setup-cloudnative-pg.md | 150 +++- .../before-we-start.md | 29 +- .../hardware-raspberry-pi-setup/hardware.mdx | 258 ++++--- .../mini-pcs-setup.md | 77 +- .../raspberry-pi-setup.md | 142 +++- .../kubernetes/anatomy-of-kubectl-command.mdx | 10 +- .../kubernetes/anatomy-of-kubernetes-yaml.mdx | 236 +++++- .../kubernetes/common-kubernetes-commands.md | 120 +++- .../getting-started-with-kubernetes.md | 63 +- .../kubernetes/k3s-backup-cloudnative-pg.md | 679 ++++++++++++++++++ .../k3s-backup-disaster-recovery.md | 679 ++++++++++++++++++ docusaurus/docs/kubernetes/k3s-backup-etcd.md | 265 +++++++ .../docs/kubernetes/k3s-backup-longhorn.md | 355 +++++++++ .../docs/kubernetes/k3s-backup-velero.md | 398 ++++++++++ docusaurus/docs/kubernetes/k3s-backup.md | 312 +++----- .../docs/kubernetes/k3s-maintenance-health.md | 324 +++++++++ .../docs/kubernetes/k3s-maintenance-nodes.md | 408 +++++++++++ .../k3s-maintenance-troubleshooting.md | 498 +++++++++++++ .../kubernetes/k3s-maintenance-updates.md | 256 +++++++ docusaurus/docs/kubernetes/k3s-maintenance.md | 121 ++-- docusaurus/docs/kubernetes/k3s-setup.md | 100 ++- .../docs/kubernetes/kubernetes-80-20-rule.mdx | 9 +- .../kubernetes/kubernetes-yml-structure.md | 44 +- docusaurus/docs/kubernetes/setup-argocd.md | 534 ++++++++++++++ docusaurus/docs/kubernetes/setup-vault.md | 540 ++++++++++++++ .../docs/kubernetes/what-is-kubernetes.md | 77 +- ...raefik-dashboard-inside-the-k3s-cluster.md | 80 ++- .../kubernetes-networking-explained.md | 86 ++- .../networking/mikrotik/common-scenarios.mdx | 4 +- .../mikrotik/configure-email-on-mikrotik.md | 22 +- .../networking/mikrotik/core-concepts.mdx | 4 +- .../mikrotik/device-configuration.mdx | 4 +- .../mikrotik/dynamic-dns-with-cloudflare.md | 103 ++- .../networking/mikrotik/firewall-logic.mdx | 4 +- .../networking/mikrotik/lenovo-m920q-roas.mdx | 212 ++++-- .../networking/mikrotik/network-overview.mdx | 4 +- .../mikrotik/summary-and-checklist.mdx | 4 +- .../docs/networking/mikrotik/vlan-schema.mdx | 4 +- .../docs/networking/mikrotik/why-mikrotik.mdx | 53 +- docusaurus/docs/networking/setup-metallb.md | 247 +++++-- .../understanding-network-components.md | 144 +++- docusaurus/docs/resources.md | 13 +- .../docs/storage/setup-longhorn-dashboard.md | 52 +- docusaurus/docs/storage/setup-longhorn.md | 83 ++- .../understanding-longhorn-concepts.md | 125 ++-- docusaurus/docs/welcome.md | 50 +- docusaurus/docs/what-we-will-learn.md | 19 +- docusaurus/docs/why-is-it-hard.md | 35 +- docusaurus/docs/why.md | 64 +- docusaurus/docusaurus.config.ts | 4 +- docusaurus/package.json | 38 +- docusaurus/sidebars.ts | 177 +++-- .../src/components/ExplanationCard/index.tsx | 14 +- .../KubernetesYAMLAnatomy/configmap.ts | 12 +- .../KubernetesYAMLAnatomy/deployment.ts | 20 +- .../KubernetesYAMLAnatomy/index.tsx | 26 +- .../KubernetesYAMLAnatomy/ingress.ts | 21 +- .../components/KubernetesYAMLAnatomy/pvc.ts | 17 +- .../KubernetesYAMLAnatomy/secret.ts | 84 +-- .../KubernetesYAMLAnatomy/service.ts | 14 +- .../MikrotikNetworking/CodeBlock/index.tsx | 2 +- docusaurus/src/css/custom.css | 132 +++- .../static/img/programmer-network-logo.svg | 6 +- 67 files changed, 7703 insertions(+), 1185 deletions(-) create mode 100644 docusaurus/.prettierignore create mode 100644 docusaurus/.prettierrc create mode 100644 
docusaurus/docs/kubernetes/k3s-backup-cloudnative-pg.md create mode 100644 docusaurus/docs/kubernetes/k3s-backup-disaster-recovery.md create mode 100644 docusaurus/docs/kubernetes/k3s-backup-etcd.md create mode 100644 docusaurus/docs/kubernetes/k3s-backup-longhorn.md create mode 100644 docusaurus/docs/kubernetes/k3s-backup-velero.md create mode 100644 docusaurus/docs/kubernetes/k3s-maintenance-health.md create mode 100644 docusaurus/docs/kubernetes/k3s-maintenance-nodes.md create mode 100644 docusaurus/docs/kubernetes/k3s-maintenance-troubleshooting.md create mode 100644 docusaurus/docs/kubernetes/k3s-maintenance-updates.md create mode 100644 docusaurus/docs/kubernetes/setup-argocd.md create mode 100644 docusaurus/docs/kubernetes/setup-vault.md diff --git a/docusaurus/.prettierignore b/docusaurus/.prettierignore new file mode 100644 index 0000000..50e9cec --- /dev/null +++ b/docusaurus/.prettierignore @@ -0,0 +1,13 @@ +# Dependencies +node_modules/ + +# Build outputs +build/ +.docusaurus/ + +# Static assets +static/ + +# Generated files +*.min.js +*.min.css diff --git a/docusaurus/.prettierrc b/docusaurus/.prettierrc new file mode 100644 index 0000000..da12aa7 --- /dev/null +++ b/docusaurus/.prettierrc @@ -0,0 +1,11 @@ +{ + "printWidth": 80, + "proseWrap": "always", + "tabWidth": 2, + "useTabs": false, + "semi": false, + "singleQuote": true, + "trailingComma": "es5", + "arrowParens": "always", + "endOfLine": "lf" +} diff --git a/docusaurus/docs/ansible/automation-with-ansible.md b/docusaurus/docs/ansible/automation-with-ansible.md index ed7d089..43df692 100644 --- a/docusaurus/docs/ansible/automation-with-ansible.md +++ b/docusaurus/docs/ansible/automation-with-ansible.md @@ -3,15 +3,27 @@ sidebar_position: 4 title: Automation with Ansible --- -After setting up one of our Raspberry Pi devices, it's easy to see how tedious it would be to SSH into the other three devices and manually repeat each step. This process is not only time-consuming but also error-prone, given that each step is done manually. +After setting up one of our Raspberry Pi devices, it's easy to see how tedious +it would be to SSH into the other three devices and manually repeat each step. +This process is not only time-consuming but also error-prone, given that each +step is done manually. -To make things more efficient, we can turn to **Ansible**, a tool that allows us to automate tasks across multiple machines. To get started, refer to the official [Getting Started](https://docs.ansible.com/ansible/latest/getting_started/index.html) guide. +To make things more efficient, we can turn to **Ansible**, a tool that allows us +to automate tasks across multiple machines. To get started, refer to the +official +[Getting Started](https://docs.ansible.com/ansible/latest/getting_started/index.html) +guide. ## Installation and PATH Configuration -Once Ansible has been installed, you \*might\*\* encounter a warning indicating that some Ansible executables (like `ansible-doc`, `ansible-galaxy`, and others) are installed in `/home/YOUR_USER/.local/bin`, which is not included in your system’s PATH. +Once Ansible has been installed, you \*might\*\* encounter a warning indicating +that some Ansible executables (like `ansible-doc`, `ansible-galaxy`, and others) +are installed in `/home/YOUR_USER/.local/bin`, which is not included in your +system’s PATH. -To resolve this, you will need to edit your shell profile. If you’re using Bash, open the `.bashrc` file with `nano ~/.bashrc`. 
For Zsh users, you should open `.zshrc` by running `nano ~/.zshrc`. +To resolve this, you will need to edit your shell profile. If you’re using Bash, +open the `.bashrc` file with `nano ~/.bashrc`. For Zsh users, you should open +`.zshrc` by running `nano ~/.zshrc`. At the end of the file, you should add this line: @@ -19,27 +31,38 @@ At the end of the file, you should add this line: export PATH="$HOME/.local/bin:$PATH" ``` -Once you’ve saved and closed the file, reload your shell profile so that the new PATH takes effect. For Bash, you can run `source ~/.bashrc`, and for Zsh users, run `source ~/.zshrc`. After performing these steps, you should no longer see warnings related to the Ansible executables. +Once you’ve saved and closed the file, reload your shell profile so that the new +PATH takes effect. For Bash, you can run `source ~/.bashrc`, and for Zsh users, +run `source ~/.zshrc`. After performing these steps, you should no longer see +warnings related to the Ansible executables. ## Creating a Project Directory -With the setup completed, it's a good idea to create a dedicated directory to organize all your Ansible files. You can create a new directory called `ansible` and navigate into it using: +With the setup completed, it's a good idea to create a dedicated directory to +organize all your Ansible files. You can create a new directory called `ansible` +and navigate into it using: ```bash mkdir ansible && cd ansible ``` -In this folder, you’ll store your playbooks, inventory files, and any other Ansible configurations. +In this folder, you’ll store your playbooks, inventory files, and any other +Ansible configurations. ## Setting Up Ansible Vault -Ansible Vault is a tool that allows you to securely store sensitive information such as passwords, IP addresses, or other secrets. To initialize a new encrypted vault file, use the following command: +Ansible Vault is a tool that allows you to securely store sensitive information +such as passwords, IP addresses, or other secrets. To initialize a new encrypted +vault file, use the following command: ```bash ansible-vault create secrets.yml ``` -When prompted, set a password, this password will be required every time you access or modify the vault file. After you’ve set the password, you can include sensitive data in the `secrets.yml` file using YAML format. For example, you might include the IP addresses and credentials for each Raspberry Pi: +When prompted, set a password, this password will be required every time you +access or modify the vault file. After you’ve set the password, you can include +sensitive data in the `secrets.yml` file using YAML format. 
For example, you +might include the IP addresses and credentials for each Raspberry Pi: ```yaml all: @@ -54,22 +77,26 @@ all: ansible_host: 192.168.1.13 vars: ansible_user: pi - ansible_password: "your_password_here" + ansible_password: 'your_password_here' ``` -If you already have an unencrypted inventory file and want to encrypt it for security, you can do so by running: +If you already have an unencrypted inventory file and want to encrypt it for +security, you can do so by running: ```bash ansible-vault encrypt inventory.yml ``` -To use an encrypted inventory file when running a playbook, you’ll need to provide the vault password with the `--ask-vault-pass` option, like so: +To use an encrypted inventory file when running a playbook, you’ll need to +provide the vault password with the `--ask-vault-pass` option, like so: ```bash ansible-playbook -i secrets.yml --ask-vault-pass playbook.yml ``` -If you prefer not to manually enter the password every time, you can store the password in a text file such as `vault_pass.txt`. Ensure that the file is protected using the following command: +If you prefer not to manually enter the password every time, you can store the +password in a text file such as `vault_pass.txt`. Ensure that the file is +protected using the following command: ```bash chmod 600 vault_pass.txt @@ -87,6 +114,12 @@ If you need to make changes to the vault file, you can use the command: ansible-vault edit secrets.yml ``` -For more complex setups, such as managing different environments, you can create separate encrypted inventory files, like `prod_secrets.yml` and `dev_secrets.yml`. You can also organize secrets by groups or hosts by creating encrypted files for each, stored in the `group_vars` and `host_vars` directories. This approach allows for fine-grained control over your environments while keeping sensitive data secure. +For more complex setups, such as managing different environments, you can create +separate encrypted inventory files, like `prod_secrets.yml` and +`dev_secrets.yml`. You can also organize secrets by groups or hosts by creating +encrypted files for each, stored in the `group_vars` and `host_vars` +directories. This approach allows for fine-grained control over your +environments while keeping sensitive data secure. -In the context of our cluster, we won't be using Ansible in any complex way. We will be using it to automate the setup of our cluster. +In the context of our cluster, we won't be using Ansible in any complex way. We +will be using it to automate the setup of our cluster. 
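To tie this together, here is a minimal sketch of the kind of playbook we point at that encrypted inventory. The file name `update-and-upgrade.yml` and its single task are illustrative, not part of the repository.

```yaml
# update-and-upgrade.yml - illustrative playbook for the encrypted inventory
# above; it refreshes the apt cache and upgrades packages on every node.
- name: Update and upgrade all cluster nodes
  hosts: all
  become: true
  tasks:
    - name: Update apt cache and upgrade packages
      ansible.builtin.apt:
        update_cache: true
        upgrade: dist
```

Run it exactly as shown earlier, either with `--ask-vault-pass` or with `--vault-password-file vault_pass.txt`:

```bash
ansible-playbook -i secrets.yml --ask-vault-pass update-and-upgrade.yml
```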
diff --git a/docusaurus/docs/databases/databases-within-kubernetes.md b/docusaurus/docs/databases/databases-within-kubernetes.md index 04393c8..dfd11fe 100644 --- a/docusaurus/docs/databases/databases-within-kubernetes.md +++ b/docusaurus/docs/databases/databases-within-kubernetes.md @@ -2,94 +2,172 @@ title: Hosting Databases within Kubernetes --- -While researching and writing this Kubernetes series, I probably went through hundreds of articles, forum posts, and Reddit comments about a single core question: +While researching and writing this Kubernetes series, I probably went through +hundreds of articles, forum posts, and Reddit comments about a single core +question: -_Should I host a database within my Kubernetes cluster, or should I use a managed database service instead?_ +_Should I host a database within my Kubernetes cluster, or should I use a +managed database service instead?_ -It's a question that continues to pop up frequently, and honestly, as someone who's still relatively new to Kubernetes, it's not surprising why. The concern is valid and widely shared among both beginners and experts, not just in the Kubernetes community but also in modern DevOps and infrastructure contexts. +It's a question that continues to pop up frequently, and honestly, as someone +who's still relatively new to Kubernetes, it's not surprising why. The concern +is valid and widely shared among both beginners and experts, not just in the +Kubernetes community but also in modern DevOps and infrastructure contexts. ## The Historical Context: Stateless vs. Stateful -For a long time, the best practices around Kubernetes have revolved around the concept of "stateless" applications. The principles behind Kubernetes were designed to scale and recover from failures effortlessly. Because of its inherent design for self-healing and declarative state, Kubernetes excels at running stateless applications, where any pod or container can die, be recreated, and get back to running almost immediately, with no impact on the application's availability, given that these don’t carry persistent data within themselves. +For a long time, the best practices around Kubernetes have revolved around the +concept of "stateless" applications. The principles behind Kubernetes were +designed to scale and recover from failures effortlessly. Because of its +inherent design for self-healing and declarative state, Kubernetes excels at +running stateless applications, where any pod or container can die, be +recreated, and get back to running almost immediately, with no impact on the +application's availability, given that these don’t carry persistent data within +themselves. Here's a typical example: -- Imagine a web server or an API service. If one of the replicas of a stateless service goes down or gets killed by the scheduler, Kubernetes just spins up another one somewhere else, connects it to the load balancer, and resumes traffic, all without anyone noticing. Simple! +- Imagine a web server or an API service. If one of the replicas of a stateless + service goes down or gets killed by the scheduler, Kubernetes just spins up + another one somewhere else, connects it to the load balancer, and resumes + traffic, all without anyone noticing. Simple! -But when it comes to _stateful workloads_ like _databases_, it's a different story. This is where Kubernetes' stateless-first orientation starts to clash with the persistence and durability requirements of databases. 
The whole point of a database is to store data in a reliable and consistent way that survives pod failures, node restarts, or even an entire cluster shutting down. +But when it comes to _stateful workloads_ like _databases_, it's a different +story. This is where Kubernetes' stateless-first orientation starts to clash +with the persistence and durability requirements of databases. The whole point +of a database is to store data in a reliable and consistent way that survives +pod failures, node restarts, or even an entire cluster shutting down. -> The dilemma boils down to this simple point: **How do we reconcile stateless infrastructure with stateful services like databases?** +> The dilemma boils down to this simple point: **How do we reconcile stateless +> infrastructure with stateful services like databases?** ### The Challenge of Stateful Applications -When you introduce stateful services, such as databases, into a Kubernetes cluster, you encounter some key challenges: +When you introduce stateful services, such as databases, into a Kubernetes +cluster, you encounter some key challenges: Persistent Storage: -- Stateless apps don’t care about storage or data. In contrast, a database relies heavily on persistent storage to store and retrieve data without losing it. Fortunately, Kubernetes has matured in this area with components such as Persistent Volumes (PVs) and Persistent Volume Claims (PVCs), which allow pods to retain data even if they are recreated. But managing these can still be tricky, especially in scenarios of node failure or during cluster migrations. +- Stateless apps don’t care about storage or data. In contrast, a database + relies heavily on persistent storage to store and retrieve data without losing + it. Fortunately, Kubernetes has matured in this area with components such as + Persistent Volumes (PVs) and Persistent Volume Claims (PVCs), which allow pods + to retain data even if they are recreated. But managing these can still be + tricky, especially in scenarios of node failure or during cluster migrations. Data Consistency and Durability: -- Databases are critical to maintaining data consistency and often need to replicate data across nodes to ensure durability and high availability. Any deployment failure or pod misplacement could lead to potential data corruption or downtime. Using stateful sets for databases helps address this, but it requires careful orchestration of failover, recovery, and scaling. +- Databases are critical to maintaining data consistency and often need to + replicate data across nodes to ensure durability and high availability. Any + deployment failure or pod misplacement could lead to potential data corruption + or downtime. Using stateful sets for databases helps address this, but it + requires careful orchestration of failover, recovery, and scaling. Disaster Recovery and Backups: -- When a database is managed independently of Kubernetes (e.g., through a cloud provider), backup and restore processes are simplified. In Kubernetes, organizations need to carefully define backup strategies to avoid data loss during disruptions. +- When a database is managed independently of Kubernetes (e.g., through a cloud + provider), backup and restore processes are simplified. In Kubernetes, + organizations need to carefully define backup strategies to avoid data loss + during disruptions. Performance and Resource Contention: -- Applications running in a Kubernetes cluster often compete for shared resources (CPU, memory, I/O bandwidth). 
Large, resource-hungry databases may face performance bottlenecks, especially in clusters designed primarily for serving stateless microservices. Dedicated hosting of databases reduces the risk of congestion and performance hits. +- Applications running in a Kubernetes cluster often compete for shared + resources (CPU, memory, I/O bandwidth). Large, resource-hungry databases may + face performance bottlenecks, especially in clusters designed primarily for + serving stateless microservices. Dedicated hosting of databases reduces the + risk of congestion and performance hits. Scaling: -- Scaling stateless applications in Kubernetes is trivial, up and down scaling is as simple as updating the replica count of a deployment. Scaling stateful applications, particularly relational databases, is much more complex. Horizontal scaling for databases often requires complex sharding or replication, each with its own intricacies. +- Scaling stateless applications in Kubernetes is trivial, up and down scaling + is as simple as updating the replica count of a deployment. Scaling stateful + applications, particularly relational databases, is much more complex. + Horizontal scaling for databases often requires complex sharding or + replication, each with its own intricacies. ## When Does Database Hosting in Kubernetes Make Sense? -Kubernetes has developed significantly since its stateless-first beginnings, and modern workloads have shown that stateful applications, including databases, can be successfully hosted in Kubernetes, but it comes with trade-offs that you need to carefully assess based on your use case. +Kubernetes has developed significantly since its stateless-first beginnings, and +modern workloads have shown that stateful applications, including databases, can +be successfully hosted in Kubernetes, but it comes with trade-offs that you need +to carefully assess based on your use case. Here are situations where hosting a database in Kubernetes might make sense: ### Portability Across Multiple Environments -If you want consistency across your development, staging, and production environments, Kubernetes offers the advantage of running databases exactly the same way anywhere, whether that's on-premises, in the cloud, or even across hybrid-cloud setups. With the right configurations, you can move your entire application, including its database, as a single, unified package. +If you want consistency across your development, staging, and production +environments, Kubernetes offers the advantage of running databases exactly the +same way anywhere, whether that's on-premises, in the cloud, or even across +hybrid-cloud setups. With the right configurations, you can move your entire +application, including its database, as a single, unified package. ### Cost-Efficiency with Self-Hosting -Managed cloud databases provide convenience and reliability but come at a cost (often significant when scaling out). Running a database inside your Kubernetes cluster, especially in on-premise environments, can be much more cost-efficient. It allows for better utilization of server capacity, as you’ll be using the same resources to host both the application and database. +Managed cloud databases provide convenience and reliability but come at a cost +(often significant when scaling out). Running a database inside your Kubernetes +cluster, especially in on-premise environments, can be much more cost-efficient. 
+It allows for better utilization of server capacity, as you’ll be using the same +resources to host both the application and database. ### Advanced Kubernetes Features -Kubernetes has introduced a variety of features that make running databases smoother: +Kubernetes has introduced a variety of features that make running databases +smoother: -- StatefulSets: These provide ordered deployment, scaling, and self-healing of persistent pods used with your database. -- Persistent Volumes & Claims: Enable your pods to store data independently of their lifecycle, ensuring persistent data even if pods die. -- Operators: Kubernetes operators (e.g., for MySQL, PostgreSQL, MongoDB) have become more capable in simplifying the management of complex stateful apps such as databases, handling replication, failover, backups, and more automatically. +- StatefulSets: These provide ordered deployment, scaling, and self-healing of + persistent pods used with your database. +- Persistent Volumes & Claims: Enable your pods to store data independently of + their lifecycle, ensuring persistent data even if pods die. +- Operators: Kubernetes operators (e.g., for MySQL, PostgreSQL, MongoDB) have + become more capable in simplifying the management of complex stateful apps + such as databases, handling replication, failover, backups, and more + automatically. ## When Should You Use Managed Databases? -Though running stateful services and databases in Kubernetes is possible, for many teams, the complexities may outweigh the benefits. In particular, managed databases (e.g., AWS RDS, Google Cloud SQL, Azure Database for PostgreSQL/MySQL, etc.) have remained a more popular choice in many production environments. +Though running stateful services and databases in Kubernetes is possible, for +many teams, the complexities may outweigh the benefits. In particular, managed +databases (e.g., AWS RDS, Google Cloud SQL, Azure Database for PostgreSQL/MySQL, +etc.) have remained a more popular choice in many production environments. -Here are some reasons why you might opt for a managed database instead of self-hosting in Kubernetes: +Here are some reasons why you might opt for a managed database instead of +self-hosting in Kubernetes: ### Focused Reliability -Managed database services are specifically optimized for uptime, with guarantees around availability, fault tolerance, and backups. Cloud providers take care of infrastructure management, including failover and hardware reliability, which is ideal for workloads requiring strong service level agreements (SLAs). +Managed database services are specifically optimized for uptime, with guarantees +around availability, fault tolerance, and backups. Cloud providers take care of +infrastructure management, including failover and hardware reliability, which is +ideal for workloads requiring strong service level agreements (SLAs). ### Simpler Setup and Maintenance: -With managed databases, you don’t need to worry about keeping your database software up to date, scaling it as your system grows, ensuring data is backed up, or managing disaster recovery strategies. This level of automation around operational concerns can drastically reduce maintenance overhead. +With managed databases, you don’t need to worry about keeping your database +software up to date, scaling it as your system grows, ensuring data is backed +up, or managing disaster recovery strategies. This level of automation around +operational concerns can drastically reduce maintenance overhead. 
### Scalability Without Complexity: -Cloud-managed databases allow you to scale up (vertically) or replicate databases easier, without having to configure sharding or complex replication setups typically required for self-hosted databases. +Cloud-managed databases allow you to scale up (vertically) or replicate +databases easier, without having to configure sharding or complex replication +setups typically required for self-hosted databases. ## Conclusion -The decision on whether to host your database on Kubernetes or use a managed database highly depends on your specific needs. From cost-efficiency to required complexity, there are legitimate use cases for both approaches. +The decision on whether to host your database on Kubernetes or use a managed +database highly depends on your specific needs. From cost-efficiency to required +complexity, there are legitimate use cases for both approaches. In general: -If you don't want to manage the complexities of running and maintaining a database (backups, scaling, failover, etc.), a managed database service might be your best bet. +If you don't want to manage the complexities of running and maintaining a +database (backups, scaling, failover, etc.), a managed database service might be +your best bet. -However, if you require higher control, flexibility, or have specific portability needs (e.g., managing everything in Kubernetes, running on-premises, or multi-cloud without cloud-vendor lock-in), hosting a database within Kubernetes might make more sense. +However, if you require higher control, flexibility, or have specific +portability needs (e.g., managing everything in Kubernetes, running on-premises, +or multi-cloud without cloud-vendor lock-in), hosting a database within +Kubernetes might make more sense. diff --git a/docusaurus/docs/databases/setup-cloudnative-pg.md b/docusaurus/docs/databases/setup-cloudnative-pg.md index f7f9839..56eb11b 100644 --- a/docusaurus/docs/databases/setup-cloudnative-pg.md +++ b/docusaurus/docs/databases/setup-cloudnative-pg.md @@ -2,31 +2,100 @@ title: CloudNativePG Operator --- -### Install the CloudNativePG Operator +## Overview + +CloudNativePG is a Kubernetes operator for PostgreSQL that simplifies database +cluster management, providing features like high availability, automated +backups, and seamless upgrades. + +## Installation + +There are two ways to install CloudNativePG: via ArgoCD (recommended for GitOps +workflows) or manually using kubectl. + +### Option 1: Install via ArgoCD (Recommended) + +If you're using ArgoCD for GitOps, you can manage the CloudNativePG operator as +an ArgoCD Application. This ensures the operator stays up to date and is managed +declaratively. 
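Note that the Application manifest below references a project named `cloudnativepg-project`. ArgoCD will not sync an Application whose project does not exist, so either point the Application at the built-in `default` project or create the project first. A minimal sketch of such an AppProject, reusing the name, repository URL, and destination from the Application below (everything else is an assumption to adapt):

```yaml
# Hypothetical AppProject for the operator; alternatively, set
# `project: default` in the Application and skip this manifest entirely.
apiVersion: argoproj.io/v1alpha1
kind: AppProject
metadata:
  name: cloudnativepg-project
  namespace: argocd
spec:
  description: CloudNativePG operator
  sourceRepos:
    - https://github.com/cloudnative-pg/cloudnative-pg.git
  destinations:
    - namespace: '*'
      server: https://kubernetes.default.svc
  # The operator release ships CRDs and other cluster-scoped resources,
  # so they must be allowed at the project level.
  clusterResourceWhitelist:
    - group: '*'
      kind: '*'
```

Apply it with `kubectl apply -f` before (or together with) the Application manifest.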
+ +Create an ArgoCD Application manifest (e.g., `cloudnative-pg-application.yaml`): + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: cloudnative-pg-operator + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + namespace: argocd + server: https://kubernetes.default.svc + project: cloudnativepg-project + source: + path: releases/ + repoURL: https://github.com/cloudnative-pg/cloudnative-pg.git + targetRevision: main + directory: + recurse: false + include: 'cnpg-1.26.0.yaml' + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - ServerSideApply=true + - ApplyOutOfSyncOnly=true +``` + +Apply the application: + +```bash +kubectl apply -f cloudnative-pg-application.yaml +``` + +ArgoCD will automatically install and keep the operator updated. Check the +application status: + +```bash +kubectl get application cloudnative-pg-operator -n argocd +``` + +### Option 2: Install Manually using kubectl **Create the [CloudNativePG](https://cloudnative-pg.io) Namespace** -First, create a namespace for CloudNativePG. You don't have to do this, but it's good practice to separate operators into their own namespaces. + +First, create a namespace for CloudNativePG. You don't have to do this, but it's +good practice to separate operators into their own namespaces. ```bash kubectl create namespace cnpg-system ``` -**Install the [CloudNativePG](https://cloudnative-pg.io) Operator using kubectl** +**Install the [CloudNativePG](https://cloudnative-pg.io) Operator using +kubectl** -The CloudNativePG team provides a manifest file that’s hosted publicly. You can fetch it using `kubectl` directly from their GitHub repository and apply it to your cluster. +The CloudNativePG team provides a manifest file that’s hosted publicly. You can +fetch it using `kubectl` directly from their GitHub repository and apply it to +your cluster. ```bash # Take the latest version from: https://cloudnative-pg.io/documentation/current/installation_upgrade/ +# Example for version 1.26.0: kubectl apply --server-side -f \ - https://raw.githubusercontent.com/cloudnative-pg/cloudnative-pg/release-1.24/releases/cnpg-1.24.1.yaml + https://raw.githubusercontent.com/cloudnative-pg/cloudnative-pg/release-1.26/releases/cnpg-1.26.0.yaml ``` -This command applies all necessary resources such as CRDs, RBAC permissions, and the operator's Deployment. +This command applies all necessary resources such as CRDs, RBAC permissions, and +the operator's Deployment. **Verify the Deployment** -You can check if the CloudNativePG operator pod is running correctly in its namespace: +You can check if the CloudNativePG operator pod is running correctly in its +namespace: ```bash kubectl get pods -n cnpg-system @@ -39,16 +108,18 @@ NAME READY STATUS RESTARTS AGE cloudnative-pg-controller-manager 1/1 Running 0 1m ``` -At this point, the CloudNativePG operator is installed, and you’re ready to create PostgreSQL clusters. - +At this point, the CloudNativePG operator is installed, and you’re ready to +create PostgreSQL clusters. ### Deploy a PostgreSQL Cluster -Now that CloudNativePG is running, let's set up a simple PostgreSQL database cluster. +Now that CloudNativePG is running, let's set up a simple PostgreSQL database +cluster. 
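Before creating a cluster, it can be worth a quick sanity check that the operator's CRDs are registered. A minimal sketch (the grep pattern simply matches the `postgresql.cnpg.io` API group used by the manifests below):

```bash
# List the CRDs installed by the operator; the Cluster resource used below
# lives in the postgresql.cnpg.io API group.
kubectl get crds | grep cnpg.io

# The output should include, among others:
# clusters.postgresql.cnpg.io
```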
**Create a Namespace for Your PostgreSQL Database** -For better organization, create a namespace for your PostgreSQL cluster if needed: +For better organization, create a namespace for your PostgreSQL cluster if +needed: ```bash kubectl create namespace postgres-db @@ -62,23 +133,27 @@ Save the following YAML into a file called `postgres-cluster.yaml`: apiVersion: postgresql.cnpg.io/v1 kind: Cluster metadata: - name: my-postgres-cluster - namespace: postgres-db + name: my-postgres-cluster + namespace: postgres-db spec: - instances: 3 # Number of database instances - primaryUpdateMethod: switchover # Update strategy for the primary node - storage: - size: 1Gi # Storage size for persistent volumes - storageClass: longhorn + instances: 3 # Number of database instances + primaryUpdateMethod: switchover # Update strategy for the primary node + storage: + size: 1Gi # Storage size for persistent volumes + storageClass: longhorn ``` -This YAML creates a PostgreSQL cluster with 3 instances managed by CloudNativePG. Note the `storageClass` is set to `longhorn`, assuming you have Longhorn installed and set up as the default backend. You might want to adjust the `size` value of the storage (`1Gi`) if needed. +This YAML creates a PostgreSQL cluster with 3 instances managed by +CloudNativePG. Note the `storageClass` is set to `longhorn`, assuming you have +Longhorn installed and set up as the default backend. You might want to adjust +the `size` value of the storage (`1Gi`) if needed. 3 replicas of PostgreSQL pods will be created, providing High Availability. **Apply the PostgreSQL Cluster YAML** -Run the following command to deploy the PostgreSQL cluster to your Kubernetes cluster: +Run the following command to deploy the PostgreSQL cluster to your Kubernetes +cluster: ```bash kubectl apply -f postgres-cluster.yaml @@ -86,7 +161,8 @@ kubectl apply -f postgres-cluster.yaml **Verify Running PostgreSQL Pods** -After creating the cluster, confirm that the pods for your PostgreSQL cluster are created and running: +After creating the cluster, confirm that the pods for your PostgreSQL cluster +are created and running: ```bash kubectl get pods -n postgres-db @@ -101,12 +177,13 @@ my-postgres-cluster-2 1/1 Running 0 1m my-postgres-cluster-3 1/1 Running 0 1m ``` - **Access PostgreSQL** -To access PostgreSQL from your local machine, you'll need to port-forward one of the PostgreSQL services. +To access PostgreSQL from your local machine, you'll need to port-forward one of +the PostgreSQL services. -First, let's list the services that have been exposed by the CloudNativePG operator: +First, let's list the services that have been exposed by the CloudNativePG +operator: ```bash kubectl get svc -n postgres-db @@ -122,16 +199,20 @@ my-postgres-cluster-rw ClusterIP 10.43.242.201 5432/TCP 22 ``` - `my-postgres-cluster-r`: Typically routes to the **read** replica. -- `my-postgres-cluster-ro`: Provides a **read-only** interface for **non-primary** nodes. -- `my-postgres-cluster-rw`: Connects to the current **primary** node for **read/write** operations. +- `my-postgres-cluster-ro`: Provides a **read-only** interface for + **non-primary** nodes. +- `my-postgres-cluster-rw`: Connects to the current **primary** node for + **read/write** operations. 
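Note that applications running inside the cluster don't need a port-forward at all; they reach these Services directly via Kubernetes DNS. A minimal sketch from a pod in the cluster (the `app` user and database names are placeholders, so take the real values from the credentials the operator generates for your cluster):

```bash
# Service DNS follows the usual <service>.<namespace>.svc.cluster.local pattern.
# The -rw Service tracks the current primary, so the application keeps pointing
# at the primary even after a failover.
psql "host=my-postgres-cluster-rw.postgres-db.svc.cluster.local port=5432 user=app dbname=app"
```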
-For example, to expose the `rw` service (which connects to the primary node), you can run: +For example, to expose the `rw` service (which connects to the primary node), +you can run: ```bash kubectl port-forward svc/my-postgres-cluster-rw 5432:5432 -n postgres-db ``` -Then, on your machine, you can connect to PostgreSQL at `localhost:5432` using any PostgreSQL client or `psql`. +Then, on your machine, you can connect to PostgreSQL at `localhost:5432` using +any PostgreSQL client or `psql`. For example: @@ -139,17 +220,20 @@ For example: psql -h localhost -U postgres ``` -By default, the `postgres` user is created, and you can set custom credentials by defining them in the cluster YAML under `spec.users`. - +By default, the `postgres` user is created, and you can set custom credentials +by defining them in the cluster YAML under `spec.users`. ### Optional: Persistent Volumes with Longhorn -To ensure the PostgreSQL data persists across node restarts, Kubernetes Persistent Volume Claims (PVCs) should use a proper storage class. +To ensure the PostgreSQL data persists across node restarts, Kubernetes +Persistent Volume Claims (PVCs) should use a proper storage class. -We assumed in the YAML above that you've configured Longhorn as your storage solution: +We assumed in the YAML above that you've configured Longhorn as your storage +solution: ```yaml storageClass: longhorn ``` -This makes use of Longhorn's reliable storage and ensures that your PostgreSQL data is replicated and safe from node failures. \ No newline at end of file +This makes use of Longhorn's reliable storage and ensures that your PostgreSQL +data is replicated and safe from node failures. diff --git a/docusaurus/docs/hardware-raspberry-pi-setup/before-we-start.md b/docusaurus/docs/hardware-raspberry-pi-setup/before-we-start.md index 4654cc2..0fcf8fc 100644 --- a/docusaurus/docs/hardware-raspberry-pi-setup/before-we-start.md +++ b/docusaurus/docs/hardware-raspberry-pi-setup/before-we-start.md @@ -2,13 +2,23 @@ title: Before We Start --- - + -Before we start, I want to mention that I've provided an [Ansible](../ansible/automation-with-ansible.md) playbook for most of the setup tasks. While I encourage you to use it, I also recommend doing things manually at first. Experience the process, let frustration build, and allow yourself to feel the annoyance. +Before we start, I want to mention that I've provided an +[Ansible](../ansible/automation-with-ansible.md) playbook for most of the setup +tasks. While I encourage you to use it, I also recommend doing things manually +at first. Experience the process, let frustration build, and allow yourself to +feel the annoyance. -Just as I did, I want you to truly understand why we use certain tools. You'll only internalize this by initially experiencing the challenges and then resolving them by introducing the right tools. +Just as I did, I want you to truly understand why we use certain tools. You'll +only internalize this by initially experiencing the challenges and then +resolving them by introducing the right tools. -Once you feel the process, once you get tired of typing the same commands over and over again, you can then use the Ansible playbook to automate the same tasks across the other devices. This is really the only way to truly understand the process and the tools we use, and why we use them. +Once you feel the process, once you get tired of typing the same commands over +and over again, you can then use the Ansible playbook to automate the same tasks +across the other devices. 
This is really the only way to truly understand the +process and the tools we use, and why we use them. While learning all this, I have failed countless times. @@ -23,6 +33,13 @@ While learning all this, I have failed countless times. - I had to re-configure the fans multiple times. - I had to re-configure the everything multiple times. -All this is part of the learning process. And that very failure is what will lead you to the right tools and understanding. And additionally, it will enforce your learning by making you remember the process and the tools we use. +All this is part of the learning process. And that very failure is what will +lead you to the right tools and understanding. And additionally, it will enforce +your learning by making you remember the process and the tools we use. -The most beautiful part about this entire learning process is that you will be able to learn so many things. And the most fascinating part is that they are all interconnected, and necessary for our cluster to work. It can often feel confusing learning some of these things in isolation, whereas when you learn them in the context of the cluster and this entire setup, it becomes much easier to understand. +The most beautiful part about this entire learning process is that you will be +able to learn so many things. And the most fascinating part is that they are all +interconnected, and necessary for our cluster to work. It can often feel +confusing learning some of these things in isolation, whereas when you learn +them in the context of the cluster and this entire setup, it becomes much easier +to understand. diff --git a/docusaurus/docs/hardware-raspberry-pi-setup/hardware.mdx b/docusaurus/docs/hardware-raspberry-pi-setup/hardware.mdx index afd3810..07398d5 100644 --- a/docusaurus/docs/hardware-raspberry-pi-setup/hardware.mdx +++ b/docusaurus/docs/hardware-raspberry-pi-setup/hardware.mdx @@ -3,75 +3,75 @@ sidebar_position: 3 title: Hardware Components --- -import ImageGallery from "react-image-gallery"; +import ImageGallery from 'react-image-gallery' -import Alert from "@site/src/components/Alert/index.tsx"; +import Alert from '@site/src/components/Alert/index.tsx' -While all the hardware is listed below, you can also get the full breakdown by opening the [excel sheet](https://docs.google.com/spreadsheets/d/17sQfTlpE3TCcj2Gz2uwkA2mW_AVzyXSE4EnZitLnAEY/edit?gid=0#gid=0). +While all the hardware is listed below, you can also get the full breakdown by +opening the +[excel sheet](https://docs.google.com/spreadsheets/d/17sQfTlpE3TCcj2Gz2uwkA2mW_AVzyXSE4EnZitLnAEY/edit?gid=0#gid=0). ## Affordable Hardware (If you don't want to follow my setup) -If you want to follow my k3s guide on a tight budget, here's the absolute cheapest Mikrotik combo I could find after some digging: +If you want to follow my k3s guide on a tight budget, here's the absolute +cheapest Mikrotik combo I could find after some digging: **Cheapest Router:** -- **[MikroTik hAP lite (RB941-2nD)](https://mikrotik.com/product/RB941-2nD-TC):** This is about as cheap as it gets. Prices jump around, but I've seen it listed for roughly 168 DKK (about $24 USD). -- **[MikroTik hEX lite (RB750r2)](https://mikrotik.com/product/RB750r2):** Another solid low-cost pick, usually around $40. You get 5 Ethernet ports, an 850MHz CPU, and 64MB RAM, plenty for a basic home lab. +- **[MikroTik hAP lite (RB941-2nD)](https://mikrotik.com/product/RB941-2nD-TC):** + This is about as cheap as it gets. 
Prices jump around, but I've seen it listed + for roughly 168 DKK (about $24 USD). +- **[MikroTik hEX lite (RB750r2)](https://mikrotik.com/product/RB750r2):** + Another solid low-cost pick, usually around $40. You get 5 Ethernet ports, an + 850MHz CPU, and 64MB RAM, plenty for a basic home lab. **Cheapest Switch:** -- **[MikroTik RB260GS](https://mikrotik.com/product/RB260GS):** 5-port Gigabit smart switch, also about $40. Bonus: it has an SFP cage if you ever want to mess with fiber. +- **[MikroTik RB260GS](https://mikrotik.com/product/RB260GS):** 5-port Gigabit + smart switch, also about $40. Bonus: it has an SFP cage if you ever want to + mess with fiber. -So, if you're really trying to keep costs down, grab the hAP lite router and the RB260GS switch. This combo gives you all the routing and switching you need to follow along with my guide, without spending a fortune on higher-end gear. +So, if you're really trying to keep costs down, grab the hAP lite router and the +RB260GS switch. This combo gives you all the routing and switching you need to +follow along with my guide, without spending a fortune on higher-end gear. -As of the Raspberry Pi and/or Mini PC, you can really use anything. For instance, if you have an old Intel laptop laying around, that is perfect. If you have an older version of Raspberry Pi, that is also perfect. No matter your hardware, you can still follow this guide and release your service, or services, by the end of it. +As of the Raspberry Pi and/or Mini PC, you can really use anything. For +instance, if you have an old Intel laptop laying around, that is perfect. If you +have an older version of Raspberry Pi, that is also perfect. No matter your +hardware, you can still follow this guide and release your service, or services, +by the end of it. ## Raspberry Pi's **[4x Raspberry Pi 4 B 8GB](https://www.raspberrypi.com/products/raspberry-pi-4-model-b/)** -_Note_: If you're looking to replicate my setup, opt for Raspberry Pi version 4 only if it's available at a significant discount. Otherwise, always go for the latest generation. Also, it's wise to select the model with the maximum RAM since we'll be running multiple services. More memory means better performance. +_Note_: If you're looking to replicate my setup, opt for Raspberry Pi version 4 +only if it's available at a significant discount. Otherwise, always go for the +latest generation. Also, it's wise to select the model with the maximum RAM +since we'll be running multiple services. More memory means better performance. ## Mini PCs -**[Lenovo Thinkcentre M900](https://www.ebay.com/sch/i.html?_nkw=Lenovo+thinkcentre+m900&_sacat=0&_from=R40&_trksid=m570.l1313)** - Slightly less powerful than the HP EliteDesk, but still a great choice. It came with 8GB of RAM, which I expanded to 24GB. - -**[HP EliteDesk 800 G3 Mini 65W](https://www.ebay.com/sch/i.html?_nkw=HP+EliteDesk+800+G3+Mini+65W&_sacat=0&_from=R40&_trksid=p2334524.m570.l1313&_odkw=Lenovo+thinkcentre+m900&_osacat=0)** - Expanded the HP EliteDesk with [32gb of RAM](https://www.amazon.de/dp/B07N1YBSPZ?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1). As you'll be browing for those on websites such is e.g. ebay, you'll find them in slightly different configurations, so make sure to check the specs before you buy. This one came with 16GB of RAM, which I moved to Lenovo. - -Mini PCs are a great alternative to Raspberry Pis. They are more powerful, more reliable, and more affordable. They are also more suitable for running a production-grade service. 
When it comes to idle power consumption, they are often on pair with the Raspberry Pi's, and as I will show you throughout this guide, we can put them to sleep using [Intel Active Management Technology (AMT)](https://en.wikipedia.org/wiki/Intel_Active_Management_Technology) and wake them up using a simple HTTP request.
-
-You want to get those on the used market, and as shown in the images below, you want to do basic maintenance to them. In my case, I've cleaned the fans and the heat sinks, and I've also added some thermal paste to the CPU and the GPU. Better the cooling, better the performance, and less noise and power consumption.
-
-I'm personally using [BSFF Thermal paste](https://www.amazon.de/dp/B09NLXSP4S?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1), but you can use whatever you want. To wipe the old thermal paste, you can use a simple paper towel or a microfiber cloth, preferably with some isopropyl alcohol. As you may see in the images below, I'm using [PURIVITA® Isopropanol 99.9%](https://www.amazon.de/-/en/dp/B0C4FKV9HY?ref_=ppx_hzsearch_conn_dt_b_fed_asin_title_1&th=1). Using isopropyl alcohol is a good idea, because it's a good solvent and it will help to remove the old thermal paste.
-
-As you can see on the images, these mini pc's often need a bit of maintenance, unless you purchase them from professional sellers. Ideally whatsoever, you want to buy them from regular people as they often undersell them out of incompetence. For instance, both of these PC's costed me less then a price of a new Raspberry Pi 4B.
-
-In mine, thermal paste was dry, and fans needed a bit of cleaning. Besides that, everything else was in a great shape.
-
-import Image from "@theme/IdealImage";
+**[Lenovo Thinkcentre M900](https://www.ebay.com/sch/i.html?_nkw=Lenovo+thinkcentre+m900&_sacat=0&_from=R40&_trksid=m570.l1313)** -
+Slightly less powerful than the HP EliteDesk, but still a great choice. It came
+with 8GB of RAM, which I expanded to 24GB.
+
+**[HP EliteDesk 800 G3 Mini 65W](https://www.ebay.com/sch/i.html?_nkw=HP+EliteDesk+800+G3+Mini+65W&_sacat=0&_from=R40&_trksid=p2334524.m570.l1313&_odkw=Lenovo+thinkcentre+m900&_osacat=0)** -
+Expanded the HP EliteDesk with
+[32GB of RAM](https://www.amazon.de/dp/B07N1YBSPZ?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1).
+As you'll be browsing for these on websites such as eBay, you'll find them in
+slightly different configurations, so make sure to check the specs before you
+buy. This one came with 16GB of RAM, which I moved to the Lenovo.
+
+Mini PCs are a great alternative to Raspberry Pis. They are more powerful, more
+reliable, and more affordable. They are also more suitable for running a
+production-grade service. When it comes to idle power consumption, they are
+often on par with the Raspberry Pis, and as I will show you throughout this
+guide, we can put them to sleep using
+[Intel Active Management Technology (AMT)](https://en.wikipedia.org/wiki/Intel_Active_Management_Technology)
+and wake them up using a simple HTTP request.
+
+You want to get these on the used market, and as shown in the images below, you
+want to do basic maintenance on them. In my case, I've cleaned the fans and the
+heat sinks, and I've also applied fresh thermal paste to the CPU and the GPU.
+Better cooling means better performance, less noise, and lower power
+consumption.
+
+I'm personally using
+[BSFF Thermal paste](https://www.amazon.de/dp/B09NLXSP4S?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1),
+but you can use whatever you want. To wipe off the old thermal paste, you can
+use a simple paper towel or a microfiber cloth, preferably with some isopropyl
+alcohol. As you can see in the images below, I'm using
+[PURIVITA® Isopropanol 99.9%](https://www.amazon.de/-/en/dp/B0C4FKV9HY?ref_=ppx_hzsearch_conn_dt_b_fed_asin_title_1&th=1).
+Isopropyl alcohol is a good choice because it is an effective solvent and helps
+remove the old thermal paste.
+
+As you can see in the images, these mini PCs often need a bit of maintenance,
+unless you purchase them from professional sellers. Ideally, however, you want
+to buy them from private sellers, who often sell them for less than they are
+worth. For instance, both of these PCs cost me less than the price of a new
+Raspberry Pi 4B.
+
+In mine, the thermal paste was dry and the fans needed a bit of cleaning.
+Besides that, everything else was in great shape.
+
+import Image from '@theme/IdealImage'
 
 ## Network
 
-~~**[Mikrotik RB3011UiAS-RM](https://mikrotik.com/product/RB3011UiAS-RM)**: I went with a MikroTik router because it offers professional-grade features at a price that's hard to beat. It's packed with options you'd usually only find in high-end gear like Cisco, but without the hefty price tag. The advanced routing, solid firewall, and built-in VPN support made it an easy choice for what I needed.~~
-
-**[Lenovo M920q as our Router](/docs/networking/mikrotik/lenovo-m920q-roas)**: After extensive research, I decided to replace the [Mikrotik RB3011UiAS-RM](https://mikrotik.com/product/RB3011UiAS-RM) with the much more powerful Lenovo M920q, running MikroTik RouterOS. I wanted to avoid networking bottlenecks when migrating all my services from Hetzner to my home cluster, so I chose a solution that would ensure reliable, high-performance networking.
+~~**[Mikrotik RB3011UiAS-RM](https://mikrotik.com/product/RB3011UiAS-RM)**: I
+went with a MikroTik router because it offers professional-grade features at a
+price that's hard to beat. It's packed with options you'd usually only find in
+high-end gear like Cisco, but without the hefty price tag. The advanced routing,
+solid firewall, and built-in VPN support made it an easy choice for what I
+needed.~~
+
+**[Lenovo M920q as our Router](/docs/networking/mikrotik/lenovo-m920q-roas)**:
+After extensive research, I decided to replace the
+[Mikrotik RB3011UiAS-RM](https://mikrotik.com/product/RB3011UiAS-RM) with the
+much more powerful Lenovo M920q, running MikroTik RouterOS. I wanted to avoid
+networking bottlenecks when migrating all my services from Hetzner to my home
+cluster, so I chose a solution that would ensure reliable, high-performance
+networking.
+
+**[Mikrotik CRS326-24G-2S+RM](https://mikrotik.com/product/CRS326-24G-2SplusRM)**:
+SwOS/RouterOS powered 24 port Gigabit Ethernet switch with two SFP+ ports. We
+need a switch with sufficient ports and SFP+ ports for future expansion, but
+also to do proper VLANs (network isolation) and QoS (quality of service) for
+different services.
+
+**[deleyCON 5 x 0.25 m CAT8.1](https://www.amazon.de/-/en/gp/product/B08WPJVGHR/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&th=1)**:
+deleyCON CAT 8.1 patch cable network cable as set // 2x RJ45 plug // S/FTP PIMF
+shielding
+
+**[CSL CAT.8 Network Cable 40 Gigabit](https://www.amazon.de/-/en/gp/product/B08FCLHTH5/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&th=1)**:
+CSL CAT.8 Network Cable 40 Gigabit
 
 **[deleyCON 5 x 0.25 m CAT6 Network Cable Set](https://www.amazon.de/dp/B079FYFZ96?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1)**
 
@@ -158,19 +223,32 @@ import Image from "@theme/IdealImage";
 
 ## Rack
 
-**[4U Rack Cabinet](https://www.compumail.dk/en/p/lanberg-rack-gra-993865294)**: A 4U Rack to encapsulate all components cleanly. It provides the benefit of space efficiency and easy access for any hardware changes or additions.
+**[4U Rack Cabinet](https://www.compumail.dk/en/p/lanberg-rack-gra-993865294)**:
+A 4U Rack to encapsulate all components cleanly. It provides the benefit of
+space efficiency and easy access for any hardware changes or additions.
 
-**[2X Rack Power Supply](https://www.compumail.dk/en/p/lanberg-pdu-09f-0300-bk-stromstodsbeskytter-9-stik-16a-sort-3m-996106700)**: A centralized power supply solution for the entire rack. Ensures consistent and reliable power distribution to all the components.
+**[2X Rack Power Supply](https://www.compumail.dk/en/p/lanberg-pdu-09f-0300-bk-stromstodsbeskytter-9-stik-16a-sort-3m-996106700)**:
+A centralized power supply solution for the entire rack. Ensures consistent and
+reliable power distribution to all the components.
 
-**[GeeekPi 1U Rack Kit for Raspberry Pi 4B, 19" 1U Rack Mount](https://www.amazon.de/-/en/gp/product/B0972928CN/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1)**: This 19 inch rack mount kit is specially designed for recording Raspberry Pi 4B boards and supports up to 4 units.
+**[GeeekPi 1U Rack Kit for Raspberry Pi 4B, 19" 1U Rack Mount](https://www.amazon.de/-/en/gp/product/B0972928CN/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1)**:
+This 19-inch rack mount kit is specially designed for rack-mounting Raspberry
+Pi 4B boards and supports up to 4 units.
 
-**[DIGITUS Professional Extendible Shelf for 19-inch cabinets, Black](https://www.amazon.de/dp/B002KTE870?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1)**: This shelf is perfect for the Mini PCs. It's extendible, so you can add more shelves if you need to.
+**[DIGITUS Professional Extendible Shelf for 19-inch cabinets, Black](https://www.amazon.de/dp/B002KTE870?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1)**:
+This shelf is perfect for the Mini PCs. It's extendible, so you can add more
+shelves if you need to.
 
-**[upHere Case Fan 120 mm](https://www.amazon.de/dp/B081SYD24Z?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1)**: High-performance exhaust fans with adjustable speed and metal grill protection, used to efficiently remove heat from the rack and maintain optimal airflow for all components.
+**[upHere Case Fan 120 mm](https://www.amazon.de/dp/B081SYD24Z?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1)**: +High-performance exhaust fans with adjustable speed and metal grill protection, +used to efficiently remove heat from the rack and maintain optimal airflow for +all components. ## Storage -Some of the storage choices were made based on a combination of overall research and a list of [Known Working Adapters](https://jamesachambers.com/best-ssd-storage-adapters-for-raspberry-pi-4-400/). +Some of the storage choices were made based on a combination of overall research +and a list of +[Known Working Adapters](https://jamesachambers.com/best-ssd-storage-adapters-for-raspberry-pi-4-400/). **[4X UGREEN Hard Drive Housing](https://www.amazon.de/dp/B07D2BHVBD?ref=ppx_yo2ov_dt_b_fed_asin_title)** @@ -180,16 +258,32 @@ Some of the storage choices were made based on a combination of overall research **[2x JSAUX USB 3.0 to SATA Adapter](https://www.amazon.de/dp/B086W944YT?ref=ppx_yo2ov_dt_b_fed_asin_title)** -_During my learning journey with Raspberry Pi, I realized that purchasing microSD cards was a mistake. They perform significantly worse than solid-state drives (SSDs), are prone to random failures, and unfortunately, these microSD cards can be as expensive, or even more so, than buying SSDs. E.g. in comparison, [Verbatim Vi550 S3 SSD](https://www.amazon.de/dp/B07LGKQLT5?ref=ppx_yo2ov_dt_b_fed_asin_title) costs the same as [SanDisk Extreme microSDXC](https://www.amazon.de/dp/B09X7BK27V?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1). In many instances in fact, microSD card is actually more expensive._ +_During my learning journey with Raspberry Pi, I realized that purchasing +microSD cards was a mistake. They perform significantly worse than solid-state +drives (SSDs), are prone to random failures, and unfortunately, these microSD +cards can be as expensive, or even more so, than buying SSDs. E.g. in +comparison, +[Verbatim Vi550 S3 SSD](https://www.amazon.de/dp/B07LGKQLT5?ref=ppx_yo2ov_dt_b_fed_asin_title) +costs the same as +[SanDisk Extreme microSDXC](https://www.amazon.de/dp/B09X7BK27V?ref=ppx_yo2ov_dt_b_fed_asin_title&th=1). +In many instances in fact, microSD card is actually more expensive._ -~~**[SanDisk Extreme microSDHC 3 Rescue Pro Deluxe Memory Card, Red/Gold 64GB](https://www.amazon.de/-/en/gp/product/B07FCMBLV6/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1)**: Up to 160MB/s Read speed and 60 MB/s. Write speed for fast recording and transferring~~ +~~**[SanDisk Extreme microSDHC 3 Rescue Pro Deluxe Memory Card, Red/Gold 64GB](https://www.amazon.de/-/en/gp/product/B07FCMBLV6/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1)**: +Up to 160MB/s Read speed and 60 MB/s. Write speed for fast recording and +transferring~~ -~~**[Vanja SD/Micro SD Card Reader](https://www.amazon.de/-/en/gp/product/B00W02VHM6/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1)**: Micro USB OTG Adapter and USB 2.0 Memory Card Reader~~ +~~**[Vanja SD/Micro SD Card Reader](https://www.amazon.de/-/en/gp/product/B00W02VHM6/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1)**: +Micro USB OTG Adapter and USB 2.0 Memory Card Reader~~ ## Why These Choices? 
-**Mobility**: The 4U Rack allows me to move the entire setup easily, making it convenient for different scenarios, from a home office to a small business environment
+**Mobility**: The 4U Rack allows me to move the entire setup easily, making it
+convenient for different scenarios, from a home office to a small business
+environment
 
-**Professional-Grade Networking**: The Mikrotik router provides a rich feature set generally found in enterprise-grade hardware, offering me a sandbox to experiment with advanced networking configurations
+**Professional-Grade Networking**: The Mikrotik router provides a rich feature
+set generally found in enterprise-grade hardware, offering me a sandbox to
+experiment with advanced networking configurations
 
-**Affordability**: This setup provides a balance between cost and performance, giving me a powerful Kubernetes cluster without breaking the bank
+**Affordability**: This setup provides a balance between cost and performance,
+giving me a powerful Kubernetes cluster without breaking the bank
diff --git a/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md b/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md
index c0797af..b50ccf3 100644
--- a/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md
+++ b/docusaurus/docs/hardware-raspberry-pi-setup/mini-pcs-setup.md
@@ -2,7 +2,13 @@
 title: Mini PCs Setup
 ---
 
-Setting up mini PCs differs from setting up Raspberry Pis, mainly due to hardware variations. Your experience may vary depending on your specific mini PC, but the general process should be similar. The main difference is the architecture, x86 vs ARM, and the operating systems. Raspberry Pis run on Raspbian, while our mini PCs use Ubuntu Server. Although both are Linux distributions and Debian-based, they differ slightly in configuration and the services they use.
+Setting up mini PCs differs from setting up Raspberry Pis, mainly due to
+hardware variations. Your experience may vary depending on your specific mini
+PC, but the general process should be similar. The main difference is the
+architecture, x86 vs ARM, and the operating systems. Raspberry Pis run on
+Raspbian, while our mini PCs use Ubuntu Server. Although both are Linux
+distributions and Debian-based, they differ slightly in configuration and the
+services they use.
 
 ### Goals for Mini PC Servers
 
@@ -21,36 +27,52 @@ These factors help keep costs down and ensure stable, long-term operation.
 - Open the case and remove dust using compressed air.
 - Check for any obvious damage or worn-out components.
 - Clean the CPU using isopropyl alcohol and a microfiber cloth.
-- Apply new thermal paste to the CPU. Every single Mini PC that I bought had a very worn out thermal paste. This will keep the CPU cool, and prevent it from overheating.
+- Apply new thermal paste to the CPU. Every single mini PC that I bought had
+  very worn-out thermal paste. This will keep the CPU cool and prevent it from
+  overheating.
 
 **Upgrade RAM (if possible):**
 
-While not mandatory, and higly dependent on your needs and usecase, I recommend to upgrade the RAM to the maximum supported by your mini PC. You can also do this later, especially by looking for used RAM on eBay, or other platforms.
+While not mandatory, and highly dependent on your needs and use case, I
+recommend upgrading the RAM to the maximum supported by your mini PC. You can
+also do this later, especially by looking for used RAM on eBay or other
+platforms.
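If you're not sure what is currently installed, how many slots are free, or what speed the existing modules run at, you can check from any Linux environment running on the machine (a live USB works too) before ordering anything. A small sketch:

```bash
# Total memory as the OS sees it
free -h

# Per-slot details: module sizes, speeds, and slot names; empty slots are
# reported as "No Module Installed" (dmidecode needs root, and is preinstalled
# on most distributions, otherwise: sudo apt install dmidecode)
sudo dmidecode -t memory | grep -E 'Locator|Size|Speed'
```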
**Check storage:** -Use an SSD, preferably an NVMe, for better performance and faster boot times. Additionally, ensure the drive is healthy (consider running a SMART test). +Use an SSD, preferably an NVMe, for better performance and faster boot times. +Additionally, ensure the drive is healthy (consider running a SMART test). ### BIOS Settings - Update the BIOS to the latest version (if available). -- Set the system to auto power-on after power loss. This is critical for servers, as we want to ensure that the server is always on, and that it's always available. If there is a power outage, or any other setback, we want to ensure that the server gets back online as soon as possible. -- Disable any hardware components that are not needed, e.g. Bluetooth, WiFi, etc. -- Ensure that the system is set to boot from the SSD. We want to ensure that the system doesn't have to wait for any other boot devices, e.g. USB, CD-ROM, etc. +- Set the system to auto power-on after power loss. This is critical for + servers, as we want to ensure that the server is always on, and that it's + always available. If there is a power outage, or any other setback, we want to + ensure that the server gets back online as soon as possible. +- Disable any hardware components that are not needed, e.g. Bluetooth, WiFi, + etc. +- Ensure that the system is set to boot from the SSD. We want to ensure that the + system doesn't have to wait for any other boot devices, e.g. USB, CD-ROM, etc. ### Ubuntu Server Installation -We'll use [Ubuntu Server](https://ubuntu.com/download/server) as the operating system. +We'll use [Ubuntu Server](https://ubuntu.com/download/server) as the operating +system. **Recommended steps:** - Download the Ubuntu Server ISO (choose the minimal installation option). -- Create a bootable USB drive with the ISO (e.g., using [Rufus](https://rufus.ie/) or `dd`). +- Create a bootable USB drive with the ISO (e.g., using + [Rufus](https://rufus.ie/) or `dd`). - Boot the mini PC from the USB drive. - Follow the installation prompts: - - - Select the minimal (minimized) version of Ubuntu Server. In simple words, we want to install the bare minimum to get to the command line. We won't be using any GUI, and we'll be using the command line to manage our servers. - - Set up your user account and hostname. While hostname can be anything, I recommend setting it to something that will easily identify the server. As you will add more servers over time, you'll want to be able to easily identify them. E.g. you could have something like `lenovo-m920q-mini-pc-1`. + - Select the minimal (minimized) version of Ubuntu Server. In simple words, we + want to install the bare minimum to get to the command line. We won't be + using any GUI, and we'll be using the command line to manage our servers. + - Set up your user account and hostname. While hostname can be anything, I + recommend setting it to something that will easily identify the server. As + you will add more servers over time, you'll want to be able to easily + identify them. E.g. you could have something like `lenovo-m920q-mini-pc-1`. - **Enable SSH during installation** (critical for remote management). - Partition the disk as needed (guided partitioning is fine for most users). @@ -58,7 +80,11 @@ We'll use [Ubuntu Server](https://ubuntu.com/download/server) as the operating s ### Optimize our Mini PC's -At this point, we want to do several steps that we've done under [Raspberry Pi Setup](../hardware-raspberry-pi-setup/raspberry-pi-setup.md). 
Essentially, we need to disable some default services, and ensure minimal power consumption. As our PC's will be running 24/7, and in many cases, will remain idle most of the time, we want to ensure that we're not wasting any resources. +At this point, we want to do several steps that we've done under +[Raspberry Pi Setup](../hardware-raspberry-pi-setup/raspberry-pi-setup.md). +Essentially, we need to disable some default services, and ensure minimal power +consumption. As our PC's will be running 24/7, and in many cases, will remain +idle most of the time, we want to ensure that we're not wasting any resources. Connect to your mini PC via SSH from another computer: @@ -74,7 +100,12 @@ sudo apt update && sudo apt upgrade -y #### Disable Swap -As with the Raspberry Pis, we want to disable swap. I have also included an [Ansible playbook](../../static/ansible/playbooks/disable-swap-ubuntu-server.yml) to automate this process. However, for the sake of learning, especially if you have multiple machines, I recommend doing the steps manually at least once to understand what’s happening. Once you’re comfortable, you can use the playbook for convenience. +As with the Raspberry Pis, we want to disable swap. I have also included an +[Ansible playbook](../../static/ansible/playbooks/disable-swap-ubuntu-server.yml) +to automate this process. However, for the sake of learning, especially if you +have multiple machines, I recommend doing the steps manually at least once to +understand what’s happening. Once you’re comfortable, you can use the playbook +for convenience. To permanently disable swap on Ubuntu Server: @@ -90,7 +121,9 @@ sudo swapoff -a sudo nano /etc/fstab ``` -Find any lines referencing a swap partition or swap file (they usually contain the word 'swap'). Comment out those lines by adding a `#` at the beginning, or delete them entirely. +Find any lines referencing a swap partition or swap file (they usually contain +the word 'swap'). Comment out those lines by adding a `#` at the beginning, or +delete them entirely. For example: @@ -106,9 +139,15 @@ Save and exit (`Ctrl+O`, `Enter`, then `Ctrl+X` in nano). sudo apt install btop sensors powertop ``` -You can now use `btop` to monitor the system and `sensors` to check CPU temperature. This is typically what I do when setting up a new server, it gives confidence that the thermal paste is applied correctly and the CPU is not overheating. Additionally, you can see how much memory and CPU are being used, and spot any services you may have forgotten to disable. +You can now use `btop` to monitor the system and `sensors` to check CPU +temperature. This is typically what I do when setting up a new server, it gives +confidence that the thermal paste is applied correctly and the CPU is not +overheating. Additionally, you can see how much memory and CPU are being used, +and spot any services you may have forgotten to disable. -For `powertop`, I recommend running it once and then configuring it to run automatically at boot. This helps us understand our server's power usage and find optimization opportunities. +For `powertop`, I recommend running it once and then configuring it to run +automatically at boot. This helps us understand our server's power usage and +find optimization opportunities. 
-TODO: https://github.com/hubblo-org/scaphandre -TODO: Watch YouTube for some power consumption tips +TODO: https://github.com/hubblo-org/scaphandre TODO: Watch YouTube for some +power consumption tips diff --git a/docusaurus/docs/hardware-raspberry-pi-setup/raspberry-pi-setup.md b/docusaurus/docs/hardware-raspberry-pi-setup/raspberry-pi-setup.md index 8fed003..005e522 100644 --- a/docusaurus/docs/hardware-raspberry-pi-setup/raspberry-pi-setup.md +++ b/docusaurus/docs/hardware-raspberry-pi-setup/raspberry-pi-setup.md @@ -10,7 +10,10 @@ title: Raspberry Pi Setup - Choose the `Raspberry Pi OS (other)` > `Raspberry Pi OS Lite (64-bit)` - The tool will download the selected OS image for you. - Plug in your SSD and select it in the 'Storage' section. - - - _Note_: If you're just unpacking brand new SSDs, there's a good chance you'll need to use a Disk Management tool on your operating system to initialize and allocate the available space. Otherwise, they might not appear in the Pi Imager. + - - _Note_: If you're just unpacking brand new SSDs, there's a good chance + you'll need to use a Disk Management tool on your operating system to + initialize and allocate the available space. Otherwise, they might not + appear in the Pi Imager. - Before writing, click on the cog icon for advanced settings. - Set the hostname to your desired value, e.g., `RP1`. - Enable SSH and select the "allow public-key authorization only" option. @@ -18,14 +21,16 @@ title: Raspberry Pi Setup ### Initial Boot and Setup -- Insert the flashed SSD into the USB 3 port on your Raspberry Pi and power it on +- Insert the flashed SSD into the USB 3 port on your Raspberry Pi and power it + on - On the first boot, ssh into the Pi to perform initial configuration ### Update and Upgrade [Ansible Playbook](/ansible/playbooks/apt-update.yml) -- Run the following commands to update the package list and upgrade the installed packages: +- Run the following commands to update the package list and upgrade the + installed packages: ```bash sudo apt update @@ -36,15 +41,28 @@ sudo apt upgrade [Ansible Playbook](/ansible/playbooks/enable-memory-groups.yml) -Before installing K3s, it's essential to enable memory cgroups on the Raspberry Pi for container resource management. +Before installing K3s, it's essential to enable memory cgroups on the Raspberry +Pi for container resource management. For the Ubuntu Server, e.g. our mini-pcs, this is already enabled by default. -[Control Groups (Cgroups)](https://en.wikipedia.org/wiki/Cgroups) are a Linux kernel feature that allows you to allocate resources such as CPU time, system memory, and more among user-defined groups of tasks (processes). +[Control Groups (Cgroups)](https://en.wikipedia.org/wiki/Cgroups) are a Linux +kernel feature that allows you to allocate resources such as CPU time, system +memory, and more among user-defined groups of tasks (processes). -K3s requires memory cgroups to be enabled to better manage and restrict the resources that each container can use. This is crucial in a multi-container environment where resource allocation needs to be as efficient as possible. +K3s requires memory cgroups to be enabled to better manage and restrict the +resources that each container can use. This is crucial in a multi-container +environment where resource allocation needs to be as efficient as possible. -**Simple Analogy**: Imagine you live in a house with multiple people (processes), and there are limited resources like time (CPU), space (memory), and tools (I/O). 
Without a system in place, one person might hog the vacuum cleaner all day (CPU time), while someone else fills the fridge with their stuff (memory). With a `"chore schedule"` (cgroups), you ensure everyone gets an allocated time with the vacuum cleaner, some space in the fridge, and so on. This schedule ensures that everyone can do their chores without stepping on each other's toes, much like how cgroups allocate system resources to multiple processes. +**Simple Analogy**: Imagine you live in a house with multiple people +(processes), and there are limited resources like time (CPU), space (memory), +and tools (I/O). Without a system in place, one person might hog the vacuum +cleaner all day (CPU time), while someone else fills the fridge with their stuff +(memory). With a `"chore schedule"` (cgroups), you ensure everyone gets an +allocated time with the vacuum cleaner, some space in the fridge, and so on. +This schedule ensures that everyone can do their chores without stepping on each +other's toes, much like how cgroups allocate system resources to multiple +processes. Edit the `/boot/firmware/cmdline.txt` file on your Raspberry Pi. @@ -66,7 +84,11 @@ sudo reboot ## Optimize our Pi's -Since our Raspberry Pis are nodes in our cluster and will consistently be used when plugged into our Ethernet switch or router, we can optimize them by disabling unnecessary components. This reduces the number of services running on them, naturally lowering CPU and memory usage. More importantly, it reduces power consumption, leading to lower electricity bills. +Since our Raspberry Pis are nodes in our cluster and will consistently be used +when plugged into our Ethernet switch or router, we can optimize them by +disabling unnecessary components. This reduces the number of services running on +them, naturally lowering CPU and memory usage. More importantly, it reduces +power consumption, leading to lower electricity bills. ### Disable Wi-Fi @@ -121,9 +143,26 @@ sudo reboot [Swap Memory](/terminology.md#swap-memory) -Disabling swap in a K3s cluster is crucial because Kubernetes relies on precise memory management to allocate resources, schedule workloads, and handle potential memory limits. When swap is enabled, it introduces unpredictability in how memory is used. The Linux kernel may move inactive memory to disk (swap), giving the impression that there is available memory when, in reality, the node might be under significant memory pressure. This can lead to performance degradation for applications, as accessing memory from the swap space (on disk) is significantly slower than accessing it from RAM. In addition, Kubernetes, by default, expects swap to be off and prevents the kubelet from running unless explicitly overridden, as swap complicates memory monitoring and scheduling. - -Beyond performance, swap interferes with Kubernetes' ability to react to out-of-memory (OOM) conditions. With swap enabled, a node might avoid crashing but at the cost of drastically reduced performance, disk I/O bottlenecks, and inconsistent resource allocation. In contrast, with swap disabled, Kubernetes can correctly identify memory shortages and kill misbehaving pods in a controlled way, allowing the system to recover predictably. For edge cases like K3s, which often operate on lightweight and resource-constrained systems (e.g., Raspberry Pis or IoT devices), disabling swap ensures efficient and stable operation without unnecessary disk wear and performance hits. 
+Disabling swap in a K3s cluster is crucial because Kubernetes relies on precise +memory management to allocate resources, schedule workloads, and handle +potential memory limits. When swap is enabled, it introduces unpredictability in +how memory is used. The Linux kernel may move inactive memory to disk (swap), +giving the impression that there is available memory when, in reality, the node +might be under significant memory pressure. This can lead to performance +degradation for applications, as accessing memory from the swap space (on disk) +is significantly slower than accessing it from RAM. In addition, Kubernetes, by +default, expects swap to be off and prevents the kubelet from running unless +explicitly overridden, as swap complicates memory monitoring and scheduling. + +Beyond performance, swap interferes with Kubernetes' ability to react to +out-of-memory (OOM) conditions. With swap enabled, a node might avoid crashing +but at the cost of drastically reduced performance, disk I/O bottlenecks, and +inconsistent resource allocation. In contrast, with swap disabled, Kubernetes +can correctly identify memory shortages and kill misbehaving pods in a +controlled way, allowing the system to recover predictably. For edge cases like +K3s, which often operate on lightweight and resource-constrained systems (e.g., +Raspberry Pis or IoT devices), disabling swap ensures efficient and stable +operation without unnecessary disk wear and performance hits. - Open a terminal. - Run the following command to turn off swap for the current session: @@ -132,7 +171,8 @@ Beyond performance, swap interferes with Kubernetes' ability to react to out-of- sudo swapoff -a ``` -This command disables the swap immediately, but it will be re-enabled after a reboot unless further steps are taken. +This command disables the swap immediately, but it will be re-enabled after a +reboot unless further steps are taken. Modify `/etc/dphys-swapfile` to Disable Swap Permanently @@ -142,8 +182,7 @@ Open the swap configuration file `/etc/dphys-swapfile` in a text editor: sudo nano /etc/dphys-swapfile ``` -Search for the line starting with `CONF_SWAPSIZE=`. -Modify that line to read: +Search for the line starting with `CONF_SWAPSIZE=`. Modify that line to read: ```bash CONF_SWAPSIZE=0 @@ -163,7 +202,8 @@ Stop the `dphys-swapfile` service, which manages swap: sudo systemctl stop dphys-swapfile ``` -Prevent the `dphys-swapfile` service from starting during system boot by disabling it: +Prevent the `dphys-swapfile` service from starting during system boot by +disabling it: ```bash sudo systemctl disable dphys-swapfile @@ -175,7 +215,8 @@ Run the following command to verify that swap is no longer in use: free -m ``` -In the output, ensure that the "Swap" line shows `0` for total, used, and free space: +In the output, ensure that the "Swap" line shows `0` for total, used, and free +space: ``` total used free shared buffers cached @@ -184,21 +225,30 @@ Mem: 2003 322 1681 18 12 129 Swap: 0 0 0 ``` -Finally, reboot the system in order to apply all changes fully and ensure swap remains permanently disabled: +Finally, reboot the system in order to apply all changes fully and ensure swap +remains permanently disabled: ```bash sudo reboot ``` -After the system comes back online, run `free -m` again to confirm that swap is still disabled. +After the system comes back online, run `free -m` again to confirm that swap is +still disabled. 
### Disable Bluetooth [Ansible Playbook](/ansible/playbooks/disable-bluetooth.yml) -When using Raspberry Pi devices in a Kubernetes-based environment like K3s, any unused hardware features, such as Bluetooth, can consume system resources or introduce potential security risks. Disabling Bluetooth on each Raspberry Pi optimizes performance by reducing background services and freeing up resources like CPU and memory. Additionally, by disabling an unused service, you reduce the attack surface of your Raspberry Pi-based K3s cluster, providing a more secure and streamlined operating environment. +When using Raspberry Pi devices in a Kubernetes-based environment like K3s, any +unused hardware features, such as Bluetooth, can consume system resources or +introduce potential security risks. Disabling Bluetooth on each Raspberry Pi +optimizes performance by reducing background services and freeing up resources +like CPU and memory. Additionally, by disabling an unused service, you reduce +the attack surface of your Raspberry Pi-based K3s cluster, providing a more +secure and streamlined operating environment. -**Stop the Bluetooth service** that might be currently running on your Raspberry Pi: +**Stop the Bluetooth service** that might be currently running on your Raspberry +Pi: ```bash sudo systemctl stop bluetooth @@ -210,9 +260,11 @@ sudo systemctl stop bluetooth sudo systemctl disable bluetooth ``` -This ensures that the Bluetooth service is not running in the background, conserving system resources. +This ensures that the Bluetooth service is not running in the background, +conserving system resources. -To prevent the operating system from loading Bluetooth modules at boot time, you'll need to blacklist specific modules. +To prevent the operating system from loading Bluetooth modules at boot time, +you'll need to blacklist specific modules. Open the blacklist configuration file for editing (or create it) @@ -229,9 +281,11 @@ blacklist hci_uart # Disables hci_uart module specific to Raspberry Pi Bluetoo **Save the file** (Ctrl+O in `nano`) and **exit** the editor (Ctrl+X in `nano`). -By blacklisting these modules, they won’t be loaded during boot, effectively preventing Bluetooth from running. +By blacklisting these modules, they won’t be loaded during boot, effectively +preventing Bluetooth from running. -Bluetooth can be disabled directly at the device level by editing specific Raspberry Pi system configurations. +Bluetooth can be disabled directly at the device level by editing specific +Raspberry Pi system configurations. Open the boot configuration file for editing: @@ -245,19 +299,24 @@ Add the following line to disable Bluetooth: dtoverlay=disable-bt ``` -Ensure no Bluetooth device can wake up your Raspberry Pi by ensuring the line is not commented out. +Ensure no Bluetooth device can wake up your Raspberry Pi by ensuring the line is +not commented out. -**Save the changes** (Ctrl+O in `nano`) and **exit** the editor (Ctrl+X in `nano`). +**Save the changes** (Ctrl+O in `nano`) and **exit** the editor (Ctrl+X in +`nano`). -This command ensures that the Raspberry Pi doesn’t enable Bluetooth at boot by making system-wide firmware adjustments. +This command ensures that the Raspberry Pi doesn’t enable Bluetooth at boot by +making system-wide firmware adjustments. -To fully apply the changes (stopping the service, blacklisting modules, and adjusting system configuration), it’s recommended to reboot the system. 
+To fully apply the changes (stopping the service, blacklisting modules, and +adjusting system configuration), it’s recommended to reboot the system. ```bash sudo reboot ``` -After rebooting, you can verify that Bluetooth has been disabled by checking the status of the service: +After rebooting, you can verify that Bluetooth has been disabled by checking the +status of the service: ```bash sudo systemctl status bluetooth @@ -267,21 +326,33 @@ It should indicate that the Bluetooth service is inactive or dead. ### Fan Control -Unfortunately, due to the limitations of the [GeeekPi 1U Rack Kit for Raspberry Pi 4B, 19" 1U Rack Mount](https://www.amazon.de/-/en/gp/product/B0972928CN/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1), I couldn't optimize the fans for each Raspberry Pi. The fans included with this kit lack [PWM](https://en.wikipedia.org/wiki/Pulse-width_modulation) control and only come with a 2-pin cable. If you're using different fans that you can control, I highly recommend setting them to remain off below certain temperature thresholds. This will not only make your setup completely silent but also reduce power consumption. +Unfortunately, due to the limitations of the +[GeeekPi 1U Rack Kit for Raspberry Pi 4B, 19" 1U Rack Mount](https://www.amazon.de/-/en/gp/product/B0972928CN/ref=ppx_yo_dt_b_search_asin_title?ie=UTF8&psc=1), +I couldn't optimize the fans for each Raspberry Pi. The fans included with this +kit lack [PWM](https://en.wikipedia.org/wiki/Pulse-width_modulation) control and +only come with a 2-pin cable. If you're using different fans that you can +control, I highly recommend setting them to remain off below certain temperature +thresholds. This will not only make your setup completely silent but also reduce +power consumption. ## Assign Static IP Addresses ### MikroTik Router - Open the MikroTik Web UI and navigate to `IP > DHCP Server`. -- Locate the `Leases` tab and identify the MAC addresses of your Raspberry Pi units. -- Click on the entry for each Raspberry Pi and change it from `Dynamic` to `Static`. +- Locate the `Leases` tab and identify the MAC addresses of your Raspberry Pi + units. +- Click on the entry for each Raspberry Pi and change it from `Dynamic` to + `Static`. -If you're using a different router, the process should be similar. The only possible limitation is if you're using a consumer-grade router that doesn't offer these features. In that case, you'll need to set up a DHCP server. +If you're using a different router, the process should be similar. The only +possible limitation is if you're using a consumer-grade router that doesn't +offer these features. In that case, you'll need to set up a DHCP server. ## Set SSH Aliases -Once you have assigned static IPs on your router, you can simplify the SSH process by setting up SSH aliases. Here's how to do it: +Once you have assigned static IPs on your router, you can simplify the SSH +process by setting up SSH aliases. Here's how to do it: Open the SSH config file on your local machine: @@ -309,7 +380,8 @@ Host rp4 User YOUR_USERNAME ``` -Replace ``, ``, ``, and `` with the actual static IP addresses of your Raspberry Pis. Save and close the file. +Replace ``, ``, ``, and `` with +the actual static IP addresses of your Raspberry Pis. Save and close the file. 
You should now be able to SSH into each Raspberry Pi using the alias:

diff --git a/docusaurus/docs/kubernetes/anatomy-of-kubectl-command.mdx b/docusaurus/docs/kubernetes/anatomy-of-kubectl-command.mdx
index d7a071c..61a180f 100644
--- a/docusaurus/docs/kubernetes/anatomy-of-kubectl-command.mdx
+++ b/docusaurus/docs/kubernetes/anatomy-of-kubectl-command.mdx
@@ -2,9 +2,15 @@ title: Anatomy of a `kubectl` Command
 ---
 
-[kubectl](https://kubernetes.io/docs/reference/kubectl/) is the command-line interface for Kubernetes. It allows us to run commands against Kubernetes clusters. It is the most important command in Kubernetes, and we'll use it a lot.
+[kubectl](https://kubernetes.io/docs/reference/kubectl/) is the command-line
+interface for Kubernetes. It allows us to run commands against Kubernetes
+clusters. It is the most important command in Kubernetes, and we'll use it a
+lot.
 
-I can't emphasize enough how important it is to write these commands manually until we internalize them. And, very importantly, make sure to set up [kubectl completion](https://kubernetes.io/docs/reference/kubectl/generated/kubectl_completion/), it will speed things up a lot.
+I can't emphasize enough how important it is to write these commands manually
+until we internalize them. And, very importantly, make sure to set up
+[kubectl completion](https://kubernetes.io/docs/reference/kubectl/generated/kubectl_completion/);
+it will speed things up a lot.
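+
+For example, enabling shell completion is a one-time setup. The snippet below is
+a minimal sketch for a bash shell (it assumes the `bash-completion` package is
+installed and that your shell reads `~/.bashrc`); zsh users would use
+`kubectl completion zsh` instead:
+
+```bash
+# Load kubectl completion in every new bash session
+echo 'source <(kubectl completion bash)' >> ~/.bashrc
+source ~/.bashrc
+
+# Quick check: type "kubectl get po" and press Tab to see suggestions
+```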
diff --git a/docusaurus/docs/kubernetes/anatomy-of-kubernetes-yaml.mdx b/docusaurus/docs/kubernetes/anatomy-of-kubernetes-yaml.mdx index 2665b9a..846c215 100644 --- a/docusaurus/docs/kubernetes/anatomy-of-kubernetes-yaml.mdx +++ b/docusaurus/docs/kubernetes/anatomy-of-kubernetes-yaml.mdx @@ -2,11 +2,84 @@ title: Anatomy of Kubernetes YAML --- -Let's take a look at a couple of YAML files that our infrastructure will typically be composed of. Note that we don't have to fully understand what they do or instantly know how to write them, but by the time we finish this lesson, we should have a good idea of how we'll be defining our infrastructure. +import Alert from '@site/src/components/Alert/index.tsx' +import KubernetesYAMLAnatomy from '@site/src/components/KubernetesYAMLAnatomy/index.tsx' - +## What You'll Learn + +By the end of this lesson, you'll understand: + +- The **four required fields** in every Kubernetes YAML file +- How to read and interpret YAML structure (indentation, arrays, objects) +- How different Kubernetes resources connect and work together +- The purpose of each major resource type (Deployment, Service, Ingress, etc.) +- Common patterns like labels, selectors, and templates + +## Why YAML Structure Matters + +Understanding Kubernetes YAML files is essential for working with Kubernetes. +These declarative files define nearly every aspect of our infrastructure: how +many instances of an application to run, what type of storage to use, access +controls, networking, and more. + +From now on, we should approach these concepts in a straightforward way, because +that's really all there is to it. If we focus too much on terminology and fluff, +it's easy to feel overwhelmed. + +> It's important to think about each of these "components" naturally. For +> example: we want to deploy an application with an API and supporting services +> (x, y, z). We might need 10GB of storage for our Postgres database, and we +> want Postgres to be accessible only by our API. Finally, we need to expose our +> API to the internet. + +## The Four Required Fields + +Every Kubernetes YAML file has four required top-level fields: + +- **`apiVersion`**: Specifies the Kubernetes API version to use for this object. +- **`kind`**: Indicates the type of Kubernetes object (e.g., Deployment, + Service, Ingress). +- **`metadata`**: Provides identifying information such as the object's name, + namespace, and labels. +- **`spec`**: Contains the desired state and configuration details for the + object. + +These four fields tell Kubernetes **what** you want (`kind`), **which API** to +use (`apiVersion`), **how to identify it** (`metadata`), and **what it should +look like** (`spec`). + +## Reading YAML: The Basics -import Alert from "@site/src/components/Alert/index.tsx"; +Before diving into Kubernetes-specific YAML, let's understand YAML syntax: + +- **Indentation matters**: Use 2 spaces (never tabs). Each level of indentation + represents nesting. +- **Arrays/Lists**: Use `-` prefix. 
Example: `- item1` or `- name: value`
+- **Key-Value pairs**: Use `key: value` format
+- **Strings**: Usually don't need quotes unless they contain special characters
+- **Comments**: Start with `#`
+
+```yaml
+# This is a comment
+apiVersion: apps/v1 # Key-value pair
+kind: Deployment # Another key-value pair
+metadata: # Object (nested)
+  name: my-app # Nested key-value (2 spaces indent)
+  labels: # Another nested object
+    app: my-app # Nested under labels (4 spaces)
+spec: # Top-level object
+  replicas: 3 # Number value
+  selector: # Nested object
+    matchLabels:
+      app: my-app # Must match the Pod template labels below
+  template: # Blueprint for the Pods this Deployment manages
+    metadata:
+      labels:
+        app: my-app
+    spec:
+      containers: # Array (list)
+        - name: app # Array item (a dash starts a list entry)
+          image: nginx:latest # Nested under the array item
+```
+
+## Interactive YAML Explorer
+
+Explore the different Kubernetes resource types below. Hover over sections in
+the YAML code or the explanation cards to see how they connect:
+
+<KubernetesYAMLAnatomy />
 
-import KubernetesYAMLAnatomy from "@site/src/components/KubernetesYAMLAnatomy/index.tsx";
+## How Resources Connect
 
-Understanding Kubernetes YAML files is essential for working with Kubernetes. These declarative files define nearly every aspect of our infrastructure: how many instances of an application to run, what type of storage to use, access controls, networking, and more.
+Kubernetes resources work together to deploy and run applications. Here's how
+they connect:
 
-From now on, we should approach these concepts in a straightforward way, because that's really all there is to it. If we focus too much on terminology and fluff, it's easy to feel overwhelmed.
+```mermaid
+flowchart LR
+    subgraph external [External Traffic]
+        User[User Request]
+    end
 
-> It's important to think about each of these "components" naturally. For example: we want to deploy an application with an API and supporting services (x, y, z). We might need 10GB of storage for our Postgres database, and we want Postgres to be accessible only by our API. Finally, we need to expose our API to the internet.
+    subgraph cluster [Kubernetes Cluster]
+        Ingress[Ingress] --> Service[Service]
+        Service --> Pod1[Pod]
+        Service --> Pod2[Pod]
+        Service --> Pod3[Pod]
+        Deployment[Deployment] -.->|manages| Pod1
+        Deployment -.->|manages| Pod2
+        Deployment -.->|manages| Pod3
+        ConfigMap[ConfigMap] -.->|injects config| Pod1
+        Secret[Secret] -.->|injects secrets| Pod1
+        PVC[PVC] -.->|provides storage| Pod1
+    end
 
-Each major configuration area is typically represented by its own YAML file. For example:
+    User --> Ingress
+```
 
-- **Ingress**: Describes how a service is accessed via HTTP and HTTPS.
-- **Deployment**: Specifies how many instances (replicas) of an application to run, and resource allocations like RAM and CPU.
-- **Service**: Defines networking and how other components or users can access our application.
+**Flow of traffic:**
+
+1. **Ingress** receives external HTTP/HTTPS traffic and routes it to a
+   **Service**
+2. **Service** acts as a load balancer, distributing traffic to matching
+   **Pods**
+3. **Deployment** creates and manages the **Pods** (ensures the desired number
+   are running)
+4. **ConfigMap** and **Secret** provide configuration and sensitive data to
+   **Pods**
+5. 
**PVC** (Persistent Volume Claim) provides persistent storage to **Pods** + +## Quick Reference: Resource Types + +| Resource | Purpose | Common Use Case | Related Resources | +| -------------- | --------------------------- | --------------------------------------- | ----------------------- | +| **Deployment** | Manages Pod replicas | Run 3 copies of your app | Pods, ReplicaSet | +| **Service** | Exposes Pods internally | Allow Pods to talk to each other | Pods (via selectors) | +| **Ingress** | Exposes Services externally | Make your app accessible via HTTP/HTTPS | Service | +| **ConfigMap** | Stores non-sensitive config | Environment variables, config files | Pods (via volumeMounts) | +| **Secret** | Stores sensitive data | Passwords, API keys, certificates | Pods (via volumeMounts) | +| **PVC** | Requests persistent storage | Database storage, file uploads | Pods (via volumes) | + +## Common Patterns -A typical Kubernetes YAML file is structured into several key sections ([Objects In Kubernetes](https://kubernetes.io/docs/concepts/overview/working-with-objects/)): +### Labels and Selectors -- **apiVersion**: Specifies the Kubernetes API version to use for this object. -- **kind**: Indicates the type of Kubernetes object (e.g., Deployment, Service, Ingress). -- **metadata**: Provides identifying information such as the object's name, namespace, and labels. -- **spec**: Contains the desired state and configuration details for the object. +**Labels** are key-value pairs attached to resources. **Selectors** use labels +to find matching resources. + +```yaml +# Deployment uses selector to find Pods +spec: + selector: + matchLabels: + app: my-app # "Find Pods with label app=my-app" + template: + metadata: + labels: + app: my-app # Pods get this label +``` + +**Critical rule**: The labels in `spec.selector.matchLabels` must match the +labels in `spec.template.metadata.labels`. This is how the Deployment knows +which Pods to manage. + +### Deployment → Pod Template + +A Deployment's `spec.template` is a **blueprint** for creating Pods: + +```yaml +spec: + replicas: 3 # "I want 3 Pods" + template: # "Here's what each Pod should look like" + metadata: + labels: + app: my-app + spec: + containers: + - name: app + image: nginx:latest +``` + +Kubernetes uses this template to create Pods. If a Pod dies, Kubernetes creates +a new one using this template. + +### Service → Pod Selection + +A Service uses a selector to find Pods to route traffic to: + +```yaml +spec: + selector: + app: my-app # "Route traffic to Pods with app=my-app" + ports: + - port: 80 + targetPort: 8080 # Forward to port 8080 on the Pods +``` + +The Service finds all Pods with matching labels and distributes traffic among +them. + +## Try It Yourself + +The best way to learn is by doing. Try these exercises: + +1. **Validate your YAML**: Copy one of the examples above to a file (e.g., + `deployment.yaml`) and validate it: + + ```bash + kubectl apply --dry-run=client -f deployment.yaml + ``` + +2. **Explore the API**: Use `kubectl explain` to discover available fields: + + ```bash + kubectl explain deployment.spec + kubectl explain deployment.spec.template.spec.containers + ``` + +3. **Find API versions**: Discover which API versions are available: + + ```bash + kubectl api-resources + ``` + +4. 
**See it in action**: Apply a simple Deployment and watch it create Pods: + ```bash + kubectl apply -f deployment.yaml + kubectl get pods -w # Watch Pods being created + ``` + +Each major configuration area is typically represented by its own YAML file. For +example: + +- **Ingress**: Describes how a service is accessed via HTTP and HTTPS. +- **Deployment**: Specifies how many instances (replicas) of an application to + run, and resource allocations like RAM and CPU. +- **Service**: Defines networking and how other components or users can access + our application. -Each section plays a specific role in telling Kubernetes what we want to run and how we want it managed. Mastering these files is key to effectively deploying and managing applications in Kubernetes. +Each section plays a specific role in telling Kubernetes what we want to run and +how we want it managed. Mastering these files is key to effectively deploying +and managing applications in Kubernetes. diff --git a/docusaurus/docs/kubernetes/common-kubernetes-commands.md b/docusaurus/docs/kubernetes/common-kubernetes-commands.md index 6f94c7d..c21d7fc 100644 --- a/docusaurus/docs/kubernetes/common-kubernetes-commands.md +++ b/docusaurus/docs/kubernetes/common-kubernetes-commands.md @@ -3,298 +3,348 @@ ### Cluster Information and Health 1. **Check cluster components (control plane availability):** + ```bash kubectl get componentstatuses ``` 2. **Get general cluster information:** + ```bash kubectl cluster-info ``` 3. **List all nodes in the cluster (health/status):** + ```bash kubectl get nodes ``` 4. **Get detailed information about a node:** + ```bash kubectl describe node ``` 5. **View the current Kubernetes version running:** + ```bash kubectl version --short ``` 6. **Check any existing cluster issues or warning events globally:** + ```bash kubectl get events --all-namespaces --sort-by='.metadata.creationTimestamp' ``` - ### Workload / Pod Management 7. **View all pods across all namespaces:** + ```bash kubectl get pods --all-namespaces ``` -8. **List the pods in a specific namespace (e.g., `default`, `longhorn-system`):** +8. **List the pods in a specific namespace (e.g., `default`, + `longhorn-system`):** + ```bash kubectl get pods -n ``` 9. **Get detailed information for a specific pod:** + ```bash kubectl describe pod -n ``` 10. **Delete a pod (restarts the pod, useful for troubleshooting):** + ```bash kubectl delete pod -n ``` 11. **Create or apply resources from a YAML file:** + ```bash kubectl apply -f .yaml ``` 12. **View YAML/JSON configuration dump of a resource:** + - **Output YAML:** - ```bash - kubectl get -o yaml - ``` + + ```bash + kubectl get -o yaml + ``` - **Output JSON:** - ```bash - kubectl get -o json - ``` + ```bash + kubectl get -o json + ``` 13. **Get logs from a pod:** + ```bash kubectl logs -n ``` 14. **Stream continuous logs from a pod:** + ```bash kubectl logs -f -n ``` 15. **Get logs for a specific container in a multi-container pod:** + ```bash kubectl logs -c -n ``` -16. **Launch a debug pod for troubleshooting (basic busybox container in interactive terminal):** +16. **Launch a debug pod for troubleshooting (basic busybox container in + interactive terminal):** + ```bash kubectl run debug --image=busybox -it --rm -- /bin/sh ``` -17. **Forcefully delete a pod (if stuck in terminating or other strange states):** +17. **Forcefully delete a pod (if stuck in terminating or other strange + states):** + ```bash kubectl delete pod --grace-period=0 --force -n ``` - ### Service & Endpoint Management 18. 
**List all services in a namespace:** + ```bash kubectl get svc -n ``` 19. **Get detailed information about a service:** + ```bash kubectl describe svc -n ``` -20. **Forward a local port to a pod (e.g., for local access to service, like database):** +20. **Forward a local port to a pod (e.g., for local access to service, like + database):** + ```bash kubectl port-forward : -n ``` 21. **Test if a service is functioning by listing endpoints:** + ```bash kubectl get endpoints -n ``` - ### Storage Management (Longhorn) 22. **List Longhorn volumes:** + ```bash kubectl get volumes -n longhorn-system ``` 23. **Describe a Longhorn volume:** + ```bash kubectl describe -n longhorn-system ``` 24. **List PersistentVolumeClaims (PVCs) in a namespace:** + ```bash kubectl get pvc -n ``` 25. **Delete a PersistentVolumeClaim (PVC) carefully:** + ```bash kubectl delete pvc -n ``` 26. **Check the status of Longhorn-csi or other stateful sets:** + ```bash kubectl get statefulsets -n longhorn-system ``` 27. **List all StorageClasses (to verify Longhorn's StorageClasses):** + ```bash kubectl get storageclass ``` - ### Namespace Management 28. **List all namespaces:** + ```bash kubectl get namespaces ``` 29. **Switch context to a different namespace:** + ```bash kubectl config set-context --current --namespace= ``` 30. **Create a new namespace:** + ```bash kubectl create namespace ``` 31. **Delete a namespace (use caution):** + ```bash kubectl delete namespace ``` - ### PostgreSQL Management (example provider) -32. **List PostgreSQL-related resources (assuming you have CRDs or a PostgreSQL operator installed):** +32. **List PostgreSQL-related resources (assuming you have CRDs or a PostgreSQL + operator installed):** + ```bash kubectl get postgresql -n ``` 33. **Describe a PostgreSQL instance:** + ```bash kubectl describe postgresql -n ``` 34. **Connect to the PostgreSQL pod for database debugging:** + ```bash kubectl exec -it -n -- psql -U postgres ``` - ### Resource & Utilization Monitoring -35. **View resource usage (CPU/Memory) for nodes and pods (requires metrics-server):** -- **For nodes:** - ```bash - kubectl top nodes - ``` +35. **View resource usage (CPU/Memory) for nodes and pods (requires + metrics-server):** + +- **For nodes:** + + ```bash + kubectl top nodes + ``` - **For pods (in a specific namespace):** - ```bash - kubectl top pods -n - ``` + ```bash + kubectl top pods -n + ``` 36. **Check events for troubleshooting issues in a namespace:** + ```bash kubectl get events -n ``` 37. **Get details about a Deployment:** + ```bash kubectl describe deployment -n ``` - ### Scale Deployments 38. **Scale up/down the number of replicas in a Deployment:** + ```bash kubectl scale deployment --replicas= -n ``` 39. **Autoscale a Deployment based on CPU usage:** + ```bash kubectl autoscale deployment --cpu-percent= --min= --max= -n ``` - ### Debugging & Troubleshooting 40. **Check recent events sorted by timestamp to diagnose issues:** + ```bash kubectl get events --sort-by='.metadata.creationTimestamp' -n ``` 41. **Open a shell session inside a running container:** + ```bash kubectl exec -it -n -- /bin/bash ``` 42. **Run one-off commands in a container (e.g., to run a curl command):** + ```bash kubectl exec -it -n -- curl ``` -43. **Get the history of resource changes for a deployment (e.g., when scaling happens):** +43. 
**Get the history of resource changes for a deployment (e.g., when scaling + happens):** + ```bash kubectl rollout history deployment -n ``` - ### Service Account Management (API & Permissions) 44. **List all service accounts in a namespace:** + ```bash kubectl get serviceaccounts -n ``` 45. **Get details about a specific service account:** + ```bash kubectl describe serviceaccount -n ``` 46. **Create a service account:** + ```bash kubectl create serviceaccount -n ``` 47. **Delete a service account:** + ```bash kubectl delete serviceaccount -n ``` - ### Configuration Management 48. **View all ConfigMaps in a namespace:** + ```bash kubectl get configmap -n ``` 49. **Describe a specific ConfigMap:** + ```bash kubectl describe configmap -n ``` 50. **List Secrets (API keys, credentials, etc.) in a namespace:** + ```bash kubectl get secrets -n ``` 51. **Decode a base64-encoded Secret to reveal its true content:** + ```bash kubectl get secret -n -o jsonpath="{.data.}" | base64 --decode ``` @@ -302,8 +352,12 @@ kubectl get secret -n -o jsonpath="{.data. --- ### Additional Tips: -- **Backup critical configurations:** Before making any destructive operations like `delete`, always back up your resource configurations or use GitOps processes. -- **Use dry-run mode for testing deletions**: Use `--dry-run=client` to simulate applying or deleting things without actually making changes. -Tools like **`kubectl krew`** can extend the functionality of `kubectl` and provide additional `kubectl` plugins for advanced features. +- **Backup critical configurations:** Before making any destructive operations + like `delete`, always back up your resource configurations or use GitOps + processes. +- **Use dry-run mode for testing deletions**: Use `--dry-run=client` to simulate + applying or deleting things without actually making changes. +Tools like **`kubectl krew`** can extend the functionality of `kubectl` and +provide additional `kubectl` plugins for advanced features. diff --git a/docusaurus/docs/kubernetes/getting-started-with-kubernetes.md b/docusaurus/docs/kubernetes/getting-started-with-kubernetes.md index 57b0d82..2e652a9 100644 --- a/docusaurus/docs/kubernetes/getting-started-with-kubernetes.md +++ b/docusaurus/docs/kubernetes/getting-started-with-kubernetes.md @@ -2,18 +2,30 @@ title: Practice Makes Perfect 🥷🏻🚀 --- - -At this point, our Raspberry Pis should be configured, and we should have a basic understanding of Kubernetes. Most importantly, we know why we're learning all of this. Now, let's move into the practical side of things by using [`kubectl`](https://kubernetes.io/docs/reference/kubectl/) (pronounced "kube-control"). - -Until we start using tools like [`helm`](https://helm.sh/), [`kubectl`](https://kubernetes.io/docs/reference/kubectl/) will be our best friend. As I've mentioned before in previous sections or during my [live streams](https://www.twitch.tv/programmer_network), we should add tools and abstractions only **once** the work becomes repetitive and frustrating. - -In this case, we aren't going to use [`helm`](https://helm.sh/) until we've learned how to use [`kubectl`](https://kubernetes.io/docs/reference/kubectl/) thoroughly and memorized the key commands. Mastering the basics will help us build a strong foundation and make it clear when it's time to introduce new abstractions. +At this point, our Raspberry Pis should be configured, and we should have a +basic understanding of Kubernetes. Most importantly, we know why we're learning +all of this. 
Now, let's move into the practical side of things by using +[`kubectl`](https://kubernetes.io/docs/reference/kubectl/) (pronounced +"kube-control"). + +Until we start using tools like [`helm`](https://helm.sh/), +[`kubectl`](https://kubernetes.io/docs/reference/kubectl/) will be our best +friend. As I've mentioned before in previous sections or during my +[live streams](https://www.twitch.tv/programmer_network), we should add tools +and abstractions only **once** the work becomes repetitive and frustrating. + +In this case, we aren't going to use [`helm`](https://helm.sh/) until we've +learned how to use [`kubectl`](https://kubernetes.io/docs/reference/kubectl/) +thoroughly and memorized the key commands. Mastering the basics will help us +build a strong foundation and make it clear when it's time to introduce new +abstractions. ## Namespace Setup **Create a new Kubernetes Namespace**: **Command:** + ```bash kubectl create namespace my-apps ``` @@ -27,7 +39,8 @@ kind: Namespace metadata: # The name of the Namespace name: my-apps - ``` +``` + **Apply with:** ```bash @@ -36,7 +49,7 @@ kubectl apply -f namespace.yaml ## Basic Deployment -**Deploy a Simple App**: +**Deploy a Simple App**: **Command:** @@ -74,8 +87,8 @@ spec: ports: # Container port that needs to be exposed - containerPort: 80 - ``` + **Apply with:** ```bash @@ -84,7 +97,7 @@ kubectl apply -f deployment.yaml ## Service Exposure -**Expose the Deployment**: +**Expose the Deployment**: **Command:** @@ -100,8 +113,9 @@ apiVersion: v1 kind: Service metadata: # Name of the Service - name: hello-world - # Namespace to create the service in + name: + hello-world + # Namespace to create the service in namespace: my-apps spec: # Select Pods with this label to expose via the Service @@ -115,16 +129,17 @@ spec: targetPort: 80 # The type of Service; ClusterIP makes it reachable only within the cluster type: ClusterIP - ``` + **Apply with:** + ```bash kubectl apply -f service.yaml ``` ## Verify Deployment -**Verify Using Port-Forward**: +**Verify Using Port-Forward**: ```bash # This is only needed if service type is ClusterIP @@ -138,13 +153,15 @@ kubectl port-forward deployment/hello-world 8081:80 --namespace=my-apps ```bash kubectl delete namespace my-apps ``` + **Or remove individual resources with:** ```bash kubectl delete -f .yaml ``` -**Warning**: Deleting the namespace will remove all resources in that namespace. Ensure you're okay with that before running the command. +**Warning**: Deleting the namespace will remove all resources in that namespace. +Ensure you're okay with that before running the command. ## Exercises @@ -155,13 +172,13 @@ Create a simple Pod running Nginx. ```bash kubectl run nginx-pod --image=nginx --restart=Never ``` - + Examine the Pod. ```bash kubectl describe pod nginx-pod ``` - + Delete the Pod. ```bash @@ -172,7 +189,8 @@ kubectl delete pod nginx-pod ### Exercise 2: Create a Deployment -Create a Deployment for a simple Node.js app (You can use a Docker image like `node:20`). +Create a Deployment for a simple Node.js app (You can use a Docker image like +`node:20`). ```bash kubectl create deployment node-app --image=node:20 @@ -190,7 +208,8 @@ Rollback the Deployment. kubectl rollout undo deployment node-app ``` -**Objective**: Learn how to manage application instances declaratively using Deployments. +**Objective**: Learn how to manage application instances declaratively using +Deployments. ### Exercise 3: Expose the Deployment as a Service @@ -205,9 +224,9 @@ Access the service within the cluster. 
```bash kubectl get svc ``` - + Use `kubectl port-forward` to test the service. - + ```bash kubectl port-forward svc/node-app 8080:80 ``` @@ -223,4 +242,4 @@ kubectl delete svc node-app kubectl delete deployment node-app ``` -**Objective**: Understand cleanup and resource management. \ No newline at end of file +**Objective**: Understand cleanup and resource management. diff --git a/docusaurus/docs/kubernetes/k3s-backup-cloudnative-pg.md b/docusaurus/docs/kubernetes/k3s-backup-cloudnative-pg.md new file mode 100644 index 0000000..190c90b --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-backup-cloudnative-pg.md @@ -0,0 +1,679 @@ +--- +title: CloudNative PG Backups +--- + +## Overview + +CloudNative PG provides PostgreSQL-specific backup capabilities that ensure +database consistency and enable point-in-time recovery (PITR). Unlike generic +volume backups, PostgreSQL backups require special handling to maintain +transactional integrity. + +### Why PostgreSQL Backups Are Different + +PostgreSQL databases need consistent snapshots that account for: + +- **Transaction isolation**: Backups must capture a consistent state across all + tables +- **Write-Ahead Logging (WAL)**: Transaction logs enable point-in-time recovery +- **Concurrent operations**: Backups must handle active transactions without + blocking +- **Data integrity**: Ensures no partial transactions or corrupted data + +CloudNative PG handles these requirements automatically through physical backups +and WAL archiving. + +### Backup Types + +CloudNative PG supports two backup mechanisms: + +1. **Base Backups**: Full physical backups of the database cluster (snapshots) +2. **WAL Archiving**: Continuous archiving of Write-Ahead Log files for PITR + +Together, these enable: + +- Full cluster restores from base backups +- Point-in-time recovery to any moment after the first base backup +- Continuous protection with minimal data loss + +## Prerequisites + +Before configuring backups, ensure you have: + +- CloudNative PG operator installed (see + [CloudNative PG Setup](../databases/setup-cloudnative-pg)) +- A PostgreSQL cluster managed by CloudNative PG +- Cloudflare R2 bucket for backup storage (or S3-compatible storage) +- R2 API credentials (Access Key ID and Secret Access Key) + +## Backup Storage Configuration + +### Create R2 Bucket + +1. In your Cloudflare dashboard, go to **R2** and click **Create bucket** +2. Give it a unique name (e.g., `postgres-backups`) +3. Note your **S3 Endpoint URL** from the bucket's main page: + `https://.r2.cloudflarestorage.com` + +### Create R2 API Credentials + +1. On the main R2 page, click **Manage R2 API Tokens** +2. Click **Create API Token** +3. Give it a name (e.g., `postgres-backup-token`) and grant it **Object Read & + Write** permissions +4. Securely copy the **Access Key ID** and **Secret Access Key** + +### Create Backup Storage Secret + +Create a Kubernetes secret with your R2 credentials: + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: postgres-backup-credentials + namespace: +type: Opaque +stringData: + AWS_ACCESS_KEY_ID: + AWS_SECRET_ACCESS_KEY: + AWS_ENDPOINTS: https://.r2.cloudflarestorage.com +``` + +Apply the secret: + +```bash +kubectl apply -f postgres-backup-credentials.yaml +``` + +### Configure Cluster for Backups + +Update your PostgreSQL cluster to enable backups. 
Add backup configuration to +your cluster spec: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: my-postgres-cluster + namespace: postgres-db +spec: + instances: 3 + imageName: ghcr.io/cloudnative-pg/postgresql:15 + + # Backup configuration + backup: + barmanObjectStore: + destinationPath: s3://postgres-backups/my-cluster + s3Credentials: + accessKeyId: + name: postgres-backup-credentials + key: AWS_ACCESS_KEY_ID + secretAccessKey: + name: postgres-backup-credentials + key: AWS_SECRET_ACCESS_KEY + region: auto + endpoint: https://.r2.cloudflarestorage.com + wal: + retention: 7d # Keep WAL files for 7 days + data: + retention: 30d # Keep base backups for 30 days + tags: + cluster: my-postgres-cluster + environment: production +``` + +**Key Configuration Options:** + +- `destinationPath`: S3 path where backups will be stored +- `wal.retention`: How long to keep WAL files (enables PITR) +- `data.retention`: How long to keep base backups +- `tags`: Optional metadata for backup organization + +## Scheduled Backups + +CloudNative PG uses the `Backup` CRD to create scheduled backups. You can create +recurring backups using Kubernetes CronJobs or the operator's built-in +scheduling. + +### Create a Scheduled Backup + +Create a `Backup` resource with a schedule: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Backup +metadata: + name: daily-backup + namespace: postgres-db +spec: + cluster: + name: my-postgres-cluster + method: barmanObjectStore + target: primary + retentionPolicy: '30d' +``` + +### Backup Schedule (4 Times Daily) + +To create backups 4 times per day (every 6 hours), create multiple Backup +resources or use a CronJob: + +**Option 1: Multiple Backup Resources** + +Create 4 separate Backup resources with different schedules: + +```yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: postgres-backup-00 + namespace: postgres-db +spec: + schedule: '0 0 * * *' # Daily at midnight + jobTemplate: + spec: + template: + spec: + containers: + - name: backup + image: ghcr.io/cloudnative-pg/cloudnative-pg:1.26.0 + command: + - /bin/sh + - -c + - | + kubectl create backup backup-$(date +%Y%m%d-%H%M%S) \ + --cluster=my-postgres-cluster \ + --namespace=postgres-db \ + --dry-run=client -o yaml | kubectl apply -f - + restartPolicy: OnFailure +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: postgres-backup-06 + namespace: postgres-db +spec: + schedule: '0 6 * * *' # Daily at 6 AM + # ... same jobTemplate as above +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: postgres-backup-12 + namespace: postgres-db +spec: + schedule: '0 12 * * *' # Daily at noon + # ... same jobTemplate as above +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: postgres-backup-18 + namespace: postgres-db +spec: + schedule: '0 18 * * *' # Daily at 6 PM + # ... 
same jobTemplate as above +``` + +**Option 2: Single CronJob with Multiple Triggers** + +A simpler approach is to use a single CronJob that runs every 6 hours: + +```yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: postgres-backup + namespace: postgres-db +spec: + schedule: '0 */6 * * *' # Every 6 hours + jobTemplate: + spec: + template: + spec: + serviceAccountName: postgres-backup-sa + containers: + - name: backup + image: bitnami/kubectl:latest + command: + - /bin/sh + - -c + - | + cat < -n postgres-db +``` + +### Verify Backup Completion + +Check backup status: + +```bash +kubectl get backup -n postgres-db -o jsonpath='{.status.phase}' +``` + +Status values: + +- `Pending`: Backup is queued +- `Running`: Backup in progress +- `Completed`: Backup finished successfully +- `Failed`: Backup failed + +### Check Backup in R2 + +Verify backups exist in your R2 bucket: + +```bash +# Using AWS CLI (configured for R2) +aws s3 ls s3://postgres-backups/my-cluster/ --endpoint-url=https://.r2.cloudflarestorage.com +``` + +### Verify WAL Archiving + +Check if WAL archiving is working: + +```bash +# Check cluster status +kubectl get cluster my-postgres-cluster -n postgres-db -o yaml | grep -A 10 backup + +# Check WAL files in R2 +aws s3 ls s3://postgres-backups/my-cluster/wal/ --endpoint-url=https://.r2.cloudflarestorage.com +``` + +## Point-in-Time Recovery (PITR) + +CloudNative PG supports point-in-time recovery, allowing you to restore to any +specific timestamp after your first base backup. + +### PITR Requirements + +- Base backup must exist +- WAL archiving must be enabled and continuous +- WAL files must be available for the time period you want to recover to + +### Restore to Specific Time + +Create a new cluster from a backup with PITR: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: my-postgres-cluster-restored + namespace: postgres-db +spec: + instances: 3 + imageName: ghcr.io/cloudnative-pg/postgresql:15 + + bootstrap: + recovery: + backup: + name: + recoveryTarget: + targetTime: '2024-01-15 14:30:00' # Restore to this timestamp + # Or use: + # targetXID: "" + # targetLSN: "" + # targetName: "" + + source: + + backup: + barmanObjectStore: + # Same backup configuration as original cluster + destinationPath: s3://postgres-backups/my-cluster + s3Credentials: + accessKeyId: + name: postgres-backup-credentials + key: AWS_ACCESS_KEY_ID + secretAccessKey: + name: postgres-backup-credentials + key: AWS_SECRET_ACCESS_KEY + region: auto + endpoint: https://.r2.cloudflarestorage.com +``` + +**Recovery Target Options:** + +- `targetTime`: Restore to a specific timestamp (most common) +- `targetXID`: Restore to a specific transaction ID +- `targetLSN`: Restore to a specific log sequence number +- `targetName`: Restore to a named recovery point + +## Restore Procedures + +### Restore from Base Backup + +To restore a cluster from a base backup: + +1. **Identify the backup to restore:** + + ```bash + kubectl get backups -n postgres-db + ``` + +2. **Create a new cluster from backup:** + + ```yaml + apiVersion: postgresql.cnpg.io/v1 + kind: Cluster + metadata: + name: my-postgres-cluster-restored + namespace: postgres-db + spec: + instances: 3 + imageName: ghcr.io/cloudnative-pg/postgresql:15 + + bootstrap: + recovery: + backup: + name: + source: + + backup: + # Same backup configuration as original + barmanObjectStore: + destinationPath: s3://postgres-backups/my-cluster + s3Credentials: + # ... same credentials as original + ``` + +3. 
**Apply the cluster:** + + ```bash + kubectl apply -f restored-cluster.yaml + ``` + +4. **Monitor restoration:** + + ```bash + kubectl get cluster my-postgres-cluster-restored -n postgres-db -w + kubectl get pods -n postgres-db -l cnpg.io/cluster=my-postgres-cluster-restored + ``` + +### Restore to Different Namespace + +You can restore a cluster to a different namespace: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: my-postgres-cluster-restored + namespace: postgres-db-restored # Different namespace +spec: + # ... same configuration + bootstrap: + recovery: + backup: + name: + namespace: postgres-db # Original namespace +``` + +### Verify Restored Cluster + +After restoration, verify the cluster: + +```bash +# Check cluster status +kubectl get cluster my-postgres-cluster-restored -n postgres-db + +# Check pods +kubectl get pods -n postgres-db -l cnpg.io/cluster=my-postgres-cluster-restored + +# Connect and verify data +kubectl exec -it my-postgres-cluster-restored-1 -n postgres-db -- psql -U postgres -c "SELECT version();" +``` + +## Backup Monitoring + +### Check Backup Schedule + +```bash +# Check CronJobs +kubectl get cronjobs -n postgres-db + +# Check recent backup jobs +kubectl get jobs -n postgres-db -l job-name=postgres-backup +``` + +### Monitor Backup Logs + +```bash +# Check backup pod logs +kubectl logs -n postgres-db -l cnpg.io/cluster=my-postgres-cluster | grep backup + +# Check CloudNative PG operator logs +kubectl logs -n cnpg-system deployment/cloudnative-pg-controller-manager | grep backup +``` + +### Set Up Alerts + +Consider setting up monitoring alerts for: + +- Backup failures +- WAL archiving failures +- Backup storage quota warnings +- Backup age (if backups are too old) + +## Backup Retention and Cleanup + +CloudNative PG automatically manages backup retention based on your +configuration: + +- **Base backups**: Retained according to `data.retention` (e.g., 30 days) +- **WAL files**: Retained according to `wal.retention` (e.g., 7 days) + +Old backups and WAL files are automatically cleaned up by the operator. + +### Manual Cleanup + +To manually delete a backup: + +```bash +kubectl delete backup -n postgres-db +``` + +**Note:** This only removes the Kubernetes resource. The actual backup files in +R2 are managed by the retention policy. + +## Troubleshooting + +### Backup Stuck in Pending + +1. **Check cluster status:** + + ```bash + kubectl get cluster -n + kubectl describe cluster -n + ``` + +2. **Check backup credentials:** + + ```bash + kubectl get secret postgres-backup-credentials -n + ``` + +3. **Check R2 connectivity:** + + ```bash + # Test from a pod + kubectl run -it --rm test-r2 --image=amazon/aws-cli --restart=Never -- \ + aws s3 ls s3://postgres-backups/ --endpoint-url=https://.r2.cloudflarestorage.com + ``` + +### Backup Fails + +1. **Check backup status:** + + ```bash + kubectl describe backup -n + ``` + +2. **Check operator logs:** + + ```bash + kubectl logs -n cnpg-system deployment/cloudnative-pg-controller-manager | grep + ``` + +3. **Common issues:** + - Invalid R2 credentials + - Insufficient storage space + - Network connectivity issues + - Cluster not in healthy state + +### WAL Archiving Not Working + +1. **Check cluster backup configuration:** + + ```bash + kubectl get cluster -n -o yaml | grep -A 20 backup + ``` + +2. **Verify WAL files in R2:** + + ```bash + aws s3 ls s3://postgres-backups/my-cluster/wal/ --endpoint-url=https://.r2.cloudflarestorage.com + ``` + +3. 
**Check cluster logs:** + + ```bash + kubectl logs -n | grep wal + ``` + +## Best Practices + +1. **Test Backups Regularly**: Verify backups can be restored +2. **Monitor Backup Success**: Set up alerts for backup failures +3. **Retention Policy**: Balance retention with storage costs +4. **WAL Retention**: Keep WAL files long enough for your RPO (Recovery Point + Objective) +5. **Separate Buckets**: Use dedicated R2 buckets for PostgreSQL backups +6. **Encryption**: Consider enabling encryption at rest in R2 +7. **Documentation**: Document your backup schedule and retention policies +8. **Recovery Testing**: Regularly test restore procedures + +## Integration with Backup Strategy + +CloudNative PG backups complement the three-layer backup strategy: + +- **Layer 1 (etcd)**: Control plane state +- **Layer 2 (Longhorn)**: Volume-level backups +- **Layer 3 (Velero)**: Application-aware backups +- **Layer 4 (CloudNative PG)**: Database-consistent backups with PITR + +**When to use CloudNative PG backups:** + +- Point-in-time recovery needed +- Database corruption requiring precise recovery +- Cross-cluster database migration +- Database-specific recovery scenarios + +**When to use other backups:** + +- Complete application restore (use Velero) +- Volume-level recovery (use Longhorn) +- Cluster-level recovery (use etcd + Velero) + +See [Disaster Recovery](./k3s-backup-disaster-recovery) for guidance on when to +use each backup layer. + +## Related Documentation + +- [Backup Strategy Overview](./k3s-backup) - Complete backup strategy +- [Disaster Recovery](./k3s-backup-disaster-recovery) - Recovery procedures +- [CloudNative PG Setup](../databases/setup-cloudnative-pg) - Operator + installation diff --git a/docusaurus/docs/kubernetes/k3s-backup-disaster-recovery.md b/docusaurus/docs/kubernetes/k3s-backup-disaster-recovery.md new file mode 100644 index 0000000..07232f6 --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-backup-disaster-recovery.md @@ -0,0 +1,679 @@ +--- +title: Disaster Recovery Procedures +--- + +## Overview + +This guide covers disaster recovery procedures for the four-layer backup +strategy. Each recovery scenario addresses different types of failures and data +loss situations. + +## Restore Strategy Overview + +Understanding which backup to use and in what order is critical for successful +recovery. This section explains the restore decision process and priority order. + +### Backup Layer Purposes + +Each backup layer protects different aspects of your cluster: + +1. **etcd Snapshots (Layer 1)**: Control plane state + - Kubernetes API objects + - Cluster configuration + - Resource definitions + - **Use when**: Cluster control plane is corrupted or lost + +2. **Longhorn Backups (Layer 2)**: Persistent volume data + - Raw volume snapshots + - Independent of cluster state + - **Use when**: Volume data is corrupted or lost, but cluster is intact + +3. **Velero Backups (Layer 3)**: Application-aware backups + - Kubernetes resources + volumes together + - Application configurations + - **Use when**: Applications need to be restored, or complete namespace + recovery + +4. **CloudNative PG Backups (Layer 4)**: Database-consistent backups + - PostgreSQL base backups + WAL archiving + - Point-in-time recovery (PITR) + - **Use when**: Database corruption, PITR needed, or database-specific + recovery + +### Restore Order and Priority + +When recovering from a complete cluster failure, follow this order: + +1. **Restore Control Plane (etcd)** - Must be first if cluster is gone +2. 
**Restore Infrastructure (Velero)** - Longhorn, Velero, ArgoCD, etc. +3. **Restore Applications (Velero)** - Your workloads +4. **Verify/Restore Databases (CloudNative PG)** - If Velero didn't capture + correctly or PITR needed + +**Important**: Do NOT restore all layers simultaneously. Restore in order to +avoid conflicts and ensure dependencies are met. + +### Restore Decision Matrix + +| Failure Type | Primary Backup | Secondary Backup | Restore Order | +| -------------------------------- | -------------- | ---------------- | -------------------------- | +| Complete cluster loss | etcd | Velero | etcd → Velero (infra) → | +| | | | Velero (apps) → Verify DBs | +| Control plane corruption | etcd | - | etcd snapshot restore | +| Application failure | Velero | - | Velero restore | +| Volume data loss | Longhorn | Velero | Longhorn restore OR Velero | +| Database corruption | CloudNative PG | Longhorn | CloudNative PG PITR → Base | +| | | | backup → Longhorn | +| Database point-in-time recovery | CloudNative PG | - | CloudNative PG PITR | +| Single namespace loss | Velero | - | Velero namespace restore | +| Infrastructure component failure | Velero | - | Velero selective restore | + +### When NOT to Restore All Layers + +Avoid restoring multiple layers simultaneously: + +- **Don't restore etcd + Velero together**: Restore etcd first, then Velero +- **Don't restore Longhorn + Velero volumes together**: Choose one method +- **Don't restore CloudNative PG + Velero databases together**: Use CloudNative + PG for database recovery, Velero for application resources + +### Quick Decision Guide + +**Ask yourself:** + +1. **Is the cluster completely gone?** + - Yes → Start with etcd snapshot restore + - No → Skip to step 2 + +2. **Is the infrastructure (Longhorn, Velero, ArgoCD) broken?** + - Yes → Restore infrastructure via Velero + - No → Skip to step 3 + +3. **Are applications broken?** + - Yes → Restore applications via Velero + - No → Skip to step 4 + +4. **Is the database corrupted or do you need PITR?** + - Yes → Use CloudNative PG backup restore + - No → Verify database health + +## Recovery Scenarios + +### Scenario 1: Complete Cluster Failure + +This is the worst-case scenario where the entire cluster is lost and needs to be +rebuilt from scratch. + +#### Prerequisites + +- Access to Cloudflare R2 bucket with backups +- R2 credentials for all four backup layers +- Fresh server(s) for cluster rebuild + +#### Recovery Steps (In Order) + +**Step 1: Restore Control Plane (etcd)** + +1. **Reinstall K3s:** + + ```bash + curl -sfL https://get.k3s.io | sh - + sudo k3s kubectl get nodes + ``` + +2. **Restore etcd (if needed):** + + ```bash + sudo k3s server \ + --cluster-init \ + --etcd-s3 \ + --etcd-s3-bucket k3s-backup-repository \ + --etcd-s3-folder k3s-etcd-snapshots \ + --etcd-s3-endpoint ".r2.cloudflarestorage.com" \ + --etcd-s3-access-key "" \ + --etcd-s3-secret-key "" \ + --cluster-reset-restore-path + ``` + + This restores the control plane state from the etcd snapshot. + +**Step 2: Restore Infrastructure Components** + +3. **Reinstall Longhorn:** + + ```bash + helm repo add longhorn https://charts.longhorn.io + helm repo update + helm install longhorn longhorn/longhorn \ + --namespace longhorn-system \ + --create-namespace \ + --set persistence.defaultClass=true + ``` + + Wait for Longhorn pods to be running: + + ```bash + kubectl get pods -n longhorn-system --watch + ``` + +4. 
**Reinstall Velero:** + + ```bash + kubectl create namespace velero + kubectl apply -f velero-r2-secret.yaml + helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts + helm install velero vmware-tanzu/velero --namespace velero -f velero-values.yaml + ``` + + Make sure you have the `velero-r2-secret.yaml` and `velero-values.yaml` files + from your original setup. + +5. **Verify Velero can see backups:** + + ```bash + # Wait a minute for Velero to sync + velero backup get + ``` + + You should see your previous backups listed. + +**Step 3: Restore Applications** + +6. **Restore from Velero backup:** + + ```bash + velero restore create --from-backup --wait + ``` + +**Step 4: Verify and Restore Databases** + +7. **Verify restored applications:** + + ```bash + kubectl get pods --all-namespaces + kubectl get pvc --all-namespaces + ``` + +8. **Check database cluster status:** + + ```bash + kubectl get clusters.postgresql.cnpg.io -A + kubectl get pods -A -l cnpg.io/cluster + ``` + +9. **If databases need restoration:** + - If Velero restored databases correctly, verify they're healthy + - If databases are corrupted or missing, restore from CloudNative PG backups + (see + [PostgreSQL Database Recovery](#scenario-6-postgresql-database-recovery) + below) + - If point-in-time recovery is needed, use CloudNative PG PITR + +### Scenario 2: Control Plane Corruption + +When the etcd database is corrupted but the cluster is still running. + +#### Recovery Steps + +1. **Stop k3s on all nodes:** + + ```bash + sudo systemctl stop k3s + ``` + +2. **Restore from etcd snapshot:** + + ```bash + sudo k3s server \ + --cluster-init \ + --etcd-s3 \ + --etcd-s3-bucket k3s-backup-repository \ + --etcd-s3-folder k3s-etcd-snapshots \ + --etcd-s3-endpoint ".r2.cloudflarestorage.com" \ + --etcd-s3-access-key "" \ + --etcd-s3-secret-key "" \ + --cluster-reset-restore-path + ``` + +3. **Restart k3s on other nodes:** + ```bash + sudo systemctl start k3s + ``` + +### Scenario 3: Volume Data Loss + +When persistent volume data is lost but the cluster is intact. + +#### Recovery Steps + +1. **Identify the affected volumes:** + + ```bash + kubectl get pvc --all-namespaces + ``` + +2. **Access Longhorn UI:** + + ```bash + kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80 + ``` + + Open `http://localhost:8080` + +3. **Restore from Longhorn backup:** + - Navigate to Backups + - Find the backup for the affected volume + - Create a new volume from the backup + - Update the PVC to use the restored volume + +4. **Or restore via Velero:** + ```bash + velero restore create --from-backup \ + --include-namespaces \ + --wait + ``` + +### Scenario 4: Single Application Failure + +When a single application needs to be restored. + +#### Recovery Steps + +**Option 1: Restore from Velero (Recommended)** + +```bash +velero restore create --from-backup \ + --include-namespaces \ + --wait +``` + +**Option 2: Restore from Longhorn** + +1. Restore the application's volumes from Longhorn backups +2. Recreate the application manifests +3. Update PVCs to point to restored volumes + +### Scenario 5: Partial Namespace Recovery + +When specific resources in a namespace need to be restored. + +#### Recovery Steps + +```bash +velero restore create --from-backup \ + --include-namespaces \ + --include-resources deployments,services,configmaps \ + --wait +``` + +### Scenario 6: PostgreSQL Database Recovery + +When PostgreSQL databases managed by CloudNative PG need to be restored. 
This +scenario covers database corruption, point-in-time recovery, and complete +database cluster restoration. + +#### When to Use CloudNative PG Backups vs Other Backups + +**Use CloudNative PG backups when:** + +- Database corruption is detected +- Point-in-time recovery (PITR) is needed +- Database-specific recovery is required +- Velero backup didn't capture database correctly +- Cross-cluster database migration + +**Use Velero/Longhorn backups when:** + +- Complete application restore (including database) +- Volume-level recovery is sufficient +- Database is part of larger application recovery + +#### Recovery Options + +**Option 1: Point-in-Time Recovery (PITR) - Recommended for Corruption** + +If you need to recover to a specific time before corruption occurred: + +1. **Identify the backup and target time:** + + ```bash + kubectl get backups -n + # Note the backup name and determine the recovery target time + ``` + +2. **Create a new cluster with PITR:** + + ```yaml + apiVersion: postgresql.cnpg.io/v1 + kind: Cluster + metadata: + name: -restored + namespace: + spec: + instances: 3 + imageName: ghcr.io/cloudnative-pg/postgresql:15 + + bootstrap: + recovery: + backup: + name: + recoveryTarget: + targetTime: '2024-01-15 14:30:00' # Time before corruption + + backup: + # Same backup configuration as original cluster + barmanObjectStore: + destinationPath: s3://postgres-backups/ + s3Credentials: + accessKeyId: + name: postgres-backup-credentials + key: AWS_ACCESS_KEY_ID + secretAccessKey: + name: postgres-backup-credentials + key: AWS_SECRET_ACCESS_KEY + region: auto + endpoint: https://.r2.cloudflarestorage.com + ``` + +3. **Apply the restored cluster:** + + ```bash + kubectl apply -f restored-cluster.yaml + ``` + +4. **Monitor restoration:** + + ```bash + kubectl get cluster -restored -n -w + kubectl get pods -n -l cnpg.io/cluster=-restored + ``` + +**Option 2: Restore from Base Backup** + +If PITR is not needed, restore from a base backup: + +1. **List available backups:** + + ```bash + kubectl get backups -n + ``` + +2. **Create cluster from backup:** + + ```yaml + apiVersion: postgresql.cnpg.io/v1 + kind: Cluster + metadata: + name: -restored + namespace: + spec: + instances: 3 + imageName: ghcr.io/cloudnative-pg/postgresql:15 + + bootstrap: + recovery: + backup: + name: + source: + + backup: + # Same backup configuration as original + barmanObjectStore: + # ... same configuration + ``` + +3. **Apply and verify:** + + ```bash + kubectl apply -f restored-cluster.yaml + kubectl get cluster -restored -n + ``` + +**Option 3: Restore to Different Namespace** + +If you need to restore to a different namespace: + +```yaml +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: -restored + namespace: # Different namespace +spec: + # ... same configuration + bootstrap: + recovery: + backup: + name: + namespace: # Original namespace +``` + +#### Recovery Steps for Database Corruption + +1. **Stop the corrupted cluster (if needed):** + + ```bash + kubectl delete cluster -n + ``` + +2. **Choose recovery method:** + - If you know the exact time before corruption → Use PITR (Option 1) + - If you need the latest backup → Use base backup (Option 2) + +3. **Create restored cluster** using one of the options above + +4. **Update application connections:** + - Update service endpoints if cluster name changed + - Update connection strings in application configs + - Verify applications can connect to restored database + +5. 
**Verify data integrity:** + + ```bash + # Connect to restored database + kubectl exec -it -restored-1 -n -- \ + psql -U postgres -c "SELECT COUNT(*) FROM ;" + + # Compare with expected data + # Run application-specific data validation + ``` + +#### Recovery Decision Flow + +``` +Database Issue Detected + │ + ├─ Need PITR? ──Yes──> Use CloudNative PG PITR (Option 1) + │ + └─No──> Latest backup sufficient? ──Yes──> Use CloudNative PG base backup (Option 2) + │ + └─No──> Try Longhorn volume restore + │ + └─No──> Use Velero backup (last resort) +``` + +## Recovery Verification + +After any recovery operation, verify the following: + +### Cluster Health + +```bash +# Check nodes +kubectl get nodes + +# Check pods +kubectl get pods --all-namespaces + +# Check services +kubectl get svc --all-namespaces +``` + +### Application Functionality + +1. **Test application endpoints:** + + ```bash + kubectl get ingress --all-namespaces + curl + ``` + +2. **Verify data integrity:** + - Check application logs + - Verify database connections + - Test critical functionality + +3. **Check persistent volumes:** + ```bash + kubectl get pvc --all-namespaces + kubectl get volumes -n longhorn-system + ``` + +### Backup System Health + +```bash +# Check etcd snapshots +sudo k3s etcd-snapshot list + +# Check Longhorn backups +kubectl get recurringjobs -n longhorn-system +kubectl get backups -n longhorn-system + +# Check Velero backups +velero backup get +kubectl get schedules -n velero + +# Check CloudNative PG backups +kubectl get backups -A +kubectl get cronjobs -A | grep postgres-backup +``` + +## Recovery Testing + +Regular recovery testing is crucial to ensure your backup strategy works. + +### Test Schedule + +- **Monthly**: Test restoring a single application +- **Quarterly**: Test restoring a namespace +- **Annually**: Test complete cluster recovery + +### Test Procedure + +1. **Create a test namespace:** + + ```bash + kubectl create namespace backup-test + ``` + +2. **Deploy a test application:** + + ```bash + kubectl apply -f test-app.yaml -n backup-test + ``` + +3. **Create a backup:** + + ```bash + velero backup create test-backup --include-namespaces backup-test --wait + ``` + +4. **Delete the test application:** + + ```bash + kubectl delete namespace backup-test + ``` + +5. **Restore from backup:** + + ```bash + velero restore create --from-backup test-backup --wait + ``` + +6. **Verify restoration:** + + ```bash + kubectl get all -n backup-test + ``` + +7. **Clean up:** + ```bash + kubectl delete namespace backup-test + velero backup delete test-backup + ``` + +## Recovery Best Practices + +1. **Document Recovery Procedures**: Keep detailed documentation of recovery + steps +2. **Regular Testing**: Test recovery procedures regularly +3. **Backup Verification**: Verify backups before you need them +4. **Recovery Runbooks**: Create runbooks for common recovery scenarios +5. **Communication Plan**: Have a plan for communicating during disasters +6. **Recovery Time Objectives**: Define RTO (Recovery Time Objective) and RPO + (Recovery Point Objective) +7. **Backup Monitoring**: Set up alerts for backup failures +8. **Documentation**: Keep recovery documentation up to date + +## Troubleshooting Recovery Issues + +### Velero Restore Fails + +1. **Check restore status:** + + ```bash + velero restore describe + ``` + +2. **Review restore logs:** + + ```bash + velero restore logs + ``` + +3. 
**Check resource conflicts:** + - Some resources may already exist + - Use `--restore-resource-filters` to exclude conflicting resources + +### Longhorn Volume Restore Fails + +1. **Check volume status:** + + ```bash + kubectl get volumes -n longhorn-system + kubectl describe volume -n longhorn-system + ``` + +2. **Verify backup exists:** + - Check Longhorn UI → Backups + - Verify backup is accessible + +3. **Check storage space:** + ```bash + kubectl get nodes + # Check available disk space on nodes + ``` + +### etcd Restore Fails + +1. **Verify snapshot exists:** + - Check R2 bucket for snapshot files + - Verify snapshot name is correct + +2. **Check k3s logs:** + + ```bash + sudo journalctl -u k3s -f + ``` + +3. **Verify R2 credentials:** + - Check R2 access key and secret + - Verify bucket permissions + +## References + +- **[etcd Snapshots](./k3s-backup-etcd)** - etcd backup and restore +- **[Longhorn Backups](./k3s-backup-longhorn)** - Volume backup and restore +- **[Velero Backups](./k3s-backup-velero)** - Cluster backup and restore +- **[CloudNative PG Backups](./k3s-backup-cloudnative-pg)** - PostgreSQL backup + and restore diff --git a/docusaurus/docs/kubernetes/k3s-backup-etcd.md b/docusaurus/docs/kubernetes/k3s-backup-etcd.md new file mode 100644 index 0000000..5c984f6 --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-backup-etcd.md @@ -0,0 +1,265 @@ +--- +title: K3s etcd Snapshots +--- + +## Overview + +K3s uses an embedded etcd database to store cluster state. Regular snapshots +ensure you can recover the control plane in case of cluster failure. This layer +protects your Kubernetes API objects, cluster configuration, and control plane +state. + +## Configuration + +The etcd backup is configured via Ansible automation. The configuration is +stored in `/etc/rancher/k3s/config.yaml` on each control plane node: + +```yaml +etcd-s3: true +etcd-s3-bucket: k3s-backup-repository +etcd-s3-folder: k3s-etcd-snapshots +etcd-s3-endpoint: '.r2.cloudflarestorage.com' +etcd-s3-access-key: 'YOUR_R2_ACCESS_KEY_ID' +etcd-s3-secret-key: 'YOUR_R2_SECRET_ACCESS_KEY' +etcd-snapshot-schedule-cron: '0 1 * * *' +etcd-snapshot-retention: 5 +``` + +### Configuration Parameters + +- `etcd-s3`: Enable S3-compatible storage for etcd snapshots +- `etcd-s3-bucket`: Your Cloudflare R2 bucket name +- `etcd-s3-folder`: Folder path within the bucket for snapshots +- `etcd-s3-endpoint`: R2 endpoint URL +- `etcd-s3-access-key`: R2 access key ID +- `etcd-s3-secret-key`: R2 secret access key +- `etcd-snapshot-schedule-cron`: Cron expression for automatic snapshots + (default: daily at 1:00 AM) +- `etcd-snapshot-retention`: Number of snapshots to retain (default: 5) + +## Ansible Automation + +If you use Ansible for automation, create a playbook to configure all master +nodes. 
Here's an example playbook structure: + +```yaml +--- +- name: Configure K3s server node + hosts: master_nodes + become: true + tasks: + - name: Ensure K3s config directory exists + file: + path: /etc/rancher/k3s + state: directory + owner: root + group: root + mode: '0755' + + - name: Place K3s config file with etcd backup settings + copy: + dest: /etc/rancher/k3s/config.yaml + owner: root + group: root + mode: '0644' + content: | + etcd-s3: true + etcd-s3-bucket: your-backup-bucket + etcd-s3-folder: k3s-etcd-snapshots + etcd-s3-endpoint: ".r2.cloudflarestorage.com" + etcd-s3-access-key: "YOUR_R2_ACCESS_KEY_ID" + etcd-s3-secret-key: "YOUR_R2_SECRET_ACCESS_KEY" + etcd-snapshot-schedule-cron: "0 1 * * *" + etcd-snapshot-retention: 5 + + - name: Restart K3s to apply configuration + systemd: + name: k3s + state: restarted +``` + +Run the playbook: + +```bash +ansible-playbook -i inventory.yml playbooks/etcd-cloudflare-r2.yaml --ask-become-pass +``` + +This playbook: + +- Creates the `/etc/rancher/k3s` directory if it doesn't exist +- Places the configuration file with R2 credentials +- Restarts the k3s service to apply changes + +### Manual Configuration + +If you prefer to configure manually on each control plane node: + +1. **Create the config directory:** + + ```bash + sudo mkdir -p /etc/rancher/k3s + ``` + +2. **Create the config file:** + + ```bash + sudo nano /etc/rancher/k3s/config.yaml + ``` + +3. **Add the configuration** (see Configuration section above) + +4. **Restart k3s:** + ```bash + sudo systemctl restart k3s + ``` + +## Manual Snapshot + +You can trigger a manual snapshot at any time: + +```bash +sudo k3s etcd-snapshot save +``` + +This creates an immediate snapshot and uploads it to your R2 bucket. + +### Snapshot with Custom Name + +```bash +sudo k3s etcd-snapshot save my-custom-snapshot-name +``` + +## Verification + +1. **Check snapshot files in R2:** + - Log into Cloudflare dashboard + - Navigate to your R2 bucket + - Check the `k3s-etcd-snapshots` folder for snapshot files + +2. **List local snapshots:** + + ```bash + sudo k3s etcd-snapshot list + ``` + +3. **Check snapshot schedule:** + ```bash + sudo cat /etc/rancher/k3s/config.yaml | grep etcd-snapshot-schedule + ``` + +## Restore from etcd Snapshot + +To restore a cluster from an etcd snapshot: + +### Prerequisites + +- Fresh k3s installation (or cluster reset) +- Access to R2 bucket with snapshots +- R2 credentials + +### Restore Procedure + +1. **List available snapshots in R2:** + - Check your Cloudflare R2 bucket + - Note the snapshot file name + +2. **On a fresh k3s installation, restore the snapshot:** + + ```bash + sudo k3s server \ + --cluster-init \ + --etcd-s3 \ + --etcd-s3-bucket k3s-backup-repository \ + --etcd-s3-folder k3s-etcd-snapshots \ + --etcd-s3-endpoint ".r2.cloudflarestorage.com" \ + --etcd-s3-access-key "YOUR_R2_ACCESS_KEY_ID" \ + --etcd-s3-secret-key "YOUR_R2_SECRET_ACCESS_KEY" \ + --cluster-reset-restore-path + ``` + +3. **Verify cluster state:** + ```bash + sudo k3s kubectl get nodes + sudo k3s kubectl get pods --all-namespaces + ``` + +### Cluster Reset + +If you need to reset the cluster to restore from a snapshot: + +```bash +sudo k3s-killall.sh +sudo k3s-uninstall.sh +# Then reinstall and restore as shown above +``` + +## Troubleshooting + +### Snapshot Not Created + +1. **Verify k3s config:** + + ```bash + sudo cat /etc/rancher/k3s/config.yaml + ``` + +2. **Check k3s logs:** + + ```bash + sudo journalctl -u k3s -f + ``` + +3. 
**Test R2 connectivity:** + - Verify R2 credentials are correct + - Check network connectivity to R2 endpoint + - Verify bucket exists and is accessible + +### Snapshot Upload Fails + +1. **Check R2 credentials:** + - Verify access key and secret key are correct + - Ensure the API token has Object Read & Write permissions + +2. **Verify bucket configuration:** + - Check bucket name matches configuration + - Verify endpoint URL is correct + +3. **Check network:** + ```bash + curl -I https://.r2.cloudflarestorage.com + ``` + +### Snapshot Not Scheduled + +1. **Verify cron expression:** + + ```bash + sudo cat /etc/rancher/k3s/config.yaml | grep etcd-snapshot-schedule-cron + ``` + +2. **Check k3s service status:** + + ```bash + sudo systemctl status k3s + ``` + +3. **Review k3s logs for snapshot activity:** + ```bash + sudo journalctl -u k3s | grep etcd-snapshot + ``` + +## Best Practices + +1. **Regular Testing**: Periodically test restoring from snapshots to ensure + they work +2. **Monitor Retention**: Adjust retention based on your needs (default: 5 + snapshots) +3. **Secure Credentials**: Store R2 credentials securely, consider using Vault +4. **Document Snapshots**: Keep a log of important snapshots (e.g., before major + upgrades) +5. **Multiple Buckets**: Consider separate buckets for different environments + +## References + +- **K3s etcd documentation**: https://docs.k3s.io/backup-restore +- **K3s backup guide**: https://docs.k3s.io/backup-restore/backup diff --git a/docusaurus/docs/kubernetes/k3s-backup-longhorn.md b/docusaurus/docs/kubernetes/k3s-backup-longhorn.md new file mode 100644 index 0000000..06c9d01 --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-backup-longhorn.md @@ -0,0 +1,355 @@ +--- +title: Longhorn Volume Backups +--- + +## Overview + +Longhorn provides native backup functionality for persistent volumes. This +ensures your application data is protected independently of the cluster state. +Longhorn backups capture the actual data stored in your persistent volumes and +store them in object storage (Cloudflare R2). + +## Configuration Files + +The Longhorn backup system consists of three components that you need to create: + +1. **Secret** - R2 credentials +2. **BackupTarget** - Backup destination configuration +3. **RecurringJob** - Scheduled backup job + +## Setup Steps + +1. **Create the R2 Secret:** + + Create a file named `longhorn-r2-secret.yaml` with the following content: + + ```yaml + apiVersion: v1 + kind: Secret + metadata: + name: r2-longhorn-secret + namespace: longhorn-system + type: Opaque + stringData: + AWS_ACCESS_KEY_ID: 'YOUR_R2_ACCESS_KEY_ID' + AWS_SECRET_ACCESS_KEY: 'YOUR_R2_SECRET_ACCESS_KEY' + AWS_ENDPOINTS: 'https://.r2.cloudflarestorage.com' + ``` + + Apply the secret: + + ```bash + kubectl apply -f longhorn-r2-secret.yaml + ``` + +2. **Configure the Backup Target:** + + Create a file named `backup-target.yaml` with the content shown in the + "Backup Target Configuration" section below, then apply it: + + ```bash + kubectl apply -f backup-target.yaml + ``` + +3. 
**Create the Daily Backup Job:** + + Create a file named `daily-backup-job.yaml` with the content shown in the + "Recurring Job Configuration" section below, then apply it: + + ```bash + kubectl apply -f daily-backup-job.yaml + ``` + +## Backup Target Configuration + +The `BackupTarget` resource configures where backups are stored: + +```yaml +apiVersion: longhorn.io/v1beta2 +kind: BackupTarget +metadata: + name: default + namespace: longhorn-system +spec: + backupTargetURL: 's3://k3s-backup-repository@auto/' + credentialSecret: 'r2-longhorn-secret' +``` + +### Backup Target URL Format + +For Cloudflare R2: + +``` +s3://@auto/ +``` + +- `@auto` is the region (R2 doesn't use regions, so `auto` works) +- Optional path can be added for organization (e.g., + `s3://bucket@auto/longhorn-backups/`) + +## Recurring Job Configuration + +The `RecurringJob` defines the backup schedule: + +```yaml +apiVersion: longhorn.io/v1beta2 +kind: RecurringJob +metadata: + name: daily-volume-backups + namespace: longhorn-system +spec: + task: 'backup' + cron: '0 2 * * *' # Daily at 2:00 AM + retain: 7 # Keep 7 backups + concurrency: 1 # One job at a time +``` + +### Recurring Job Parameters + +- `task`: Type of task (`backup`, `snapshot`, or `snapshot-cleanup`) +- `cron`: Cron expression for schedule +- `retain`: Number of backups to keep +- `concurrency`: How many jobs can run simultaneously + +## Volume Labeling + +For volumes to be included in the daily backup, they must have the appropriate +label. There are several ways to ensure your volumes are backed up: + +### Option 1: Label PVCs at Creation Time (Recommended) + +When creating PVCs via Helm charts or manifests, add the backup label: + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-app-data + namespace: my-app + labels: + recurring-job.longhorn.io/daily-volume-backups: enabled +spec: + accessModes: + - ReadWriteOnce + storageClassName: longhorn + resources: + requests: + storage: 10Gi +``` + +### Option 2: Label Existing PVCs + +For existing PVCs, add the label manually: + +```bash +# Label a specific PVC +kubectl label pvc -n recurring-job.longhorn.io/daily-volume-backups=enabled + +# Label all PVCs using Longhorn storage class +kubectl get pvc --all-namespaces -l storageClassName=longhorn -o name | \ + xargs -I {} kubectl label {} recurring-job.longhorn.io/daily-volume-backups=enabled +``` + +### Option 3: Label Longhorn Volumes Directly + +If PVC labels don't propagate to Longhorn volumes, label them directly: + +```bash +kubectl label volume -n longhorn-system recurring-job.longhorn.io/daily-volume-backups=enabled +``` + +### Option 4: Apply to All Volumes + +To apply the recurring job to all volumes (not recommended for production), +leave the `groups` and `labels` fields empty in the RecurringJob spec. + +## Verification + +1. **Check volume labels:** + + ```bash + kubectl get volumes -n longhorn-system --show-labels | grep recurring-job + ``` + +2. **Monitor backup jobs:** + + ```bash + kubectl get jobs -n longhorn-system + ``` + +3. **Check backup logs:** + + ```bash + kubectl logs job/ -n longhorn-system + ``` + +4. **Verify backups in R2 bucket:** + - Check your Cloudflare R2 bucket for backup files + +5. **Check recurring jobs:** + + ```bash + kubectl get recurringjobs -n longhorn-system + ``` + +6. 
**Check backup status in Longhorn UI:** + ```bash + kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80 + ``` + Navigate to `http://localhost:8080` → Backups + +## Restore from Longhorn Backup + +### Via Longhorn UI + +1. **Access Longhorn UI:** + + ```bash + kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80 + ``` + + Open `http://localhost:8080` in your browser + +2. **Navigate to Backups** in the Longhorn UI + +3. **Select the backup** you want to restore + +4. **Create a new volume** from the backup: + - Click on the backup + - Click "Create Volume" + - Choose a name for the restored volume + +5. **Update your PVC** to use the restored volume: + - Delete the old PVC (if needed) + - Create a new PVC pointing to the restored volume + - Or update your application to use the restored volume + +### Via kubectl + +1. **List available backups:** + + ```bash + kubectl get backups -n longhorn-system + ``` + +2. **Create a volume from backup:** + ```bash + kubectl create -f - < manual-backup-$(date +%s) -n longhorn-system + ``` + +## Troubleshooting + +### Backups Not Running + +1. **Check recurring job status:** + + ```bash + kubectl get recurringjobs -n longhorn-system + kubectl describe recurringjob daily-volume-backups -n longhorn-system + ``` + +2. **Verify volume labels:** + + ```bash + kubectl get volumes -n longhorn-system --show-labels + ``` + +3. **Check Longhorn manager logs:** + ```bash + kubectl logs -n longhorn-system -l app=longhorn-manager | grep backup + ``` + +### Backup Target Not Configured + +1. **Check backup target:** + + ```bash + kubectl get backuptarget -n longhorn-system + kubectl describe backuptarget default -n longhorn-system + ``` + +2. **Verify secret exists:** + + ```bash + kubectl get secret r2-longhorn-secret -n longhorn-system + ``` + +3. **Test backup target connection:** + - Use Longhorn UI → Settings → Backup + - Click "Test" to verify connection + +### Backup Failures + +1. **Check backup job logs:** + + ```bash + kubectl get jobs -n longhorn-system + kubectl logs job/ -n longhorn-system + ``` + +2. **Verify R2 credentials:** + + ```bash + kubectl get secret r2-longhorn-secret -n longhorn-system -o yaml + ``` + +3. **Check network connectivity:** + - Verify R2 endpoint is reachable + - Check firewall rules + +### Volume Not Backing Up + +1. **Verify volume has the label:** + + ```bash + kubectl get volume -n longhorn-system --show-labels + ``` + +2. **Check if volume is attached:** + + ```bash + kubectl get volume -n longhorn-system -o yaml | grep attached + ``` + +3. **Verify recurring job matches:** + ```bash + kubectl get recurringjob daily-volume-backups -n longhorn-system -o yaml + ``` + +## Best Practices + +1. **Label Strategy**: Use consistent labeling for all production volumes +2. **Retention Policy**: Adjust retention based on your R2 storage costs and + needs +3. **Test Restores**: Regularly test restoring volumes from backups +4. **Monitor Costs**: Keep an eye on R2 storage usage and costs +5. **Backup Verification**: Set up monitoring to alert on backup failures +6. 
**Separate Environments**: Use different backup targets or paths for dev/prod + +## References + +- **Longhorn documentation**: https://longhorn.io/docs/ +- **Longhorn backup guide**: + https://longhorn.io/docs/1.5.3/snapshots-and-backups/ diff --git a/docusaurus/docs/kubernetes/k3s-backup-velero.md b/docusaurus/docs/kubernetes/k3s-backup-velero.md new file mode 100644 index 0000000..bb571e8 --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-backup-velero.md @@ -0,0 +1,398 @@ +--- +title: Velero Cluster Backups +--- + +## Overview + +Velero provides application-aware backups that capture both Kubernetes resources +and persistent volume data using CSI snapshots. This layer provides the most +comprehensive backup solution, allowing you to restore entire applications or +the entire cluster. + +## Installation + +Velero is installed via Helm with custom values. First, create the necessary +configuration files: + +1. **Create the R2 secret file** (`velero-r2-secret.yaml`): + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: velero-r2-secret + namespace: velero +type: Opaque +stringData: + cloud: | + [default] + aws_access_key_id = YOUR_R2_ACCESS_KEY_ID + aws_secret_access_key = YOUR_R2_SECRET_ACCESS_KEY +``` + +2. **Create the Helm values file** (`velero-values.yaml`) - see Configuration + section below + +3. **Install Velero:** + +```bash +kubectl create namespace velero +kubectl apply -f velero-r2-secret.yaml +helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts +helm repo update +helm install velero vmware-tanzu/velero --namespace velero -f velero-values.yaml +``` + +### Prerequisites + +- Longhorn installed (for CSI snapshots) +- Cloudflare R2 bucket created +- R2 API credentials + +## Configuration + +Create a `velero-values.yaml` file with the following configuration: + +```yaml +credentials: + useSecret: true + existingSecret: velero-r2-secret + +configuration: + backupStorageLocation: + - name: default + provider: aws + bucket: 'your-velero-backup-bucket' + config: + s3Url: 'https://.r2.cloudflarestorage.com' + region: auto + prefix: 'velero' + volumeSnapshotLocation: + - name: default + provider: velero.io/csi + features: + - EnableCSI + +initContainers: + - name: velero-plugin-for-aws + image: velero/velero-plugin-for-aws:v1.12.1 + volumeMounts: + - mountPath: /target + name: plugins +``` + +Replace: + +- `your-velero-backup-bucket` with your R2 bucket name +- `` with your Cloudflare account ID + +## Daily Backup Schedule + +Create a `daily-cluster-backup.yaml` file with the following schedule: + +```yaml +apiVersion: velero.io/v1 +kind: Schedule +metadata: + name: daily-cluster-backup + namespace: velero +spec: + schedule: '0 3 * * *' # Daily at 3:00 AM + template: + ttl: '336h0m0s' # Retain for 14 days + excludedNamespaces: + - kube-system + - velero + - longhorn-system +``` + +Apply the schedule: + +```bash +kubectl apply -f daily-cluster-backup.yaml +``` + +### Schedule Configuration + +- `schedule`: Cron expression for backup timing +- `ttl`: Time-to-live for backups (336h = 14 days) +- `excludedNamespaces`: Namespaces to exclude from backup + +## Verification + +1. **Check Velero pods:** + + ```bash + kubectl get pods -n velero + ``` + + All pods should be in `Running` state. + +2. **List backup schedules:** + + ```bash + kubectl get schedules -n velero + ``` + +3. **List backups:** + + ```bash + velero backup get + ``` + +4. **Check backup details:** + + ```bash + velero backup describe + ``` + +5. 
**Check Velero server logs:** + ```bash + kubectl logs -n velero deployment/velero + ``` + +## Installing Velero CLI + +To interact with Velero, install the CLI client: + +```bash +# Download and extract (Linux) +wget https://github.com/vmware-tanzu/velero/releases/download/v1.16.1/velero-v1.16.1-linux-amd64.tar.gz +tar -xvf velero-v1.16.1-linux-amd64.tar.gz +sudo mv velero-v1.16.1-linux-amd64/velero /usr/local/bin/velero + +# Verify installation +velero version +``` + +### Configure Velero CLI + +Point the CLI to your Velero server: + +```bash +velero client config set server=http://localhost:8085 +``` + +Or use port-forwarding: + +```bash +kubectl port-forward -n velero deployment/velero 8085:8085 +``` + +## Manual Backup Operations + +### Create a Manual Backup + +**Full cluster backup:** + +```bash +velero backup create --wait +``` + +**Backup specific namespaces:** + +```bash +velero backup create --include-namespaces , --wait +``` + +**Backup with specific resources:** + +```bash +velero backup create \ + --include-resources deployments,services,configmaps,secrets \ + --wait +``` + +### List Backups + +```bash +velero backup get +``` + +### Describe Backup + +```bash +velero backup describe +``` + +### Delete Backup + +```bash +velero backup delete +``` + +### Restore from Backup + +**Full restore:** + +```bash +velero restore create --from-backup --wait +``` + +**Restore specific namespaces:** + +```bash +velero restore create --from-backup \ + --include-namespaces \ + --wait +``` + +**Restore specific resources:** + +```bash +velero restore create --from-backup \ + --include-resources deployments,services \ + --wait +``` + +**Restore with namespace mapping:** + +```bash +velero restore create --from-backup \ + --namespace-mappings old-namespace:new-namespace \ + --wait +``` + +## Backup Scheduling + +### Create Custom Schedule + +```yaml +apiVersion: velero.io/v1 +kind: Schedule +metadata: + name: weekly-full-backup + namespace: velero +spec: + schedule: '0 2 * * 0' # Every Sunday at 2:00 AM + template: + ttl: '720h0m0s' # Retain for 30 days + includedNamespaces: + - production +``` + +### Pause/Resume Schedule + +```bash +# Pause +kubectl patch schedule daily-cluster-backup -n velero --type merge -p '{"spec":{"paused":true}}' + +# Resume +kubectl patch schedule daily-cluster-backup -n velero --type merge -p '{"spec":{"paused":false}}' +``` + +## Restore Operations + +### Check Restore Status + +```bash +velero restore get +velero restore describe +``` + +### Restore Logs + +```bash +velero restore logs +``` + +### Delete Restore + +```bash +velero restore delete +``` + +## Troubleshooting + +### Velero Pod Not Starting + +1. **Check pod status:** + + ```bash + kubectl get pods -n velero + kubectl describe pod -n velero + ``` + +2. **Check logs:** + + ```bash + kubectl logs -n velero deployment/velero + ``` + +3. **Verify secret:** + ```bash + kubectl get secret velero-r2-secret -n velero + ``` + +### Backup Failures + +1. **Check backup status:** + + ```bash + velero backup describe + ``` + +2. **Check Velero logs:** + + ```bash + kubectl logs -n velero deployment/velero | grep + ``` + +3. **Verify R2 connectivity:** + - Check R2 credentials + - Verify bucket exists and is accessible + - Check network connectivity + +### CSI Snapshot Issues + +1. **Check VolumeSnapshotClass:** + + ```bash + kubectl get volumesnapshotclass + ``` + +2. **Verify CSI driver:** + + ```bash + kubectl get csidriver + ``` + +3. 
**Check Longhorn CSI:** + ```bash + kubectl get pods -n longhorn-system | grep csi + ``` + +### Restore Failures + +1. **Check restore status:** + + ```bash + velero restore describe + ``` + +2. **Review restore logs:** + + ```bash + velero restore logs + ``` + +3. **Verify target namespace exists:** + ```bash + kubectl get namespaces + ``` + +## Best Practices + +1. **Regular Testing**: Test restores regularly to ensure backups are valid +2. **Retention Policy**: Balance retention with storage costs +3. **Namespace Exclusion**: Exclude infrastructure namespaces from backups +4. **Backup Verification**: Monitor backup completion and success rates +5. **Documentation**: Document important backups (e.g., before major upgrades) +6. **Multiple Schedules**: Use different schedules for different environments +7. **Resource Filtering**: Only backup what you need to reduce backup size + +## References + +- **Velero documentation**: https://velero.io/docs/ +- **Velero GitHub**: https://github.com/vmware-tanzu/velero +- **Velero Helm chart**: https://github.com/vmware-tanzu/helm-charts diff --git a/docusaurus/docs/kubernetes/k3s-backup.md b/docusaurus/docs/kubernetes/k3s-backup.md index f8ebc3d..d776470 100644 --- a/docusaurus/docs/kubernetes/k3s-backup.md +++ b/docusaurus/docs/kubernetes/k3s-backup.md @@ -1,253 +1,131 @@ --- -title: K3S Backup +title: Production Backup Strategy Overview --- -### **Part 1: Prerequisites , Cloudflare R2 Setup** +## Overview -Before we touch the cluster, let's prepare our backup destination. +This production K3s cluster uses a comprehensive four-layer backup strategy to +ensure data protection at different levels: -1. **Create an R2 Bucket:** +1. **[K3s etcd Snapshots](./k3s-backup-etcd)** - Control plane database backups +2. **[Longhorn Volume Backups](./k3s-backup-longhorn)** - Persistent volume + backups +3. **[Velero Cluster Backups](./k3s-backup-velero)** - Application-aware cluster + backups +4. **[CloudNative PG Backups](./k3s-backup-cloudnative-pg)** - PostgreSQL + database-consistent backups with point-in-time recovery - - In your Cloudflare dashboard, go to **R2** and click **Create bucket**. - - Give it a unique name (e.g., `k3s-backup-repository`). Note this name. - - Note your **S3 Endpoint URL** from the bucket's main page. It looks like: `https://.r2.cloudflarestorage.com`. +All backups are stored in Cloudflare R2, providing off-site redundancy and +disaster recovery capabilities. -2. **Create R2 API Credentials:** +## Backup Schedule Summary - - On the main R2 page, click **Manage R2 API Tokens**. - - Click **Create API Token**. - - Give it a name (e.g., `k3s-backup-token`) and grant it **Object Read & Write** permissions. - - Click **Create API Token** and securely copy the **Access Key ID** and the **Secret Access Key**. +| Layer | Schedule | Retention | Destination | +| ---------------- | ---------------------- | --------- | ------------- | +| K3s etcd | Daily at 1:00 AM | 5 days | Cloudflare R2 | +| Longhorn Volumes | Daily at 2:00 AM | 7 days | Cloudflare R2 | +| Velero Cluster | Daily at 3:00 AM | 14 days | Cloudflare R2 | +| CloudNative PG | Every 6 hours (4x/day) | 30 days | Cloudflare R2 | -You now have four critical pieces of information: +## Prerequisites: Cloudflare R2 Setup -- Bucket Name -- S3 Endpoint URL -- Access Key ID -- Secret Access Key +Before configuring backups, you need a Cloudflare R2 bucket and API credentials: -### **Part 2: The Foundation , K3s Installation** +1. 
**Create an R2 Bucket:** + - In your Cloudflare dashboard, go to **R2** and click **Create bucket** + - Give it a unique name (e.g., `k3s-backup-repository`) + - Note your **S3 Endpoint URL** from the bucket's main page: + `https://.r2.cloudflarestorage.com` -Install K3s on your server node. Using the default installation script is straightforward. +2. **Create R2 API Credentials:** + - On the main R2 page, click **Manage R2 API Tokens** + - Click **Create API Token** + - Give it a name (e.g., `k3s-backup-token`) and grant it **Object Read & + Write** permissions + - Securely copy the **Access Key ID** and **Secret Access Key** -```bash -curl -sfL https://get.k3s.io | sh - -# Wait a moment for it to start -sudo k3s kubectl get nodes -``` +You'll need these credentials for all three backup layers. -### **Part 3: The Storage Layer , Longhorn Setup** - -We will install Longhorn using Helm, the standard package manager for Kubernetes. - -1. **Add the Longhorn Helm Repository:** +## Why Four Layers? - ```bash - helm repo add longhorn https://charts.longhorn.io - helm repo update - ``` - -2. **Install Longhorn:** - - ```bash - helm install longhorn longhorn/longhorn \ - --namespace longhorn-system \ - --create-namespace \ - --set persistence.defaultClass=true - ``` - - - `persistence.defaultClass=true`: This is crucial. It makes Longhorn the default storage provider for any `PersistentVolumeClaim` (PVC). - -3. **Verify the Installation:** - - ```bash - kubectl get pods -n longhorn-system --watch - # Wait until all pods are Running. This can take several minutes. - ``` - -4. **Configure Longhorn's Native Backup (Secondary Protection):** - - - Access the Longhorn UI. You can do this via port-forwarding: - ```bash - kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80 - ``` - Now open `http://localhost:8080` in your browser. - - Navigate to **Settings \> Backup**. - - Set the **Backup Target** to your R2 endpoint and bucket: `s3://@/` (for R2, region can be `auto`). For example: `s3://k3s-backup-repository@auto/longhorn` - - Create a Kubernetes secret containing your R2 credentials: - ```bash - kubectl create secret generic r2-longhorn-secret -n longhorn-system \ - --from-literal=AWS_ACCESS_KEY_ID='YOUR_R2_ACCESS_KEY_ID' \ - --from-literal=AWS_SECRET_ACCESS_KEY='YOUR_R2_SECRET_ACCESS_KEY' - ``` - - Set the **Backup Target Credential Secret** in the Longhorn UI to `r2-longhorn-secret`. - - Click **Save**. - -### **Part 4: The Primary Backup Layer , Velero Setup** - -This is the core of our application recovery strategy. - -1. **Create a Credentials File for Velero:** - Create a file named `credentials-velero`: - - ```ini - [default] - aws_access_key_id = YOUR_R2_ACCESS_KEY_ID - aws_secret_access_key = YOUR_R2_SECRET_ACCESS_KEY - ``` - -2. **Install Velero with Helm:** - This command will install Velero and configure it to use R2 as the backup destination and enable the crucial CSI plugin for Longhorn snapshots. 
- - ```bash - helm repo add vmware-tanzu https://vmware-tanzu.github.io/helm-charts - helm repo update - - helm install velero vmware-tanzu/velero \ - --namespace velero \ - --create-namespace \ - --set-file credentials.secretContents.cloud=credentials-velero \ - --set configuration.provider=aws \ - --set configuration.backupStorageLocation.name=default \ - --set configuration.backupStorageLocation.bucket= \ - --set configuration.backupStorageLocation.config.region=auto \ - --set configuration.backupStorageLocation.config.s3Url= \ - --set-string snapshotsEnabled=true \ - --set-string deployRestic=false \ - --set initContainers[0].name=velero-plugin-for-aws \ - --set initContainers[0].image=velero/velero-plugin-for-aws:v1.10.0 \ - --set initContainers[0].volumeMounts[0].mountPath=/target \ - --set initContainers[0].volumeMounts[0].name=plugins \ - --set initContainers[1].name=velero-plugin-for-csi \ - --set initContainers[1].image=velero/velero-plugin-for-csi:v0.6.2 \ - --set initContainers[1].volumeMounts[0].mountPath=/target \ - --set initContainers[1].volumeMounts[0].name=plugins - ``` - -3. **Verify the Velero Installation:** - - ```bash - kubectl get pods -n velero --watch - # Wait for the velero pod to be Running. - ``` - - You have now installed Velero and given it access to your R2 bucket. - -### **Part 5: The Test , Break and Rebuild** - -Now for the fun part. Let's prove the system works. - -**Step 1: Deploy a Stateful Application** - -Create a file `my-app.yaml`: - -```yaml -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: my-app-pvc -spec: - accessModes: - - ReadWriteOnce - storageClassName: longhorn - resources: - requests: - storage: 1Gi ---- -apiVersion: v1 -kind: Pod -metadata: - name: my-app-pod -spec: - containers: - - name: my-app - image: busybox - command: ["/bin/sh", "-c"] - args: - - while true; do - echo "$(date)" >> /data/test.log; - sleep 5; - done - volumeMounts: - - name: data - mountPath: /data - volumes: - - name: data - persistentVolumeClaim: - claimName: my-app-pvc -``` +Each backup layer serves a specific purpose: -Deploy it: +- **etcd Snapshots**: Protect the Kubernetes control plane state (API objects, + cluster configuration) +- **Longhorn Backups**: Protect persistent volume data independently of cluster + state +- **Velero Backups**: Provide application-aware backups that capture both + resources and volumes together +- **CloudNative PG Backups**: Provide PostgreSQL-consistent backups with + point-in-time recovery capabilities -```bash -kubectl apply -f my-app.yaml -``` +This multi-layer approach ensures you can recover from different types of +failures: -**Step 2: Create a Backup with Velero** +- Control plane corruption → Restore from etcd snapshot +- Volume data loss → Restore from Longhorn backup +- Complete cluster failure → Restore from Velero backup +- Database corruption/PITR → Restore from CloudNative PG backup -```bash -velero backup create my-first-backup --include-namespaces default -``` +## Quick Links -This command tells Velero to back up all resources in the `default` namespace. Because you enabled the CSI plugin, Velero automatically finds the PVC and triggers Longhorn to create a volume snapshot, which is then backed up alongside the Pod and PVC definitions. 
+- **[Setup etcd Snapshots](./k3s-backup-etcd)** - Configure control plane + backups +- **[Setup Longhorn Backups](./k3s-backup-longhorn)** - Configure volume backups +- **[Setup Velero Backups](./k3s-backup-velero)** - Configure cluster backups +- **[Setup CloudNative PG Backups](./k3s-backup-cloudnative-pg)** - Configure + PostgreSQL backups +- **[Disaster Recovery](./k3s-backup-disaster-recovery)** - Recovery procedures -**Step 3: The Disaster , Destroy the Cluster** +## Monitoring and Maintenance -Let's simulate a total cluster failure. We will completely remove K3s. +### Check Backup Status -```bash -# First, delete the application to simulate data loss -kubectl delete -f my-app.yaml +**etcd Snapshots:** -# Now, obliterate the cluster -/usr/local/bin/k3s-uninstall.sh +```bash +sudo k3s etcd-snapshot list ``` -Your cluster is now gone. All that remains is your R2 bucket. +**Longhorn Backups:** -**Step 4: The Recovery , Rebuild and Restore** - -1. **Re-install a Clean K3s Cluster:** - - ```bash - curl -sfL https://get.k3s.io | sh - - sudo k3s kubectl get nodes - ``` - -2. **Re-install Longhorn:** You must have the storage provider available before you can restore data to it. - - ```bash - helm repo add longhorn https://charts.longhorn.io - helm repo update - helm install longhorn longhorn/longhorn --namespace longhorn-system --create-namespace --set persistence.defaultClass=true - # Wait for Longhorn pods to be running - kubectl get pods -n longhorn-system --watch - ``` +```bash +kubectl get recurringjobs -n longhorn-system +kubectl get jobs -n longhorn-system +``` -3. **Re-install Velero with the EXACT same configuration:** Run the same Helm install command from Part 4 again. This is critical, as it reconnects Velero to your R2 bucket where the backups live. +**Velero Backups:** -4. **Verify Velero Sees Your Backup:** +```bash +kubectl get schedules -n velero +velero backup get +``` - ```bash - # It may take a minute for Velero to sync. - velero backup get - # You should see 'my-first-backup' in the list! - ``` +**CloudNative PG Backups:** -5. **Restore Everything:** +```bash +kubectl get backups -n +kubectl get cronjobs -n +``` - ```bash - velero restore create --from-backup my-first-backup - ``` +### Backup Health Checks -6. **Verify the Restore:** +Regularly verify that backups are completing successfully: - ```bash - kubectl get pods --watch - # You will see 'my-app-pod' get created. +1. **Check R2 bucket** for recent backup files +2. **Review Velero backup logs:** + ```bash + kubectl logs -n velero deployment/velero + ``` +3. **Check Longhorn backup jobs:** + ```bash + kubectl get jobs -n longhorn-system -l app=longhorn-manager + ``` - # Check the data that was restored - kubectl exec my-app-pod -- cat /data/test.log - ``` +## References -You will see the log file with the timestamps from before you destroyed the cluster. You have successfully recovered your application and its persistent state from nothing but a backup file in Cloudflare R2. 
+- **[etcd Snapshots](./k3s-backup-etcd)** - Control plane backup documentation +- **[Longhorn Backups](./k3s-backup-longhorn)** - Volume backup documentation +- **[Velero Backups](./k3s-backup-velero)** - Cluster backup documentation +- **[CloudNative PG Backups](./k3s-backup-cloudnative-pg)** - PostgreSQL backup + documentation diff --git a/docusaurus/docs/kubernetes/k3s-maintenance-health.md b/docusaurus/docs/kubernetes/k3s-maintenance-health.md new file mode 100644 index 0000000..5459fa4 --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-maintenance-health.md @@ -0,0 +1,324 @@ +--- +title: Health Checks and Monitoring +--- + +## Overview + +Regular health checks help you identify issues before they become critical. This +guide covers how to monitor your K3s cluster's components, nodes, and workloads +to ensure everything is running smoothly. + +## Cluster-Level Health Checks + +### Check All Nodes Status + +The most basic health check is verifying all nodes are in a `Ready` state: + +```bash +kubectl get nodes +``` + +**Expected Output:** + +```bash +NAME STATUS ROLES AGE VERSION +k3s-server-1.cluster Ready control-plane,master 30d v1.28.5+k3s1 +k3s-server-2.cluster Ready control-plane,master 30d v1.28.5+k3s1 +k3s-server-3.cluster Ready control-plane,master 30d v1.28.5+k3s1 +``` + +**What to Look For:** + +- All nodes should show `STATUS: Ready` +- No nodes should be in `NotReady` or `Unknown` state +- Version should be consistent across nodes + +### Detailed Node Information + +Get detailed information about a specific node: + +```bash +kubectl describe node +``` + +This shows: + +- Node conditions (Ready, MemoryPressure, DiskPressure, PIDPressure) +- Resource capacity and allocation +- System information +- Recent events + +**Check Node Conditions:** + +```bash +kubectl get nodes -o custom-columns=NAME:.metadata.name,STATUS:.status.conditions[-1].type,REASON:.status.conditions[-1].reason +``` + +### Check Cluster Components + +Verify all system components are running: + +```bash +kubectl get pods -n kube-system +``` + +**Key Components to Verify:** + +- `coredns-*` - DNS service +- `local-path-provisioner-*` - Storage provisioner (if using) +- `traefik-*` - Ingress controller (if enabled) +- `svclb-*` - Service load balancer (if enabled) + +For HA clusters, also check: + +- `etcd-*` - etcd pods (should match number of control plane nodes) + +### Check All Namespaces + +Get an overview of pods across all namespaces: + +```bash +kubectl get pods -A +``` + +Look for pods in `Error`, `CrashLoopBackOff`, or `Pending` states. + +## Component-Specific Health Checks + +### etcd Health (HA Clusters) + +For HA clusters with embedded etcd, check etcd health: + +```bash +kubectl get pods -n kube-system | grep etcd +``` + +All etcd pods should be `Running`. 
Check etcd logs if issues: + +```bash +kubectl logs -n kube-system etcd- +``` + +### DNS Health + +Test DNS resolution: + +```bash +# Create a test pod +kubectl run -it --rm --restart=Never test-dns --image=busybox -- nslookup kubernetes.default + +# Should resolve to the Kubernetes service IP +``` + +### Storage Health + +If using Longhorn, check storage system: + +```bash +kubectl get pods -n longhorn-system +kubectl get volumes -n longhorn-system +``` + +## Resource Monitoring + +### Node Resource Usage + +Check CPU and memory usage across nodes: + +```bash +kubectl top nodes +``` + +**Expected Output:** + +```bash +NAME CPU(cores) CPU% MEMORY(bytes) MEMORY% +k3s-server-1.cluster 500m 12% 2Gi 25% +k3s-server-2.cluster 450m 11% 1.8Gi 22% +k3s-server-3.cluster 520m 13% 2.1Gi 26% +``` + +### Pod Resource Usage + +Check resource usage by pod: + +```bash +kubectl top pods -A +``` + +### Disk Usage + +Check disk usage on nodes (SSH into each node): + +```bash +df -h +``` + +Pay attention to: + +- `/var/lib/rancher/k3s` - K3s data directory +- Root filesystem usage +- Any mount points for persistent storage + +## Application Health Checks + +### Check Service Endpoints + +Verify services have healthy endpoints: + +```bash +kubectl get endpoints -A +``` + +Each service should have at least one endpoint (unless intentionally scaled to +zero). + +### Check Ingress Status + +If using Traefik or another ingress controller: + +```bash +kubectl get ingress -A +``` + +Verify ingress resources are properly configured and receiving traffic. + +### Application Pod Health + +Check specific application pods: + +```bash +kubectl get pods -n +kubectl describe pod -n +``` + +Look for: + +- Pod status (Running, Pending, Error, CrashLoopBackOff) +- Restart counts (should be low) +- Resource limits and requests + +## Log Monitoring + +### View Recent Events + +Check for recent cluster events: + +```bash +kubectl get events -A --sort-by='.lastTimestamp' | tail -20 +``` + +### Check Component Logs + +View logs for specific components: + +```bash +# K3s service logs (on the node) +sudo journalctl -u k3s -n 100 + +# Kubernetes component logs +kubectl logs -n kube-system +``` + +### Application Logs + +Check application logs: + +```bash +kubectl logs -n +kubectl logs -n --previous # Previous container instance +``` + +## Automated Health Checks + +### Create a Health Check Script + +You can create a simple script to run regular health checks: + +```bash +#!/bin/bash +# health-check.sh + +echo "=== Node Status ===" +kubectl get nodes + +echo -e "\n=== System Pods ===" +kubectl get pods -n kube-system | grep -v Running + +echo -e "\n=== Resource Usage ===" +kubectl top nodes + +echo -e "\n=== Recent Events ===" +kubectl get events -A --sort-by='.lastTimestamp' | tail -10 +``` + +Make it executable and run periodically: + +```bash +chmod +x health-check.sh +./health-check.sh +``` + +### Set Up Cron Job + +Schedule regular health checks: + +```bash +# Add to crontab +crontab -e + +# Run health check every hour +0 * * * * /path/to/health-check.sh >> /var/log/k3s-health.log 2>&1 +``` + +## Health Check Checklist + +Run these checks regularly (weekly recommended): + +- [ ] All nodes are `Ready` +- [ ] No pods in `Error` or `CrashLoopBackOff` state +- [ ] System components are running +- [ ] Resource usage is within acceptable limits +- [ ] No disk space issues +- [ ] DNS resolution working +- [ ] Services have healthy endpoints +- [ ] No critical events in recent logs +- [ ] etcd health (for HA clusters) +- [ ] Storage 
system healthy (if using Longhorn) + +## Alerting Thresholds + +Consider setting up alerts for: + +- **Node NotReady** - Immediate attention required +- **Pod CrashLoopBackOff** - Application issue +- **High CPU Usage** (>80%) - May need scaling +- **High Memory Usage** (>85%) - Risk of OOM kills +- **Disk Usage** (>85%) - Risk of node issues +- **Pod Restarts** (>5 in 1 hour) - Application instability + +## Troubleshooting Health Issues + +If health checks reveal issues: + +1. **Node Not Ready:** + - Check node connectivity: `ping ` + - Verify K3s service: `sudo systemctl status k3s` (on the node) + - Review node logs: `sudo journalctl -u k3s -n 100` + +2. **Pod Not Starting:** + - Check pod events: `kubectl describe pod ` + - Review pod logs: `kubectl logs ` + - Verify resource availability: `kubectl describe node` + +3. **High Resource Usage:** + - Identify resource-heavy pods: `kubectl top pods -A` + - Check resource limits: `kubectl describe pod ` + - Consider scaling or resource optimization + +## Related Documentation + +- [K3s Maintenance Overview](./k3s-maintenance) - Maintenance overview +- [Troubleshooting](./k3s-maintenance-troubleshooting) - Detailed + troubleshooting guide +- [Updating K3s](./k3s-maintenance-updates) - Post-update health verification diff --git a/docusaurus/docs/kubernetes/k3s-maintenance-nodes.md b/docusaurus/docs/kubernetes/k3s-maintenance-nodes.md new file mode 100644 index 0000000..2c72b34 --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-maintenance-nodes.md @@ -0,0 +1,408 @@ +--- +title: Node Management +--- + +## Overview + +As your K3s cluster grows or hardware changes, you'll need to add, remove, or +replace nodes. This guide covers how to safely manage your cluster's node +lifecycle. + +## Adding Nodes + +### Adding a Control Plane Node (HA Cluster) + +To add an additional control plane node to an existing HA cluster: + +1. **Prepare the Node:** + - Install the operating system + - Configure network (DNS, static IP if needed) + - Ensure node can reach existing cluster nodes + +2. **Get the Cluster Token:** + + ```bash + # On an existing control plane node + sudo cat /var/lib/rancher/k3s/server/node-token + ``` + +3. **Install K3s on New Node:** + + ```bash + # Replace with your values + curl -sfL https://get.k3s.io | K3S_TOKEN= sh -s - server \ + --server https://:6443 \ + --node-name + ``` + +4. **Verify Node Joined:** + ```bash + kubectl get nodes + ``` + The new node should appear in the list with `Ready` status. + +### Adding a Worker Node (Agent) + +To add a worker node to your cluster: + +1. **Prepare the Node:** + - Install the operating system + - Configure network + - Ensure connectivity to control plane + +2. **Get Required Information:** + + ```bash + # On control plane node + sudo cat /var/lib/rancher/k3s/server/node-token # Agent token + # Note the server URL (usually https://:6443) + ``` + +3. **Install K3s Agent:** + + ```bash + curl -sfL https://get.k3s.io | K3S_URL=https://:6443 K3S_TOKEN= sh - + ``` + +4. **Verify Node Joined:** + ```bash + kubectl get nodes + ``` + +## Removing Nodes + +### Removing a Worker Node + +1. **Drain the Node:** + + ```bash + kubectl drain --ignore-daemonsets --delete-emptydir-data + ``` + + This safely evicts all pods from the node. + +2. **Delete the Node:** + + ```bash + kubectl delete node + ``` + +3. **Stop K3s on the Node:** + + ```bash + # On the node being removed + sudo systemctl stop k3s-agent # For worker nodes + # or + sudo systemctl stop k3s # If it was a server node + ``` + +4. 
**Uninstall K3s (Optional):** + ```bash + # On the node + /usr/local/bin/k3s-uninstall.sh # For agent + # or + /usr/local/bin/k3s-killall.sh # For server + ``` + +### Removing a Control Plane Node (HA Cluster) + +**Important:** In an HA cluster, ensure you maintain quorum. For a 3-node +cluster, you need at least 2 nodes running. + +1. **Verify Cluster Health:** + + ```bash + kubectl get nodes + kubectl get pods -n kube-system | grep etcd + ``` + + Ensure other etcd nodes are healthy. + +2. **Drain the Node:** + + ```bash + kubectl drain --ignore-daemonsets --delete-emptydir-data + ``` + +3. **Remove from etcd Cluster (if needed):** + - For embedded etcd, the node should be automatically removed + - Monitor etcd health after removal + +4. **Delete the Node:** + + ```bash + kubectl delete node + ``` + +5. **Stop and Uninstall on Node:** + ```bash + # On the node + sudo systemctl stop k3s + /usr/local/bin/k3s-killall.sh + ``` + +## Replacing Nodes + +### Replacing a Failed Node + +When a node fails and needs replacement: + +1. **Remove the Failed Node:** + + ```bash + # If node is still accessible + kubectl drain --ignore-daemonsets --delete-emptydir-data --force + kubectl delete node + ``` + + If node is not accessible: + + ```bash + # Force delete (use with caution) + kubectl delete node --force --grace-period=0 + ``` + +2. **Prepare Replacement Node:** + - Use same hostname if possible (or update DNS) + - Configure with same network settings + +3. **Add Replacement Node:** + - Follow "Adding Nodes" procedure above + - Use same node name if possible + +4. **Verify Replacement:** + ```bash + kubectl get nodes + kubectl get pods -A + ``` + Ensure pods reschedule and cluster is healthy. + +### Replacing Control Plane Node (HA) + +1. **Ensure Quorum:** + - Verify remaining control plane nodes are healthy + - For 3-node cluster, need at least 2 nodes + +2. **Remove Failed Node:** + + ```bash + kubectl delete node + ``` + +3. **Add Replacement:** + - Follow "Adding Control Plane Node" procedure + - Use same node name and configuration + +4. 
**Verify etcd Health:** + ```bash + kubectl get pods -n kube-system | grep etcd + # Should show expected number of etcd pods + ``` + +## Node Labeling and Tainting + +### Adding Labels + +Labels help organize and select nodes: + +```bash +# Add label to node +kubectl label nodes = + +# Example: Label node by type +kubectl label nodes k3s-worker-1.cluster node-type=worker +kubectl label nodes k3s-server-1.cluster node-type=control-plane +``` + +### Using Node Selectors + +Use node selectors in pod specs to schedule on specific nodes: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: my-pod +spec: + nodeSelector: + node-type: worker + containers: + - name: app + image: nginx +``` + +### Tainting Nodes + +Taints prevent pods from scheduling on nodes (unless they have matching +tolerations): + +```bash +# Add taint +kubectl taint nodes =: + +# Example: Make node dedicated for specific workload +kubectl taint nodes k3s-worker-1.cluster dedicated=app:NoSchedule + +# Remove taint +kubectl taint nodes - +``` + +**Taint Effects:** + +- `NoSchedule`: Pods without toleration won't be scheduled +- `PreferNoSchedule`: Prefer not to schedule, but allow if needed +- `NoExecute`: Evict existing pods without toleration + +### Adding Tolerations + +To allow pods to schedule on tainted nodes: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: my-pod +spec: + tolerations: + - key: 'dedicated' + operator: 'Equal' + value: 'app' + effect: 'NoSchedule' + containers: + - name: app + image: nginx +``` + +## Node Maintenance Mode + +### Cordon/Uncordon + +Temporarily prevent scheduling on a node: + +```bash +# Prevent new pods from scheduling +kubectl cordon + +# Allow scheduling again +kubectl uncordon +``` + +### Drain for Maintenance + +Safely prepare node for maintenance: + +```bash +# Drain node (evicts pods, marks unschedulable) +kubectl drain --ignore-daemonsets --delete-emptydir-data + +# Perform maintenance... + +# Make node schedulable again +kubectl uncordon +``` + +## Node Configuration + +### Viewing Node Configuration + +```bash +# Get node details +kubectl get node -o yaml + +# Get node status +kubectl describe node +``` + +### Updating Node Configuration + +Most node configuration is done via K3s installation parameters. To change: + +1. **Stop K3s:** + + ```bash + sudo systemctl stop k3s + ``` + +2. **Edit Configuration:** + + ```bash + sudo nano /etc/rancher/k3s/config.yaml + ``` + +3. **Restart K3s:** + ```bash + sudo systemctl start k3s + ``` + +## Best Practices + +1. **Always Drain Before Removal:** + - Safely evict pods before removing nodes + - Prevents data loss and service interruption + +2. **Maintain Quorum (HA Clusters):** + - Never remove nodes that would break etcd quorum + - For 3-node cluster, always keep at least 2 nodes + +3. **Use Consistent Naming:** + - Use DNS names instead of IPs + - Maintain consistent hostnames + +4. **Label Nodes Appropriately:** + - Use labels for organization + - Helps with pod scheduling and management + +5. **Monitor After Changes:** + - Watch cluster health after node changes + - Verify pods reschedule correctly + +6. **Backup Before Major Changes:** + - Take etcd snapshots before removing control plane nodes + - Backup persistent volumes if needed + +## Troubleshooting Node Issues + +### Node Won't Join Cluster + +1. **Check Network Connectivity:** + + ```bash + ping + telnet 6443 + ``` + +2. **Verify Token:** + - Ensure token is correct + - Check token hasn't expired + +3. 
**Check Firewall:** + - Ensure ports 6443, 10250 are open + - Verify node can reach server + +4. **Review Logs:** + ```bash + sudo journalctl -u k3s -n 100 + ``` + +### Node Shows as NotReady + +1. **Check K3s Service:** + + ```bash + sudo systemctl status k3s + ``` + +2. **Verify Network:** + - Check node can reach other nodes + - Verify DNS resolution + +3. **Check Resources:** + - Verify sufficient disk space + - Check memory availability + +## Related Documentation + +- [K3s Maintenance Overview](./k3s-maintenance) - Maintenance overview +- [Updating K3s](./k3s-maintenance-updates) - Node update procedures +- [Health Checks](./k3s-maintenance-health) - Node health verification +- [K3s Setup](./k3s-setup) - Initial cluster setup diff --git a/docusaurus/docs/kubernetes/k3s-maintenance-troubleshooting.md b/docusaurus/docs/kubernetes/k3s-maintenance-troubleshooting.md new file mode 100644 index 0000000..73bc2ac --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-maintenance-troubleshooting.md @@ -0,0 +1,498 @@ +--- +title: Troubleshooting Common Issues +--- + +## Overview + +When things go wrong in your K3s cluster, having a systematic troubleshooting +approach helps you resolve issues quickly. This guide covers common problems and +their solutions. + +## General Troubleshooting Approach + +1. **Gather Information**: Collect logs, events, and status information +2. **Identify the Scope**: Determine if it's a node, pod, service, or + cluster-wide issue +3. **Check Recent Changes**: Review what changed recently (updates, deployments, + config changes) +4. **Isolate the Problem**: Narrow down to specific components +5. **Apply Fixes**: Start with least invasive solutions +6. **Verify Resolution**: Confirm the issue is resolved and monitor + +## Node Issues + +### Node Not Ready + +**Symptoms:** + +- Node shows `NotReady` status in `kubectl get nodes` +- Pods cannot be scheduled on the node + +**Diagnosis:** + +```bash +# Check node status +kubectl get nodes +kubectl describe node + +# Check node conditions +kubectl get node -o yaml | grep -A 10 conditions +``` + +**Common Causes and Solutions:** + +1. **K3s Service Not Running:** + + ```bash + # On the affected node + sudo systemctl status k3s + sudo systemctl start k3s + sudo journalctl -u k3s -n 100 + ``` + +2. **Network Connectivity Issues:** + + ```bash + # Test connectivity from other nodes + ping + # Check DNS resolution + nslookup + ``` + +3. **Disk Space Issues:** + + ```bash + # On the affected node + df -h + # Check K3s data directory + du -sh /var/lib/rancher/k3s/* + ``` + +4. **Certificate Issues:** + ```bash + # Check certificate expiration + sudo k3s certificate rotate-ca + ``` + +### Node Resource Exhaustion + +**Symptoms:** + +- Pods stuck in `Pending` state +- Node shows `MemoryPressure` or `DiskPressure` + +**Diagnosis:** + +```bash +# Check node resources +kubectl describe node +kubectl top node + +# Check resource usage by pod +kubectl top pods -A --sort-by=memory +``` + +**Solutions:** + +1. **Free Up Resources:** + + ```bash + # Identify resource-heavy pods + kubectl top pods -A + # Delete unnecessary pods or scale down deployments + ``` + +2. **Add Resource Limits:** + + ```yaml + # In your pod/deployment spec + resources: + requests: + memory: '64Mi' + cpu: '250m' + limits: + memory: '128Mi' + cpu: '500m' + ``` + +3. 
**Add More Nodes:** + - Scale your cluster by adding worker nodes + +## Pod Issues + +### Pod Stuck in Pending + +**Symptoms:** + +- Pod shows `Pending` status +- Pod never starts + +**Diagnosis:** + +```bash +# Check pod events +kubectl describe pod -n + +# Check for resource constraints +kubectl get nodes +kubectl top nodes +``` + +**Common Causes:** + +1. **Insufficient Resources:** + - No nodes have available CPU/memory + - Solution: Free up resources or add nodes + +2. **Node Selector/Affinity Issues:** + + ```bash + # Check pod spec for node selectors + kubectl get pod -n -o yaml | grep -A 5 nodeSelector + # Verify nodes match selector + kubectl get nodes --show-labels + ``` + +3. **PVC Not Bound:** + ```bash + # Check PVC status + kubectl get pvc -n + # Check storage class + kubectl get storageclass + ``` + +### Pod CrashLoopBackOff + +**Symptoms:** + +- Pod repeatedly crashes and restarts +- Pod shows `CrashLoopBackOff` status + +**Diagnosis:** + +```bash +# Check pod logs +kubectl logs -n +kubectl logs -n --previous + +# Check pod events +kubectl describe pod -n + +# Check container exit codes +kubectl get pod -n -o jsonpath='{.status.containerStatuses[*].lastState.terminated.exitCode}' +``` + +**Common Causes:** + +1. **Application Errors:** + - Check application logs for errors + - Verify configuration files + - Check environment variables + +2. **Resource Limits:** + + ```bash + # Check if OOM killed + kubectl describe pod -n | grep -i oom + # Increase memory limits if needed + ``` + +3. **Missing Dependencies:** + - Verify required services are available + - Check service endpoints: `kubectl get endpoints -n ` + +4. **Configuration Issues:** + - Verify ConfigMaps and Secrets are correct + - Check volume mounts + +### Pod Image Pull Errors + +**Symptoms:** + +- Pod shows `ImagePullBackOff` or `ErrImagePull` +- Container cannot start + +**Diagnosis:** + +```bash +# Check pod events +kubectl describe pod -n + +# Verify image exists and is accessible +docker pull +``` + +**Solutions:** + +1. **Private Registry Authentication:** + + ```bash + # Create image pull secret + kubectl create secret docker-registry \ + --docker-server= \ + --docker-username= \ + --docker-password= \ + -n + + # Add to pod spec + imagePullSecrets: + - name: + ``` + +2. **Network Issues:** + - Check cluster can reach registry + - Verify DNS resolution for registry + +## Network Issues + +### Service Not Accessible + +**Symptoms:** + +- Cannot access service from within or outside cluster +- Service endpoints are empty + +**Diagnosis:** + +```bash +# Check service +kubectl get svc -n +kubectl describe svc -n + +# Check endpoints +kubectl get endpoints -n + +# Check pods +kubectl get pods -n -l +``` + +**Solutions:** + +1. **No Endpoints:** + - Verify pod labels match service selector + - Check pods are running and ready + +2. **Port Mismatch:** + - Verify service port matches pod container port + - Check targetPort in service spec + +3. **Network Policies:** + ```bash + # Check for network policies blocking traffic + kubectl get networkpolicies -A + ``` + +### DNS Resolution Issues + +**Symptoms:** + +- Cannot resolve service names +- DNS queries fail + +**Diagnosis:** + +```bash +# Check CoreDNS pods +kubectl get pods -n kube-system | grep coredns + +# Test DNS from pod +kubectl run -it --rm --restart=Never test-dns --image=busybox -- nslookup kubernetes.default + +# Check CoreDNS logs +kubectl logs -n kube-system -l k8s-app=kube-dns +``` + +**Solutions:** + +1. 
**CoreDNS Not Running:** + + ```bash + # Restart CoreDNS + kubectl delete pod -n kube-system -l k8s-app=kube-dns + ``` + +2. **DNS Configuration:** + - Check CoreDNS ConfigMap: + `kubectl get configmap coredns -n kube-system -o yaml` + - Verify upstream DNS servers + +## Storage Issues + +### PVC Not Binding + +**Symptoms:** + +- PVC shows `Pending` status +- Pods cannot start due to missing volumes + +**Diagnosis:** + +```bash +# Check PVC status +kubectl get pvc -n +kubectl describe pvc -n + +# Check storage class +kubectl get storageclass +kubectl describe storageclass +``` + +**Solutions:** + +1. **Storage Class Issues:** + - Verify storage class exists and is default + - Check provisioner is running (e.g., Longhorn) + +2. **Insufficient Storage:** + - Check available storage in storage system + - For Longhorn: `kubectl get volumes -n longhorn-system` + +3. **Access Mode Mismatch:** + - Verify PVC access mode matches storage class capabilities + +### Volume Mount Errors + +**Symptoms:** + +- Pod cannot mount volume +- Permission denied errors + +**Diagnosis:** + +```bash +# Check pod events +kubectl describe pod -n + +# Verify volume exists +kubectl get pv +kubectl get pvc -n +``` + +**Solutions:** + +1. **Volume Not Found:** + - Verify PVC exists and is bound + - Check volume name in pod spec + +2. **Permission Issues:** + - Check security context in pod spec + - Verify volume supports required access mode + +## Certificate Issues + +### Certificate Expiration + +**Symptoms:** + +- Authentication failures +- TLS handshake errors + +**Diagnosis:** + +```bash +# Check certificate expiration (on node) +sudo k3s certificate rotate-ca --check + +# Check API server certificate +openssl x509 -in /var/lib/rancher/k3s/server/tls/server.crt -noout -dates +``` + +**Solutions:** + +1. **Rotate Certificates:** + + ```bash + # On each node + sudo k3s certificate rotate-ca + ``` + +2. **Manual Certificate Renewal:** + - Follow K3s certificate renewal documentation + - May require cluster restart in some cases + +## etcd Issues (HA Clusters) + +### etcd Pod Not Running + +**Symptoms:** + +- etcd pod in `Error` or `CrashLoopBackOff` +- Cluster connectivity issues + +**Diagnosis:** + +```bash +# Check etcd pods +kubectl get pods -n kube-system | grep etcd + +# Check etcd logs +kubectl logs -n kube-system etcd- +``` + +**Solutions:** + +1. **etcd Data Corruption:** + - Restore from etcd snapshot + - See [Backup and Disaster Recovery](./k3s-backup-disaster-recovery) + +2. **Quorum Loss:** + - Ensure majority of etcd nodes are running + - In 3-node cluster, need at least 2 nodes + +## Log Analysis + +### Viewing Logs + +```bash +# Pod logs +kubectl logs -n +kubectl logs -n --previous +kubectl logs -n --tail=100 -f + +# Component logs +kubectl logs -n kube-system + +# K3s service logs (on node) +sudo journalctl -u k3s -n 100 +sudo journalctl -u k3s -f +``` + +### Common Log Patterns + +- **OOM Killed**: `Out of memory` or `OOMKilled` +- **Image Pull**: `Failed to pull image` or `ImagePullBackOff` +- **Crash**: `container exited with code` or `CrashLoopBackOff` +- **Network**: `connection refused` or `timeout` + +## Getting Help + +If you cannot resolve an issue: + +1. **Collect Information:** + + ```bash + # Cluster info + kubectl cluster-info dump > cluster-info.txt + + # Node info + kubectl get nodes -o yaml > nodes.yaml + + # Recent events + kubectl get events -A > events.txt + ``` + +2. 
**Check Documentation:** + - [K3s Documentation](https://docs.k3s.io/) + - [Kubernetes Troubleshooting](https://kubernetes.io/docs/tasks/debug/) + +3. **Community Resources:** + - K3s GitHub Issues + - Kubernetes Slack/Discord + +## Related Documentation + +- [K3s Maintenance Overview](./k3s-maintenance) - Maintenance overview +- [Health Checks](./k3s-maintenance-health) - Proactive health monitoring +- [Updating K3s](./k3s-maintenance-updates) - Update-related issues +- [Backup and Disaster Recovery](./k3s-backup-disaster-recovery) - Recovery + procedures diff --git a/docusaurus/docs/kubernetes/k3s-maintenance-updates.md b/docusaurus/docs/kubernetes/k3s-maintenance-updates.md new file mode 100644 index 0000000..7be14aa --- /dev/null +++ b/docusaurus/docs/kubernetes/k3s-maintenance-updates.md @@ -0,0 +1,256 @@ +--- +title: Updating K3s +--- + +## Overview + +Updating K3s involves safely taking each node offline (one at a time), +performing the update, then bringing the node back into the cluster. This +process ensures your workloads remain available during the update. + +## Pre-Update Checklist + +Before starting any update, complete these steps: + +1. **Backup Your Cluster**: Ensure you have recent backups of: + - etcd snapshots (for control plane nodes) + - Persistent volumes (via Longhorn or Velero) + - Important configuration files + +2. **Check Current Version**: Verify your current K3s version: + + ```bash + k3s --version + ``` + +3. **Review Release Notes**: Check the + [K3s release notes](https://github.com/k3s-io/k3s/releases) for breaking + changes or important updates. + +4. **Plan Update Order**: For multi-node clusters: + - Update worker nodes first (if you have any) + - Then update control plane nodes one at a time + - Always maintain quorum in HA setups (e.g., 2 out of 3 nodes available) + +## Update Process + +### Step 1: Drain the Node + +When performing maintenance (such as updating K3s), it's important to +**"drain"** the node to protect your workloads and avoid interruptions. + +#### What Does "Draining" a Node Mean? + +- **Draining** safely evicts all non-essential pods from the node, allowing + Kubernetes to reschedule them on other nodes. +- It also makes the node "unschedulable," ensuring no new pods can be assigned + to the node while it's offline. + +#### What Does "Evicting" a Pod Mean? + +In Kubernetes, "evicting" refers to the process of safely terminating Pods on a +node, typically to free up resources or for maintenance, allowing them to be +rescheduled on other nodes. + +#### How to Drain a Node + +To drain a node, run the following command replacing `` with the name +of the node you want to update: + +```bash +kubectl drain --ignore-daemonsets --delete-emptydir-data +``` + +**Explanation of Command Options:** + +- `--ignore-daemonsets`: Prevents Kubernetes from evicting system-critical pods + managed by DaemonSets (these won't be touched). +- `--delete-emptydir-data`: Deletes any storage associated with `EmptyDir` + volumes (used for temporary data in pods). + +**Example:** + +```bash +kubectl drain k3s-server-1.cluster --ignore-daemonsets --delete-emptydir-data +``` + +Wait for the drain to complete. You should see output indicating that pods have +been evicted and the node is now unschedulable. 
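Before moving on, it can be worth confirming that the drain actually did what you expect. A quick sketch, assuming the same node name used in the example above:

```bash
# The drained node should now report "Ready,SchedulingDisabled"
kubectl get nodes

# Only DaemonSet-managed pods (for example, Longhorn or monitoring agents)
# should still be running on the drained node
kubectl get pods -A -o wide --field-selector spec.nodeName=k3s-server-1.cluster
```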
+ +### Step 2: Stop the K3s Service + +To update K3s, we first need to stop the running K3s service on the node: + +```bash +sudo systemctl stop k3s +``` + +This command stops K3s gracefully, which ensures everything halts correctly and +there's no risk of corruption during the update. + +### Step 3: Update K3s + +Now, let's update K3s to its newest version. You can use the official K3s +installation script to do this in a streamlined way. Running the script below +will automatically detect the current installation and update it to the latest +available version: + +```bash +curl -sfL https://get.k3s.io | sh - +``` + +**To Update to a Specific Version:** + +If you need to update to a specific version (recommended for production), you +can specify the version: + +```bash +curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION=v1.28.5+k3s1 sh - +``` + +Replace `v1.28.5+k3s1` with your desired version. Check +[K3s releases](https://github.com/k3s-io/k3s/releases) for available versions. + +The script will download, install, and configure the new version of K3s while +keeping all your configurations in place. + +### Step 4: Start the K3s Service + +Once the update finishes, restart the K3s service on the node to bring it back +online: + +```bash +sudo systemctl start k3s +``` + +This will load the new K3s version and all services will resume. + +### Step 5: Verify the Service Started + +Check that K3s started successfully: + +```bash +sudo systemctl status k3s +``` + +You should see `Active: active (running)`. If there are any errors, check the +logs: + +```bash +sudo journalctl -u k3s -f +``` + +### Step 6: Uncordon the Node + +#### What Is "Uncordoning"? + +After an update, we need to make the node available again for scheduling new +pods, i.e., undo the "unschedulable" state created during the drain. + +#### How to Uncordon a Node + +To let Kubernetes know this node is now ready to schedule new pods again: + +```bash +kubectl uncordon +``` + +This command marks the node as "schedulable," meaning new pods can now be +assigned to it. + +**Example:** + +```bash +kubectl uncordon k3s-server-1.cluster +``` + +### Step 7: Verify the Update + +Once the node is back online, verify the K3s version to confirm that the update +was successful: + +```bash +k3s --version +``` + +Check that the output shows the new version installed. + +Also verify the node is ready: + +```bash +kubectl get nodes +``` + +You should see the node status as `Ready`. + +## Post-Update Verification + +After updating all nodes, perform these checks: + +1. **Check All Nodes Are Ready:** + + ```bash + kubectl get nodes + ``` + +2. **Verify Cluster Components:** + + ```bash + kubectl get pods -A + ``` + + Ensure all system pods are running. + +3. **Test Application Functionality:** + - Access your applications + - Verify services are responding + - Check ingress routing + +4. **Monitor for Issues:** + - Watch logs for errors: `kubectl logs -n ` + - Check resource usage: `kubectl top nodes` + - Monitor for 24-48 hours after updates + +## Updating Worker Nodes + +If you have worker nodes (agents), the process is similar but uses the agent +installation script: + +1. Drain the worker node +2. Stop the K3s agent service: `sudo systemctl stop k3s-agent` +3. Update using the agent script: + ```bash + curl -sfL https://get.k3s.io | K3S_URL=https://:6443 K3S_TOKEN= sh - + ``` +4. Start the service: `sudo systemctl start k3s-agent` +5. Uncordon the node + +## Troubleshooting Update Issues + +### Node Won't Start After Update + +1. 
Check service status: `sudo systemctl status k3s` +2. Review logs: `sudo journalctl -u k3s -n 100` +3. Verify configuration files in `/etc/rancher/k3s/` +4. Check for certificate issues: `kubectl get nodes` should show the node + +### Pods Not Scheduling After Uncordon + +1. Check node conditions: `kubectl describe node ` +2. Verify node has resources: + `kubectl describe node | grep -A 5 "Allocated resources"` +3. Check for taints: `kubectl describe node | grep Taints` + +### Cluster Connectivity Issues + +1. Verify network connectivity between nodes +2. Check firewall rules +3. Verify DNS resolution for node names +4. Review etcd health (for HA clusters) + +## Related Documentation + +- [K3s Maintenance Overview](./k3s-maintenance) - Other maintenance tasks +- [Health Checks](./k3s-maintenance-health) - Post-update health verification +- [Troubleshooting](./k3s-maintenance-troubleshooting) - Resolving update issues +- [Backup Strategy](./k3s-backup) - Pre-update backup procedures diff --git a/docusaurus/docs/kubernetes/k3s-maintenance.md b/docusaurus/docs/kubernetes/k3s-maintenance.md index fc67f25..d1c901b 100644 --- a/docusaurus/docs/kubernetes/k3s-maintenance.md +++ b/docusaurus/docs/kubernetes/k3s-maintenance.md @@ -1,85 +1,100 @@ --- -title: K3S Maintenance +title: K3s Maintenance Overview --- -### Steps for Updating K3S +## Overview -Updating K3S involves safely taking each node offline (one at a time), performing the update, then bringing the node back into the cluster. +Regular maintenance is essential for keeping your K3s cluster healthy, secure, +and up-to-date. This section covers the key maintenance tasks you'll need to +perform throughout your cluster's lifecycle. -Before doing any updates, **backup your data**. Ensure you have backups of your K3S server data and important configuration files. This is especially crucial if something goes wrong during the update and you need to restore to a previous state. +## Maintenance Categories -### Draining the Node -When performing maintenance (such as updating K3S), it’s important to **"drain"** the node to protect your workloads and avoid interruptions. +### [Updating K3s](./k3s-maintenance-updates) -#### What Does "Draining" a Node Mean? -- **Draining** safely evicts all non-essential pods from the node, allowing Kubernetes to reschedule them on other nodes. -- It also makes the node "unschedulable," ensuring no new pods can be assigned to the node while it’s offline. +Keeping your K3s cluster updated is crucial for security patches, bug fixes, and +new features. Learn how to safely update K3s nodes one at a time without +disrupting your workloads. -#### What does "evicting" a Pod Mean? +**Key Topics:** -- In Kubernetes, "evicts" refers to the process of safely terminating Pods on a node, typically to free up resources or for maintenance, allowing them to be rescheduled on other nodes. +- Pre-update backups +- Draining nodes safely +- Updating K3s version +- Verifying updates -#### How to Drain a Node: -To drain a node, run the following command replacing `` with the name of the node you want to update: +### [Health Checks and Monitoring](./k3s-maintenance-health) -```bash -kubectl drain --ignore-daemonsets --delete-emptydir-data -``` +Regular health checks help you identify issues before they become critical. +Monitor your cluster's components, nodes, and workloads to ensure everything is +running smoothly. 
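As a rough sketch, a routine sweep can be as simple as a few read-only commands; the linked page covers the full procedure:

```bash
# All nodes should report Ready
kubectl get nodes

# Anything not Running or Completed deserves a closer look
kubectl get pods -A | grep -vE 'Running|Completed'

# Spot obvious CPU/memory pressure (requires metrics-server)
kubectl top nodes
```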
-**Explanation of Command Options:** -- `--ignore-daemonsets`: Prevents Kubernetes from evicting system-critical pods managed by DaemonSets (these won't be touched). -- `--delete-emptydir-data`: Deletes any storage associated with `EmptyDir` volumes (used for temporary data in pods). +**Key Topics:** -### Stopping the K3S Service +- Cluster health verification +- Node status checks +- Component health monitoring +- Resource usage monitoring -To update K3S, we first need to stop the running K3S service on the Raspberry Pi: +### [Troubleshooting Common Issues](./k3s-maintenance-troubleshooting) -```bash -sudo systemctl stop k3s -``` +When things go wrong, having a systematic troubleshooting approach helps you +resolve issues quickly. Learn how to diagnose and fix common K3s cluster +problems. -This command stops K3S gracefully, which ensures everything halts correctly and there's no risk of corruption during the update. +**Key Topics:** -### Updating K3S +- Pod startup failures +- Network connectivity issues +- Storage problems +- Certificate issues +- Log analysis -Now, let's update K3S to its newest version. You can use the official K3S installation script to do this in a streamlined way. Running the script below will automatically detect the current installation and update it to the latest available version: +### [Node Management](./k3s-maintenance-nodes) -```bash -curl -sfL https://get.k3s.io | sh - -``` +As your cluster grows or hardware changes, you'll need to add, remove, or +replace nodes. Learn how to safely manage your cluster's node lifecycle. -The script will download, install, and configure the latest version of K3S while keeping all your configurations in place. +**Key Topics:** -### Starting the K3S Service Again +- Adding new nodes +- Removing nodes +- Replacing failed nodes +- Node labeling and tainting -Once the update finishes, restart the K3S service on the node to bring it back online: +## Maintenance Best Practices -```bash -sudo systemctl start k3s -``` +1. **Always Backup First**: Before any maintenance operation, ensure you have + recent backups of your etcd data, persistent volumes, and cluster + configuration. -This will load the new K3S version and all services will resume. +2. **One Node at a Time**: In multi-node clusters, perform maintenance on one + node at a time to maintain cluster availability. -### Uncordoning the Node +3. **Schedule During Low Traffic**: Plan maintenance windows during periods of + low application usage when possible. -#### What Is "Uncordoning"? -After an update, we need to make the node available again for scheduling new pods, i.e., undo the "unschedulable" state created during the drain. +4. **Test in Non-Production**: If you have a test environment, validate + maintenance procedures there first. -#### How to Uncordon a Node: -To let Kubernetes know this node is now ready to schedule new pods again: +5. **Document Changes**: Keep track of what maintenance was performed, when, and + any issues encountered. -```bash -kubectl uncordon -``` +6. **Monitor After Changes**: After any maintenance, closely monitor your + cluster for 24-48 hours to ensure stability. -This command marks the node as "schedulable," meaning new pods can now be assigned to it. 
+## Quick Reference -### Verifying the Update +| Task | Frequency | Documentation | +| ------------------- | -------------------- | ---------------------------------------------------- | +| Update K3s | Monthly or as needed | [Updating K3s](./k3s-maintenance-updates) | +| Health Checks | Weekly | [Health Checks](./k3s-maintenance-health) | +| Review Logs | As needed | [Troubleshooting](./k3s-maintenance-troubleshooting) | +| Node Management | As needed | [Node Management](./k3s-maintenance-nodes) | +| Backup Verification | Weekly | [Backup Strategy](../kubernetes/k3s-backup) | -Once the node is back online, verify the K3S version to confirm that the update was successful: +## Related Documentation -```bash -k3s --version -``` - -Check that the output shows the latest version installed. \ No newline at end of file +- [K3s Backup Strategy](./k3s-backup) - Backup procedures and disaster recovery +- [K3s Setup](./k3s-setup) - Initial cluster installation +- [ArgoCD Setup](./setup-argocd) - GitOps configuration management diff --git a/docusaurus/docs/kubernetes/k3s-setup.md b/docusaurus/docs/kubernetes/k3s-setup.md index d8439f3..9b4b90f 100644 --- a/docusaurus/docs/kubernetes/k3s-setup.md +++ b/docusaurus/docs/kubernetes/k3s-setup.md @@ -10,9 +10,18 @@ Make sure you have: - [x] [Set up your Mini-PCs (if any)](../hardware-raspberry-pi-setup/mini-pcs-setup.md) - [x] [Configured your Network](../networking/mikrotik/network-overview.mdx) -Now, we are going to set up a Kubernetes cluster. You don't need to understand what Kubernetes is at this point, just follow the steps and you'll be able to use it. Once it's set up, you'll be able to deploy your applications and learn more about how it works. - -In this guide, we will set up a [HA (High Availability)](https://en.wikipedia.org/wiki/High-availability_cluster) cluster with 3 master nodes. If you are using different [hardware](../hardware-raspberry-pi-setup/hardware.mdx), you can set up your cluster accordingly. For example, if you are using a single machine (e.g., a single Raspberry Pi, a single Mini-PC, etc.), you can set up a single master node cluster. +Now, we are going to set up a Kubernetes cluster. You don't need to understand +what Kubernetes is at this point, just follow the steps and you'll be able to +use it. Once it's set up, you'll be able to deploy your applications and learn +more about how it works. + +In this guide, we will set up a +[HA (High Availability)](https://en.wikipedia.org/wiki/High-availability_cluster) +cluster with 3 master nodes. If you are using different +[hardware](../hardware-raspberry-pi-setup/hardware.mdx), you can set up your +cluster accordingly. For example, if you are using a single machine (e.g., a +single Raspberry Pi, a single Mini-PC, etc.), you can set up a single master +node cluster. The official K3S documentation also explains both: @@ -21,19 +30,40 @@ The official K3S documentation also explains both: ### Set Up the Master Node(s) -> **Note:** We disable the default installation of [Traefik](https://traefik.io/traefik/) because we will install it manually later using [Helm](https://helm.sh/). We also disable `servicelb` since we will use [MetalLB](https://metallb.io/) as our [load balancer](). Don't worry much about those right now. You will learn more about what they are and how to use them later. +> **Note:** We disable the default installation of +> [Traefik](https://traefik.io/traefik/) because we will install it manually +> later using [Helm](https://helm.sh/). 
We also disable `servicelb` since we +> will use [MetalLB](https://metallb.io/) as our +> [load balancer](). +> Don't worry much about those right now. You will learn more about what they +> are and how to use them later. **Why Use DNS Names Instead of IPs?** -We use static DNS names (not raw IP addresses) for our nodes, as configured in our [Network Device Configuration](../networking/mikrotik/device-configuration.mdx). +We use static DNS names (not raw IP addresses) for our nodes, as configured in +our +[Network Device Configuration](../networking/mikrotik/device-configuration.mdx). -If we use IP addresses directly when setting up K3S, and those IPs ever change (for example, due to network reconfiguration or moving to a different subnet), our cluster will likely break. This is because K3S (and Kubernetes in general) embeds the node addresses, including in SSL certificates and cluster configuration. Changing the IPs later would require us to tear down and completely recreate the cluster, as the certificates and internal references would no longer match. +If we use IP addresses directly when setting up K3S, and those IPs ever change +(for example, due to network reconfiguration or moving to a different subnet), +our cluster will likely break. This is because K3S (and Kubernetes in general) +embeds the node addresses, including in SSL certificates and cluster +configuration. Changing the IPs later would require us to tear down and +completely recreate the cluster, as the certificates and internal references +would no longer match. -By using DNS names that always resolve to the correct node, we can change the underlying IPs in our network without having to rebuild our Kubernetes cluster. The nodes will continue to find and trust each other as long as the DNS names remain consistent. +By using DNS names that always resolve to the correct node, we can change the +underlying IPs in our network without having to rebuild our Kubernetes cluster. +The nodes will continue to find and trust each other as long as the DNS names +remain consistent. Select one Raspberry Pi to act as the master node, and install K3S: -As you can see, we are using the static DNS names that we've set up in our [Network Device Configuration](../networking/mikrotik/device-configuration.mdx). This is really important to ensure that the nodes can communicate with each other, also that we can have our cluster running even if the subnet changes in the future. +As you can see, we are using the static DNS names that we've set up in our +[Network Device Configuration](../networking/mikrotik/device-configuration.mdx). +This is really important to ensure that the nodes can communicate with each +other, also that we can have our cluster running even if the subnet changes in +the future. 
```bash title="k3s-server-1.cluster" curl -sfL https://get.k3s.io | K3S_TOKEN=SECRET_TOKEN_HERE sh -s - server \ @@ -43,7 +73,8 @@ curl -sfL https://get.k3s.io | K3S_TOKEN=SECRET_TOKEN_HERE sh -s - server \ --node-name k3s-server-1.cluster ``` -If you have multiple master nodes (as in my case, with 3), run the following command on each additional master node: +If you have multiple master nodes (as in my case, with 3), run the following +command on each additional master node: ```bash title="k3s-server-2.cluster" curl -sfL https://get.k3s.io | K3S_TOKEN=SECRET_TOKEN_HERE sh -s - server \ @@ -61,7 +92,9 @@ curl -sfL https://get.k3s.io | K3S_TOKEN=SECRET_TOKEN_HERE sh -s - server \ --node-name k3s-server-3.cluster ``` -**Copy and Set Permissions for Kubeconfig:** To avoid permission issues when using `kubectl`, copy the generated kubeconfig to your home directory and update its ownership: +**Copy and Set Permissions for Kubeconfig:** To avoid permission issues when +using `kubectl`, copy the generated kubeconfig to your home directory and update +its ownership: ```bash title="Copy and Set Permissions for Kubeconfig" # Create the .kube directory in your home directory if it doesn't already exist @@ -76,12 +109,14 @@ sudo chown $(id -u):$(id -g) ~/.kube/config > **Troubleshooting Tips:** > -> - If `kubectl get nodes` hangs or fails, check that the K3S service is running: -> `sudo systemctl status k3s` -> - If you see certificate or permission errors, double-check the ownership and permissions of `~/.kube/config`. +> - If `kubectl get nodes` hangs or fails, check that the K3S service is +> running: `sudo systemctl status k3s` +> - If you see certificate or permission errors, double-check the ownership and +> permissions of `~/.kube/config`. > - Make sure your firewall allows traffic on port 6443 between nodes. -**Verify Cluster:** Ensure that `/etc/rancher/k3s/k3s.yaml` was created and the cluster is accessible: +**Verify Cluster:** Ensure that `/etc/rancher/k3s/k3s.yaml` was created and the +cluster is accessible: ```bash title="Verify Cluster" kubectl --kubeconfig ~/.kube/config get nodes @@ -94,26 +129,31 @@ kubectl get componentstatuses kubectl get pods --all-namespaces ``` -**Set KUBECONFIG Environment Variable:** To make it more convenient to run `kubectl` commands without specifying the `--kubeconfig` flag every time, set an environment variable to automatically point to the kubeconfig file: +**Set KUBECONFIG Environment Variable:** To make it more convenient to run +`kubectl` commands without specifying the `--kubeconfig` flag every time, set an +environment variable to automatically point to the kubeconfig file: ```bash title="Set KUBECONFIG Environment Variable" export KUBECONFIG=~/.kube/config ``` -To make this setting permanent across shell sessions, add it to your shell profile: +To make this setting permanent across shell sessions, add it to your shell +profile: ```bash title="Set KUBECONFIG Environment Variable" echo "export KUBECONFIG=~/.kube/config" >> ~/.bashrc source ~/.bashrc ``` -This streamlines your workflow, allowing you to simply run `kubectl get nodes` instead of specifying the kubeconfig path each time. +This streamlines your workflow, allowing you to simply run `kubectl get nodes` +instead of specifying the kubeconfig path each time. 
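Before adding worker nodes, it's worth confirming that all of your server nodes have registered. A quick check (role names may differ slightly between K3S versions):

```bash title="Verify the control plane nodes"
kubectl get nodes -o wide
# Each server should be Ready, with roles similar to "control-plane,etcd,master"
```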
### Set Up Worker Nodes [Ansible Playbook](/ansible/playbooks/join-worker-nodes-and-setup-kube-config.yml) -**Join Tokens:** On the master node, retrieve the join token from `/var/lib/rancher/k3s/server/token`: +**Join Tokens:** On the master node, retrieve the join token from +`/var/lib/rancher/k3s/server/token`: ```bash title="Join Tokens" vi /var/lib/rancher/k3s/server/token @@ -135,7 +175,8 @@ curl -sfL https://get.k3s.io | K3S_TOKEN=SECRET_TOKEN_HERE sh -s - agent \ --node-name k3-worker-lenovo.cluster ``` -**Node Verification:** Check that all worker nodes have joined the cluster. On your master node, run: +**Node Verification:** Check that all worker nodes have joined the cluster. On +your master node, run: ```bash title="Node Verification" kubectl get nodes @@ -145,11 +186,13 @@ kubectl get nodes #### Kubeconfig -After setting up your cluster, it's more convenient to manage it remotely from your local machine. +After setting up your cluster, it's more convenient to manage it remotely from +your local machine. Here's how to do that: -**Create the `.kube` directory on your local machine if it doesn't already exist:** +**Create the `.kube` directory on your local machine if it doesn't already +exist:** ```bash title="Create the .kube directory on your local machine" mkdir -p ~/.kube @@ -161,13 +204,18 @@ mkdir -p ~/.kube scp @:~/.kube/config ~/.kube/config ``` -Replace `` with your username and `` with the IP address of your master node. +Replace `` with your username and `` with the IP address +of your master node. -**Note:** If you encounter a permissions issue while copying, ensure that the `~/.kube/config` on your master node is owned by your user and is accessible. You might have to adjust file permissions or ownership on the master node accordingly. +**Note:** If you encounter a permissions issue while copying, ensure that the +`~/.kube/config` on your master node is owned by your user and is accessible. +You might have to adjust file permissions or ownership on the master node +accordingly. **Update the kubeconfig server details (Optional):** -Open your local `~/.kube/config` and make sure the `server` IP matches your master node's IP. If it's set to `127.0.0.1`, you'll need to update it: +Open your local `~/.kube/config` and make sure the `server` IP matches your +master node's IP. If it's set to `127.0.0.1`, you'll need to update it: ```yaml title="Update the kubeconfig server details" server: https://:6443 @@ -175,4 +223,6 @@ server: https://:6443 Replace `` with the IP address of your master node. -After completing these steps, you should be able to run `kubectl` commands from your local machine to interact with your Kubernetes cluster. This avoids the need to SSH into the master node for cluster management tasks. +After completing these steps, you should be able to run `kubectl` commands from +your local machine to interact with your Kubernetes cluster. This avoids the +need to SSH into the master node for cluster management tasks. 
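If you prefer not to edit the kubeconfig by hand, the same change can be made with `kubectl config set-cluster`. This sketch assumes the default names that K3S writes into its kubeconfig (cluster, context, and user are all named `default`); `<master_node_ip>` is a placeholder for your master node's address:

```bash title="Point the local kubeconfig at the master node"
kubectl config set-cluster default --server=https://<master_node_ip>:6443

# Confirm the local context now reaches the cluster
kubectl get nodes
```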
diff --git a/docusaurus/docs/kubernetes/kubernetes-80-20-rule.mdx b/docusaurus/docs/kubernetes/kubernetes-80-20-rule.mdx index 30c74c9..9dbab97 100644 --- a/docusaurus/docs/kubernetes/kubernetes-80-20-rule.mdx +++ b/docusaurus/docs/kubernetes/kubernetes-80-20-rule.mdx @@ -2,7 +2,7 @@ title: Kubernetes 80/20 Rule --- -import Alert from "@site/src/components/Alert/index.tsx"; +import Alert from '@site/src/components/Alert/index.tsx' -In our case of Kubernetes, that means that by focusing also upfront to a potential issues that we might face, we can save a -lot of time by being aware of these very issues, and how to best debug them. +In our case of Kubernetes, that means that by focusing also upfront to a +potential issues that we might face, we can save a lot of time by being aware of +these very issues, and how to best debug them. -import KubernetesParetoPrinciple from "@site/src/components/KubernetesParetoPrinciple/index.tsx"; +import KubernetesParetoPrinciple from '@site/src/components/KubernetesParetoPrinciple/index.tsx' diff --git a/docusaurus/docs/kubernetes/kubernetes-yml-structure.md b/docusaurus/docs/kubernetes/kubernetes-yml-structure.md index 1661fdf..f3d3b46 100644 --- a/docusaurus/docs/kubernetes/kubernetes-yml-structure.md +++ b/docusaurus/docs/kubernetes/kubernetes-yml-structure.md @@ -2,13 +2,16 @@ title: Writing YAML files for Kubernetes --- -Writing YAML files for Kubernetes involves understanding the basic structure and key components used to define cluster objects. Here's a simple logical guide to help you write k3s YAML files manually: +Writing YAML files for Kubernetes involves understanding the basic structure and +key components used to define cluster objects. Here's a simple logical guide to +help you write k3s YAML files manually: ### Basic Structure of a Kubernetes YAML File #### API Version (`apiVersion`): -Every YAML file starts with an API version. It's a string that indicates the version of the Kubernetes API you're using for the object. +Every YAML file starts with an API version. It's a string that indicates the +version of the Kubernetes API you're using for the object. Common examples include: @@ -19,7 +22,8 @@ Common examples include: --- **Kind (`kind`)**: -- This represents the type of Kubernetes resource you're defining. + +- This represents the type of Kubernetes resource you're defining. Some common kinds are: @@ -42,21 +46,20 @@ This section includes basic metadata about the object, such as: **Spec (`spec`)**: -This section contains the specifications of the object. +This section contains the specifications of the object. -It varies significantly between different kinds, but here are some general guidelines: +It varies significantly between different kinds, but here are some general +guidelines: -**For Deployments**: - - Define `replicas` to set the desired number of pod copies. - - Use `selector` to match Pods with labels. - - Define a `template` for the Pod specification. +**For Deployments**: - Define `replicas` to set the desired number of pod +copies. - Use `selector` to match Pods with labels. - Define a `template` for +the Pod specification. -**For Services**: - - Define `selector` to route traffic to the right Pods. - - Set `ports` to map incoming traffic to the target Pods. +**For Services**: - Define `selector` to route traffic to the right Pods. - Set +`ports` to map incoming traffic to the target Pods. -**For Ingress**: - - Define rules for routing external HTTP/S traffic to internal services. 
+**For Ingress**: - Define rules for routing external HTTP/S traffic to internal +services. --- @@ -64,16 +67,21 @@ It varies significantly between different kinds, but here are some general guide **Determine the Object Type**: -Decide whether you need a Deployment, Service, Pod, etc. This dictates the fields you'll need. +Decide whether you need a Deployment, Service, Pod, etc. This dictates the +fields you'll need. **Set the API Version and Kind**: -Reference Kubernetes documentation or k3s-specific resources to know which API version to use and set the appropriate kind. +Reference Kubernetes documentation or k3s-specific resources to know which API +version to use and set the appropriate kind. **Add Metadata**: -Assign a name to your object and optionally a namespace. Proper naming conventions help manage and track resources. +Assign a name to your object and optionally a namespace. Proper naming +conventions help manage and track resources. **Define the Spec**: -Tailor this section based on the object type. Carefully specify details like the number of replicas for Deployments, port mappings for Services, or routing rules for Ingress. +Tailor this section based on the object type. Carefully specify details like the +number of replicas for Deployments, port mappings for Services, or routing rules +for Ingress. diff --git a/docusaurus/docs/kubernetes/setup-argocd.md b/docusaurus/docs/kubernetes/setup-argocd.md new file mode 100644 index 0000000..75e01b6 --- /dev/null +++ b/docusaurus/docs/kubernetes/setup-argocd.md @@ -0,0 +1,534 @@ +--- +title: GitOps with ArgoCD +--- + +## Overview + +ArgoCD is a declarative, GitOps continuous delivery tool for Kubernetes. This +cluster uses ArgoCD to manage all applications through Git-based configuration, +ensuring infrastructure-as-code principles and automated synchronization. + +## Installation + +ArgoCD is installed using Helm with custom values that integrate with Vault for +secret management and Traefik for ingress. + +### Prerequisites + +- k3s cluster running +- cert-manager installed (for TLS certificates) +- Traefik ingress controller (default in k3s) +- HashiCorp Vault configured (for secret management) + +### Install ArgoCD + +```bash +# Add the Argo CD Helm repo +helm repo add argo https://argoproj.github.io/argo-helm +helm repo update + +# Create namespace +kubectl create namespace argocd + +# Install Argo CD with custom values +helm install argocd argo/argo-cd \ + --namespace argocd \ + --values k3s-argocd/values.yaml +``` + +The custom `values.yaml` configures: + +- Vault integration via ArgoCD Vault Plugin (AVP) +- Vault agent sidecar for token management +- Repository server configuration for secret injection + +## Configuration + +### Insecure Mode with TLS Termination + +ArgoCD is configured to run in insecure (HTTP) mode internally, with TLS +termination handled by Traefik at the ingress level. This is a common pattern +that simplifies certificate management. + +**ConfigMap** (`k3s-argocd/argocd-config-map.yaml`): + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: argocd-cmd-params-cm + namespace: argocd +data: + server.insecure: 'true' +``` + +Apply the config: + +```bash +kubectl apply -f k3s-argocd/argocd-config-map.yaml +kubectl rollout restart deployment argocd-server -n argocd +``` + +### TLS Certificate with cert-manager + +ArgoCD uses cert-manager to automatically provision Let's Encrypt certificates +via DNS-01 challenge with Cloudflare. 
+ +**ClusterIssuer** (`k3s-argocd/clusterissuer.yaml`): + +```yaml +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-dns01 +spec: + acme: + email: your-email@example.com + server: https://acme-v02.api.letsencrypt.org/directory + privateKeySecretRef: + name: letsencrypt-dns01-account-key + solvers: + - dns01: + cloudflare: + email: your-email@example.com + apiTokenSecretRef: + name: cloudflare-api-token-secret + key: api-token +``` + +**Certificate** (`k3s-argocd/argocd-certificate.yaml`): + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: argocd-tls + namespace: argocd +spec: + secretName: argocd-server-tls + issuerRef: + name: letsencrypt-dns01 + kind: ClusterIssuer + dnsNames: + - argocd.yourdomain.com +``` + +Replace `argocd.yourdomain.com` with your actual ArgoCD domain. + +**Cloudflare API Token Secret:** + +```bash +kubectl create secret generic cloudflare-api-token-secret \ + --from-literal=api-token= \ + -n cert-manager +``` + +Apply the certificate: + +```bash +kubectl apply -f k3s-argocd/clusterissuer.yaml +kubectl apply -f k3s-argocd/argocd-certificate.yaml +``` + +### Ingress Configuration + +The ingress resource (`k3s-argocd/argocd-ingress.yaml`) exposes ArgoCD through +Traefik with TLS: + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: argocd-server-ingress + namespace: argocd + annotations: + cert-manager.io/cluster-issuer: 'letsencrypt-dns01' + traefik.ingress.kubernetes.io/router.entrypoints: 'websecure' + traefik.ingress.kubernetes.io/router.tls: 'true' +spec: + ingressClassName: traefik + rules: + - host: argocd.yourdomain.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: argocd-server + port: + number: 80 + tls: + - hosts: + - argocd.yourdomain.com + secretName: argocd-server-tls +``` + +Replace `argocd.yourdomain.com` with your actual ArgoCD domain. + +Apply the ingress: + +```bash +kubectl apply -f k3s-argocd/argocd-ingress.yaml +``` + +### Verify Certificate Status + +```bash +kubectl describe certificate argocd-tls -n argocd +``` + +## App-of-Apps Pattern + +This cluster uses the App-of-Apps pattern to manage all applications +declaratively through Git. The root applications manage child applications, +creating a hierarchical structure. + +### Root Applications + +Two root applications manage the entire application landscape: + +1. **Root App** (`k3s-argocd-app-of-apps/root-app.yaml`) - Manages application + deployments +2. **Root Projects App** (`k3s-argocd-app-of-apps/root-projects-app.yaml`) - + Manages ArgoCD projects + +**Root App:** + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: root-app + namespace: argocd +spec: + project: default + source: + repoURL: https://github.com/your-org/argocd-app-of-apps.git + targetRevision: main + path: apps + destination: + server: https://kubernetes.default.svc + namespace: argocd + syncPolicy: + automated: + selfHeal: true +``` + +**Root Projects App:** + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: root-projects-app + namespace: argocd +spec: + project: default + source: + repoURL: https://github.com/your-org/argocd-app-of-apps.git + targetRevision: main + path: projects + destination: + server: https://kubernetes.default.svc + namespace: argocd + syncPolicy: + automated: + prune: true + selfHeal: true +``` + +Replace `your-org/argocd-app-of-apps` with your actual Git repository. 
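With the repository URL in place, these two root applications are typically applied once by hand to bootstrap the pattern; after that, ArgoCD creates and keeps every child application in sync from Git. A minimal bootstrap, assuming the manifests live under `k3s-argocd-app-of-apps/` as referenced above:

```bash
kubectl apply -f k3s-argocd-app-of-apps/root-app.yaml
kubectl apply -f k3s-argocd-app-of-apps/root-projects-app.yaml

# Watch ArgoCD register the root apps and their children
kubectl get applications -n argocd -w
```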
+ +### Application Structure + +The root app manages applications defined in your Git repository (from +`k3s-argocd-app-of-apps/apps/`). Examples of common applications you might +manage: + +- **longhorn** - Distributed block storage +- **cloudnative-pg-operator** - PostgreSQL operator +- **redis** - Redis deployment +- **monitoring** - Prometheus/Grafana stack +- **your-app-dev** - Your application development environment +- **your-app-prod** - Your application production environment + +The exact applications depend on your infrastructure needs and what you define +in your Git repository. + +### Projects + +ArgoCD projects (`k3s-argocd-app-of-apps/projects/`) provide isolation and +access control: + +- **cloudnativepg-project** - For CloudNativePG operator and managed clusters +- **redis-project** - For Redis deployments + +Example project: + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: AppProject +metadata: + name: cloudnativepg-project + namespace: argocd +spec: + description: Project for CloudNativePG operator and managed Postgres clusters + sourceRepos: + - '*' + destinations: + - namespace: argocd + server: https://kubernetes.default.svc + - namespace: cnpg-system + server: https://kubernetes.default.svc + - namespace: '*' + clusterResourceWhitelist: + - group: '*' + kind: '*' + namespaceResourceWhitelist: + - group: '*' + kind: '*' +``` + +## Managing Applications + +### Adding a New Application + +1. **Create application manifest** in your Git repository (e.g., + `argocd-app-of-apps/apps/`): + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: my-new-app + namespace: argocd +spec: + project: default + source: + repoURL: https://github.com/your-org/your-repo.git + targetRevision: main + path: k8s + destination: + server: https://kubernetes.default.svc + namespace: my-app + syncPolicy: + automated: + prune: true + selfHeal: true + syncOptions: + - CreateNamespace=true +``` + +2. **Commit and push** to the Git repository + +3. **ArgoCD automatically syncs** the new application (if automated sync is + enabled) + +### Manual Sync + +If automated sync is disabled, sync manually: + +```bash +# Via CLI +argocd app sync my-new-app + +# Via UI +# Navigate to the application and click "Sync" +``` + +### Sync Policies + +Common sync policy configurations: + +**Automated with Self-Heal:** + +```yaml +syncPolicy: + automated: + prune: true # Delete resources removed from Git + selfHeal: true # Automatically sync if cluster state drifts +``` + +**Manual Sync:** + +```yaml +syncPolicy: + syncOptions: + - CreateNamespace=true +``` + +**Server-Side Apply:** + +```yaml +syncPolicy: + syncOptions: + - ServerSideApply=true + - ApplyOutOfSyncOnly=true +``` + +## Accessing ArgoCD + +### Web UI + +Once the ingress is configured, access ArgoCD at: + +- `https://argocd.yourdomain.com` (replace with your actual domain) + +### Initial Admin Password + +Get the initial admin password: + +```bash +kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d +``` + +### CLI Access + +Install ArgoCD CLI: + +```bash +# Linux +curl -sSL -o /usr/local/bin/argocd https://github.com/argoproj-labs/argocd-operator/releases/latest/download/argocd-linux-amd64 +chmod +x /usr/local/bin/argocd +``` + +Login: + +```bash +argocd login argocd.yourdomain.com +``` + +Replace `argocd.yourdomain.com` with your actual ArgoCD domain. + +## Vault Integration + +ArgoCD is configured to use HashiCorp Vault for secret management via the ArgoCD +Vault Plugin (AVP). 
This allows secrets to be stored in Vault and injected +during application sync. + +### Configuration + +The `values.yaml` configures: + +- Vault agent sidecar for token management +- AVP plugin for secret injection +- Vault connection details + +See `k3s-argocd/values.yaml` and `k3s-argocd/vault-agent-config.yaml` for +details. + +### Using Vault Secrets in Applications + +In your Helm values or Kubernetes manifests, reference Vault secrets: + +```yaml +# In values.yaml +database: + password: +``` + +The AVP plugin will replace these placeholders with actual values from Vault +during sync. + +## Verification + +### Check ArgoCD Status + +```bash +# Check pods +kubectl get pods -n argocd + +# Check applications +kubectl get applications -n argocd + +# Check sync status +argocd app list +``` + +### Verify Root Applications + +```bash +kubectl get applications -n argocd | grep root +``` + +You should see: + +- `root-app` - Synced +- `root-projects-app` - Synced + +### Check Application Health + +```bash +# Via CLI +argocd app get + +# Via UI +# Navigate to the application in the ArgoCD UI +``` + +## Troubleshooting + +### Application Stuck in Syncing + +1. **Check application events:** + + ```bash + kubectl describe application -n argocd + ``` + +2. **Check ArgoCD logs:** + + ```bash + kubectl logs -n argocd deployment/argocd-application-controller + kubectl logs -n argocd deployment/argocd-repo-server + ``` + +3. **Check sync status:** + ```bash + argocd app get + ``` + +### Certificate Issues + +1. **Check certificate status:** + + ```bash + kubectl describe certificate argocd-tls -n argocd + ``` + +2. **Check cert-manager logs:** + + ```bash + kubectl logs -n cert-manager deployment/cert-manager + ``` + +3. **Verify DNS configuration:** + ```bash + dig argocd.yourdomain.com + ``` + +### Vault Integration Issues + +1. **Check Vault agent logs:** + + ```bash + kubectl logs -n argocd deployment/argocd-repo-server -c vault-agent + ``` + +2. **Verify Vault connection:** + + ```bash + kubectl exec -n argocd deployment/argocd-repo-server -c vault-agent -- vault status + ``` + +3. **Check AVP plugin:** + ```bash + kubectl logs -n argocd deployment/argocd-repo-server | grep avp + ``` + +## References + +- **ArgoCD configuration**: `K3S/k3s-argocd/` +- **App-of-apps structure**: `K3S/k3s-argocd-app-of-apps/` +- **ArgoCD documentation**: https://argo-cd.readthedocs.io/ diff --git a/docusaurus/docs/kubernetes/setup-vault.md b/docusaurus/docs/kubernetes/setup-vault.md new file mode 100644 index 0000000..7c9e966 --- /dev/null +++ b/docusaurus/docs/kubernetes/setup-vault.md @@ -0,0 +1,540 @@ +--- +title: Production Secret Management with Vault +--- + +## Overview + +HashiCorp Vault provides secure, centralized secret management for the K3s +cluster. This setup uses Vault in High Availability (HA) mode with Raft storage +backend and the Vault Secrets Operator to automatically sync secrets from Vault +into Kubernetes. + +### How It Works + +1. **Vault** acts as a centralized, secure database for secrets +2. The **Vault Secrets Operator** runs in the cluster and watches for + `VaultSecret` resources +3. When a `VaultSecret` is applied, the operator securely authenticates to + Vault, fetches the specified data, and creates a regular Kubernetes `Secret` +4. Applications use the Kubernetes `Secret` as normal + +This keeps actual secrets safe in Vault, while your Git repository only contains +non-sensitive instructions for how to retrieve them. 
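To make step 4 concrete, here is a minimal sketch of how a workload consumes the synced Secret. The Deployment name and image are hypothetical; `r2-longhorn-secret` is the Secret that the operator creates later in this guide:

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: example-app # hypothetical workload, for illustration only
  namespace: longhorn-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: example-app
  template:
    metadata:
      labels:
        app: example-app
    spec:
      containers:
        - name: example-app
          image: busybox:1.36
          command: ['sh', '-c', 'env | grep AWS_ && sleep 3600']
          envFrom:
            # Every key in the Vault-synced Secret becomes an env var
            - secretRef:
                name: r2-longhorn-secret
```

The application never talks to Vault directly; it only sees an ordinary Kubernetes Secret, which is exactly what makes this pattern easy to adopt.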
+ +## Part 1: Install Vault in HA Mode + +For production environments, Vault is installed in High Availability mode using +its integrated Raft storage backend. + +### Add HashiCorp Helm Repository + +```bash +helm repo add hashicorp https://helm.releases.hashicorp.com +helm repo update +``` + +### Install Vault in HA Mode + +```bash +helm install vault hashicorp/vault \ + --namespace vault \ + --create-namespace \ + --set "server.ha.enabled=true" \ + --set "server.ha.raft.enabled=true" +``` + +This creates a Vault cluster with: + +- Multiple Vault pods for high availability +- Raft storage backend for data persistence +- Automatic leader election + +### Verify Installation + +```bash +kubectl get pods -n vault +``` + +Wait for all Vault pods to be in `Running` state. + +## Part 2: Initialize and Unseal Vault + +A production Vault starts "sealed" for security. You must initialize it to get +the master keys and then unseal it. + +### Initialize Vault + +1. **Exec into the first Vault pod:** + +```bash +kubectl exec -it -n vault vault-0 -- /bin/sh +``` + +2. **Initialize Vault:** + +```sh +vault operator init +``` + +This command outputs: + +- **5 Unseal Keys** - You need 3 of these to unseal Vault +- **1 Initial Root Token** - Used for initial authentication + +**⚠️ CRITICAL: Save all of this information securely (password manager, secure +storage). This is the only time you'll see the unseal keys and root token in +plain text.** + +### Unseal Vault + +Vault requires 3 out of 5 unseal keys to become operational. Run the unseal +command three times, each time with a different key: + +```sh +vault operator unseal +vault operator unseal +vault operator unseal +``` + +After the third key is entered, Vault will be unsealed. You can verify with: + +```sh +vault status +``` + +You should see `Sealed: false`. + +Type `exit` to leave the pod shell. + +### Unseal Additional Vault Pods + +If you have multiple Vault pods (HA mode), you need to unseal each one: + +```bash +# Unseal vault-1 +kubectl exec -it -n vault vault-1 -- vault operator unseal +kubectl exec -it -n vault vault-1 -- vault operator unseal +kubectl exec -it -n vault vault-1 -- vault operator unseal + +# Unseal vault-2 (if exists) +kubectl exec -it -n vault vault-2 -- vault operator unseal +kubectl exec -it -n vault vault-2 -- vault operator unseal +kubectl exec -it -n vault vault-2 -- vault operator unseal +``` + +## Part 3: Install the Vault Secrets Operator + +The Vault Secrets Operator is the bridge between your cluster and Vault, +automatically syncing secrets. + +### Install the Operator + +```bash +helm install vault-secrets-operator hashicorp/vault-secrets-operator \ + --namespace vault-secrets-operator \ + --create-namespace +``` + +### Verify Installation + +```bash +kubectl get pods -n vault-secrets-operator +``` + +Wait for the operator pod to be in `Running` state. + +## Part 4: Configure Vault for Kubernetes Authentication + +Configure Vault to trust your Kubernetes cluster and allow the operator to fetch +secrets. 
+ +### Connect to Vault and Log In + +```bash +kubectl exec -it -n vault vault-0 -- /bin/sh +vault login +``` + +### Enable Secrets Engine and Kubernetes Auth + +```sh +# Enable the KVv2 secrets engine at the path "secret/" +vault secrets enable -path=secret kv-v2 + +# Enable the Kubernetes auth method +vault auth enable kubernetes + +# Configure the auth method with cluster details +vault write auth/kubernetes/config \ + token_reviewer_jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" \ + kubernetes_host="https://kubernetes.default.svc" \ + kubernetes_ca_cert=@/var/run/secrets/kubernetes.io/serviceaccount/ca.crt +``` + +### Create Policies + +Policies define what secrets can be accessed. Create a policy for Longhorn R2 +secrets: + +```sh +vault policy write longhorn-r2-policy - < +``` + +Store Longhorn R2 credentials: + +```sh +vault kv put secret/longhorn-r2 \ + AWS_ACCESS_KEY_ID="YOUR_R2_ACCESS_KEY_ID" \ + AWS_SECRET_ACCESS_KEY="YOUR_R2_SECRET_ACCESS_KEY" \ + AWS_ENDPOINTS="https://.r2.cloudflarestorage.com" +``` + +Type `exit` to leave the pod shell. + +### Store Secrets via Web UI + +1. **Port-forward to Vault UI:** + +```bash +kubectl port-forward -n vault svc/vault 8200:8200 +``` + +2. **Open browser:** Navigate to `http://127.0.0.1:8200` + +3. **Log in:** Choose **Token** method and paste your root token + +4. **Navigate to Secrets:** Go to `secret/` → `longhorn-r2` + +5. **Create secret:** Click "Create secret" and add your key-value pairs + +## Part 6: Sync Secrets with Vault Secrets Operator + +The Vault Secrets Operator uses `VaultSecret` resources to sync secrets from +Vault into Kubernetes. + +### VaultConnection Resource + +First, create a connection to Vault (`k3s-vault/vault-connection.yaml`): + +```yaml +apiVersion: secrets.hashicorp.com/v1beta1 +kind: VaultConnection +metadata: + name: vault-connection + namespace: longhorn-system +spec: + address: http://vault.vault:8200 + skipTLSVerify: true +``` + +Apply it: + +```bash +kubectl apply -f k3s-vault/vault-connection.yaml +``` + +### VaultAuth Resource + +Create the authentication configuration (`k3s-vault/vault-auth.yaml`): + +```yaml +apiVersion: secrets.hashicorp.com/v1beta1 +kind: VaultAuth +metadata: + name: vault-auth-kubernetes + namespace: longhorn-system +spec: + vaultConnectionRef: vault-connection + method: kubernetes + mount: kubernetes + kubernetes: + role: longhorn-role + serviceAccount: default +``` + +Apply it: + +```bash +kubectl apply -f k3s-vault/vault-auth.yaml +``` + +### VaultSecret Resource + +Create the `VaultSecret` resource (`k3s-vault/longhorn-r2-vault-secret.yaml`): + +```yaml +apiVersion: secrets.hashicorp.com/v1beta1 +kind: VaultSecret +metadata: + name: longhorn-r2-secret-from-vault + namespace: longhorn-system +spec: + vault: + address: http://vault.vault:8200 + auth: + kubernetes: + role: 'longhorn-role' + mountPath: 'kubernetes' + target: + name: r2-longhorn-secret + template: + type: Opaque + stringData: + AWS_ACCESS_KEY_ID: '{{ .AWS_ACCESS_KEY_ID }}' + AWS_SECRET_ACCESS_KEY: '{{ .AWS_SECRET_ACCESS_KEY }}' + AWS_ENDPOINTS: '{{ .AWS_ENDPOINTS }}' + source: + kv: + path: 'secret/longhorn-r2' + version: '2' +``` + +Apply it: + +```bash +kubectl apply -f k3s-vault/longhorn-r2-vault-secret.yaml +``` + +### Verify Secret Creation + +Check that the operator created the Kubernetes secret: + +```bash +kubectl get secret r2-longhorn-secret -n longhorn-system +kubectl get vaultsecret -n longhorn-system +``` + +The operator should show `Status: Valid` and the Kubernetes secret should 
exist. + +## Accessing the Vault Web UI + +Vault has a powerful built-in web UI perfect for day-to-day operations. + +### Port-Forward to Vault + +```bash +kubectl port-forward -n vault svc/vault 8200:8200 +``` + +Leave this terminal running. + +### Open the UI + +Navigate to `http://127.0.0.1:8200` in your browser. + +### Log In + +1. Choose **Token** as the login method +2. Paste your **Initial Root Token** (saved during initialization) +3. Click **Sign in** + +You can now: + +- Browse and edit secrets visually +- Create new policies +- Manage authentication roles +- View audit logs +- Monitor Vault health + +## Best Practices + +### Secret Rotation + +Regularly rotate secrets stored in Vault: + +1. **Update secret in Vault:** + + ```bash + kubectl exec -it -n vault vault-0 -- /bin/sh + vault login + vault kv put secret/longhorn-r2 \ + AWS_ACCESS_KEY_ID="NEW_KEY" \ + AWS_SECRET_ACCESS_KEY="NEW_SECRET" + ``` + +2. **Vault Secrets Operator automatically syncs** the updated secret to + Kubernetes + +### Policy Management + +- **Principle of Least Privilege:** Only grant read access to specific paths +- **Separate Policies:** Create separate policies for different applications +- **Regular Audits:** Review policies periodically + +### Token Management + +- **Avoid Root Token:** Create admin tokens with limited scope for daily use +- **Token Rotation:** Regularly rotate tokens +- **Short TTLs:** Use short TTLs for service account tokens (e.g., 24h) + +### Namespace Isolation + +Create separate VaultAuth resources for different namespaces to ensure proper +isolation: + +```yaml +apiVersion: secrets.hashicorp.com/v1beta1 +kind: VaultAuth +metadata: + name: myapp-auth + namespace: myapp +spec: + vaultConnectionRef: vault-connection + method: kubernetes + mount: kubernetes + kubernetes: + role: myapp-role + serviceAccount: myapp-sa +``` + +## Troubleshooting + +### Vault is Sealed + +If Vault becomes sealed (e.g., after a restart), unseal it: + +```bash +kubectl exec -it -n vault vault-0 -- /bin/sh +vault operator unseal +vault operator unseal +vault operator unseal +``` + +### VaultSecret Not Syncing + +1. **Check VaultSecret status:** + + ```bash + kubectl describe vaultsecret -n + ``` + +2. **Check operator logs:** + + ```bash + kubectl logs -n vault-secrets-operator deployment/vault-secrets-operator + ``` + +3. **Verify Vault connection:** + + ```bash + kubectl get vaultconnection -n + ``` + +4. **Check authentication:** + ```bash + kubectl get vaultauth -n + ``` + +### Authentication Failures + +1. **Verify role exists:** + + ```bash + kubectl exec -it -n vault vault-0 -- vault read auth/kubernetes/role/ + ``` + +2. **Check service account:** + + ```bash + kubectl get serviceaccount -n + ``` + +3. **Verify policy:** + ```bash + kubectl exec -it -n vault vault-0 -- vault policy read + ``` + +### Secret Not Found + +1. **Verify secret exists in Vault:** + + ```bash + kubectl exec -it -n vault vault-0 -- vault kv get secret/longhorn-r2 + ``` + +2. 
**Check path in VaultSecret:** + ```bash + kubectl get vaultsecret -n -o yaml + ``` + +## Verification Commands + +### Check Vault Status + +```bash +kubectl exec -it -n vault vault-0 -- vault status +``` + +### List Secrets in Vault + +```bash +kubectl exec -it -n vault vault-0 -- vault kv list secret/ +``` + +### Check Vault Secrets Operator + +```bash +kubectl get pods -n vault-secrets-operator +kubectl get vaultsecrets -A +``` + +### Verify Synced Secrets + +```bash +kubectl get secrets -n longhorn-system | grep r2 +``` + +## References + +- **Vault configuration**: `K3S/k3s-vault/` +- **Vault documentation**: https://developer.hashicorp.com/vault/docs +- **Vault Secrets Operator**: https://secrets-store-csi-driver.sigs.k8s.io/ diff --git a/docusaurus/docs/kubernetes/what-is-kubernetes.md b/docusaurus/docs/kubernetes/what-is-kubernetes.md index 77b956f..2530344 100644 --- a/docusaurus/docs/kubernetes/what-is-kubernetes.md +++ b/docusaurus/docs/kubernetes/what-is-kubernetes.md @@ -3,23 +3,74 @@ sidebar_position: 5 title: What is Kubernetes? 🎥 --- -As with anything in life, my experience has taught me that focusing on the essence of something and then going top-down is the best way to learn. In the context of Kubernetes, this means understanding it in a "teach me like I'm 6 years old" way. Kubernetes is a complex system, and trying to understand every component at the very beginning is overwhelming and will only lead to frustration. Plus, it won't be useful anyway, as this theory becomes important later, once things start failing (not working) and we need to debug. +As with anything in life, my experience has taught me that focusing on the +essence of something and then going top-down is the best way to learn. In the +context of Kubernetes, this means understanding it in a "teach me like I'm 6 +years old" way. Kubernetes is a complex system, and trying to understand every +component at the very beginning is overwhelming and will only lead to +frustration. Plus, it won't be useful anyway, as this theory becomes important +later, once things start failing (not working) and we need to debug. -So, what we need to get out of this section is the main benefit of an orchestration platform like Kubernetes: what it does, and how it can help us as engineers. +So, what we need to get out of this section is the main benefit of an +orchestration platform like Kubernetes: what it does, and how it can help us as +engineers. -### So, what does Kubernetes actually do? +## So, what does Kubernetes actually do? -Kubernetes is basically our super-organized friend who makes sure all our apps (and the stuff they need) are running smoothly, wherever we want them, cloud, our laptop, or a bunch of servers. We tell Kubernetes what we want ("run this app, keep it healthy, make sure it can handle lots of users"), and it figures out the rest. +Think of Kubernetes as your super-organized friend who makes sure all your apps +(and the stuff they need) are running smoothly, wherever you want them, cloud, +your laptop, or a bunch of servers. You tell Kubernetes what you want ("run this +app, keep it healthy, make sure it can handle lots of users"), and it figures +out the rest. -Basically, something like this: +### The Simple Version -- **We give it instructions**: Like, "We want 3 copies of our app running." -- **It keeps things running**: If something crashes, Kubernetes restarts it. If we need more power, it adds more copies. If we want to update our app, it helps us do it without breaking things. 
-- **It works anywhere**: Cloud, on-prem, hybrid, etc. Kubernetes doesn't care. It just wants to run our stuff. -- **It's all about making life easier**: Less manual work, more time for us to build cool things. +Kubernetes is an orchestration platform. That's a fancy way of saying it manages +your applications automatically. Here's what that means in practice: -### How to Get the Most Out of It +- **You give it instructions**: Like, "I want 3 copies of my app running, and if + one dies, replace it." +- **It keeps things running**: If something crashes, Kubernetes restarts it. If + you need more power, it adds more copies. If you want to update your app, it + helps you do it without breaking things. +- **It works anywhere**: Cloud, on-prem, hybrid, your basement—Kubernetes + doesn't care. It just wants to run your stuff. +- **It's all about making life easier**: Less manual work, more time for you to + build cool things. -- Check out the [official docs](https://kubernetes.io/docs/home/) - This is probably the best resource out there. If you are patient enough to read it, you will learn a lot. -- [Docker Mastery: with Kubernetes + Swarm from a Docker Captain](https://www.udemy.com/course/docker-mastery) - This is a great course to get started with Docker and Kubernetes. -- Let's not stress about the details at first. We'll learn about each individual component as we go along. +### Why Should You Care? + +Before diving into the technical details, it's worth understanding why +Kubernetes matters. In the real world, applications need to: + +- Stay running even when things break +- Handle more users without you manually adding servers +- Update without downtime +- Work the same way whether you're testing on your laptop or running in + production + +Kubernetes handles all of this for you. Instead of manually SSH-ing into +servers, restarting services, and hoping everything works, you describe what you +want, and Kubernetes makes it happen. + +### The Big Picture + +At its core, Kubernetes is about **declarative management**. You declare what +you want (3 copies of my app, always running), and Kubernetes makes sure that's +the reality. It's the difference between telling someone "watch the pot and stir +it every 5 minutes" versus "keep the soup at the right temperature", one is +manual work, the other is automation. + +## How to Get the Most Out of It + +- Check out the [official docs](https://kubernetes.io/docs/home/) - This is + probably the best resource out there. If you are patient enough to read it, + you will learn a lot. +- [Docker Mastery: with Kubernetes + Swarm from a Docker Captain](https://www.udemy.com/course/docker-mastery) - + This is a great course to get started with Docker and Kubernetes. +- Let's not stress about the details at first. We'll learn about each individual + component as we go along. + +The key is to start using it. Theory becomes useful once you've actually +deployed something and it breaks. That's when you'll appreciate why all these +components exist. 
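+
+To make the declarative idea above concrete, here is a minimal sketch of what
+such a declaration looks like in practice. The `hello-api` name and the `nginx`
+image are purely illustrative placeholders, not something this repository
+ships; the point is simply "ask for three replicas and let Kubernetes keep them
+running."
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: hello-api # hypothetical app name, used only for this example
+spec:
+  replicas: 3 # the desired state: three copies, always
+  selector:
+    matchLabels:
+      app: hello-api
+  template:
+    metadata:
+      labels:
+        app: hello-api
+    spec:
+      containers:
+        - name: hello-api
+          image: nginx:1.27 # stand-in image; swap in your own application
+          ports:
+            - containerPort: 80
+```
+
+If a pod from this Deployment dies, Kubernetes notices that the actual state
+(two copies) no longer matches the declared state (three copies) and starts a
+replacement, no SSH session or manual restart involved.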
diff --git a/docusaurus/docs/networking/expose-traefik-dashboard-inside-the-k3s-cluster.md b/docusaurus/docs/networking/expose-traefik-dashboard-inside-the-k3s-cluster.md index ab6af08..a62e670 100644 --- a/docusaurus/docs/networking/expose-traefik-dashboard-inside-the-k3s-cluster.md +++ b/docusaurus/docs/networking/expose-traefik-dashboard-inside-the-k3s-cluster.md @@ -2,20 +2,32 @@ title: Expose Traefik Dashboard inside the K3s Cluster --- -As we have learned in the previous section ([Kubernetes Networking](understanding-network-components#ingress-controllers-traefik-nginx)), ingress controllers are responsible for managing HTTP and HTTPS traffic, enabling external access to internal Kubernetes services. In simpler terms, the ingress controller ensures that incoming traffic is directed to the appropriate services that we define. - -In K3s, [Traefik](https://doc.traefik.io/traefik/) comes preconfigured as the default ingress controller, which means we can also take advantage of the [Traefik Dashboard](https://doc.traefik.io/traefik/operations/dashboard/). However, since the dashboard is not fully set up by default, we will need to configure it ourselves. +As we have learned in the previous section +([Kubernetes Networking](understanding-network-components#ingress-controllers-traefik-nginx)), +ingress controllers are responsible for managing HTTP and HTTPS traffic, +enabling external access to internal Kubernetes services. In simpler terms, the +ingress controller ensures that incoming traffic is directed to the appropriate +services that we define. + +In K3s, [Traefik](https://doc.traefik.io/traefik/) comes preconfigured as the +default ingress controller, which means we can also take advantage of the +[Traefik Dashboard](https://doc.traefik.io/traefik/operations/dashboard/). +However, since the dashboard is not fully set up by default, we will need to +configure it ourselves. Let's proceed with setting that up. ### Verify Traefik is Running First, let’s check if Traefik is installed and running in the cluster: + ```bash kubectl get pods -n kube-system ``` -We’ll look for a pod with a name like `traefik-...`. If it’s there and running, we’re good to go. If not, we might need to revisit the K3s installation settings. +We’ll look for a pod with a name like `traefik-...`. If it’s there and running, +we’re good to go. If not, we might need to revisit the K3s installation +settings. ## Objective @@ -26,9 +38,12 @@ You will be creating the required Kubernetes resources: ## Create the Traefik Dashboard Service -We'll create a `ClusterIP` Service to expose the Traefik dashboard. This service will make the Traefik dashboard's HTTP API, running on port `9000`, available to the cluster. +We'll create a `ClusterIP` Service to expose the Traefik dashboard. This service +will make the Traefik dashboard's HTTP API, running on port `9000`, available to +the cluster. 
-Create a YAML file named `traefik-dashboard-service.yaml` with the following contents: +Create a YAML file named `traefik-dashboard-service.yaml` with the following +contents: ```yaml apiVersion: v1 @@ -43,8 +58,8 @@ spec: type: ClusterIP ports: - name: traefik - port: 9000 # Dashboard listens on port 9000 - targetPort: 9000 # Forward traffic to this port on Traefik pods + port: 9000 # Dashboard listens on port 9000 + targetPort: 9000 # Forward traffic to this port on Traefik pods protocol: TCP selector: app.kubernetes.io/instance: traefik-kube-system @@ -52,9 +67,11 @@ spec: ``` - **Explanation**: - - `ClusterIP`: Used for internal access only (within the cluster not externally exposed). + - `ClusterIP`: Used for internal access only (within the cluster not + externally exposed). - - The service exposes port `9000`, which is the default port where Traefik serves its dashboard. + - The service exposes port `9000`, which is the default port where Traefik + serves its dashboard. ```bash kubectl apply -f traefik-dashboard-service.yaml @@ -64,9 +81,12 @@ kubectl apply -f traefik-dashboard-service.yaml ## Create the Traefik Ingress Resource -Next, we need to create an Ingress that routes traffic to the `traefik-dashboard` service created in the previous step. This will allow external traffic to reach the dashboard by using a specific domain. +Next, we need to create an Ingress that routes traffic to the +`traefik-dashboard` service created in the previous step. This will allow +external traffic to reach the dashboard by using a specific domain. -Create a YAML file named `traefik-dashboard-ingress.yaml` with the following contents: +Create a YAML file named `traefik-dashboard-ingress.yaml` with the following +contents: ```yaml apiVersion: networking.k8s.io/v1 @@ -78,7 +98,7 @@ metadata: spec.ingressClassName: traefik spec: rules: - - host: YOUR_DOMAIN_NAME # Replace YOUR_DOMAIN_NAME with your own domain. + - host: YOUR_DOMAIN_NAME # Replace YOUR_DOMAIN_NAME with your own domain. http: paths: - path: / @@ -90,11 +110,15 @@ spec: number: 9000 ``` -- Ingress: The ingress resource defines rules that route HTTP requests to `traefik-dashboard` at port `9000` based on a specific host (`YOUR_DOMAIN_NAME`). +- Ingress: The ingress resource defines rules that route HTTP requests to + `traefik-dashboard` at port `9000` based on a specific host + (`YOUR_DOMAIN_NAME`). -- Replace `YOUR_DOMAIN_NAME` with the desired domain name where you want to expose your Traefik dashboard. +- Replace `YOUR_DOMAIN_NAME` with the desired domain name where you want to + expose your Traefik dashboard. -- IngressClass: We're using the `traefik` ingress controller, as it's the default installed ingress controller for K3s. +- IngressClass: We're using the `traefik` ingress controller, as it's the + default installed ingress controller for K3s. ```bash kubectl apply -f traefik-dashboard-ingress.yaml @@ -104,9 +128,13 @@ kubectl apply -f traefik-dashboard-ingress.yaml ## Update DNS or `/etc/hosts` -To access the Traefik dashboard through your web browser, you'll need to ensure DNS resolves the host (`YOUR_DOMAIN_NAME`) to the correct IP address (either a load balancer IP, node IP, etc.). In the case of local development, you can update your **/etc/hosts** file. +To access the Traefik dashboard through your web browser, you'll need to ensure +DNS resolves the host (`YOUR_DOMAIN_NAME`) to the correct IP address (either a +load balancer IP, node IP, etc.). In the case of local development, you can +update your **/etc/hosts** file. 
-Suppose you're running a single-node K3s cluster accessible at the IP `192.168.1.100` and you want to use `traefik.example.com`. +Suppose you're running a single-node K3s cluster accessible at the IP +`192.168.1.100` and you want to use `traefik.example.com`. Edit `/etc/hosts` and add: @@ -116,7 +144,8 @@ Edit `/etc/hosts` and add: ## Access the Traefik Dashboard -Once the service and ingress resources are in place, and DNS (or `/etc/hosts`) has been configured, you should be able to access the dashboard in your browser: +Once the service and ingress resources are in place, and DNS (or `/etc/hosts`) +has been configured, you should be able to access the dashboard in your browser: ``` http://traefik.example.com/ @@ -124,14 +153,19 @@ http://traefik.example.com/ ### Notes: -- Deployment Security: The `Ingress` config above exposes the dashboard without authentication. For production deployments, consider securing the dashboard with basic authentication or other mechanisms. -- Dashboard Availability: By default, Traefik's dashboard is available via port 9000 and isn't exposed unless configured to be so. The steps above ensure it is properly exposed. +- Deployment Security: The `Ingress` config above exposes the dashboard without + authentication. For production deployments, consider securing the dashboard + with basic authentication or other mechanisms. +- Dashboard Availability: By default, Traefik's dashboard is available via port + 9000 and isn't exposed unless configured to be so. The steps above ensure it + is properly exposed. ## Clean-up -When you no longer need the Traefik Dashboard exposed, you can remove the resources by using the following commands: +When you no longer need the Traefik Dashboard exposed, you can remove the +resources by using the following commands: ```bash kubectl delete -f traefik-dashboard-ingress.yaml kubectl delete -f traefik-dashboard-service.yaml -``` \ No newline at end of file +``` diff --git a/docusaurus/docs/networking/kubernetes-networking-explained.md b/docusaurus/docs/networking/kubernetes-networking-explained.md index 28e82e4..09f774c 100644 --- a/docusaurus/docs/networking/kubernetes-networking-explained.md +++ b/docusaurus/docs/networking/kubernetes-networking-explained.md @@ -15,18 +15,28 @@ title: Kubernetes Networking Explained Kubernetes networking is designed to be **simple and flat**: -- Any pod can communicate with any other pod in the cluster, regardless of which namespace they're in. This communication works out of the box without additional configuration. -- Pods and services use **DNS** for service discovery instead of hardcoding IP addresses. +- Any pod can communicate with any other pod in the cluster, regardless of which + namespace they're in. This communication works out of the box without + additional configuration. +- Pods and services use **DNS** for service discovery instead of hardcoding IP + addresses. ### Pod-to-Pod Networking -Every pod is assigned a unique IP address. All pods share a single, flat address space, so there’s no [Network Address Translation (NAT)](https://www.youtube.com/watch?v=FTUV0t6JaDA) when pods communicate. However, pod IPs are [ephemeral](https://www.google.com/search?q=ephemeral&oq=ephemeral&gs_lcrp=EgZjaHJvbWUyBggAEEUYOdIBBzExOWowajeoAgCwAgA&sourceid=chrome&ie=UTF-8), they change if a pod is restarted. +Every pod is assigned a unique IP address. 
All pods share a single, flat address +space, so there’s no +[Network Address Translation (NAT)](https://www.youtube.com/watch?v=FTUV0t6JaDA) +when pods communicate. However, pod IPs are +[ephemeral](https://www.google.com/search?q=ephemeral&oq=ephemeral&gs_lcrp=EgZjaHJvbWUyBggAEEUYOdIBBzExOWowajeoAgCwAgA&sourceid=chrome&ie=UTF-8), +they change if a pod is restarted. ### Pod-to-Service Networking with DNS -Kubernetes provides a built-in DNS service that allows pods to resolve services using their names. For example: +Kubernetes provides a built-in DNS service that allows pods to resolve services +using their names. For example: -- A service called `nodejs-service` in the `default` namespace can be resolved by other pods in the same namespace as: +- A service called `nodejs-service` in the `default` namespace can be resolved + by other pods in the same namespace as: ``` http://nodejs-service @@ -38,13 +48,15 @@ http://nodejs-service http://nodejs-service.default.svc.cluster.local ``` -This DNS-based service discovery simplifies communication between pods and services, especially in complex setups. +This DNS-based service discovery simplifies communication between pods and +services, especially in complex setups. ## Key Networking Components in Kubernetes ### **A. Services** -Services are used to expose a group of pods (selected using labels) over the network and provide a stable address for accessing them. +Services are used to expose a group of pods (selected using labels) over the +network and provide a stable address for accessing them. Three key types of services: @@ -52,28 +64,36 @@ Three key types of services: - Accessible **within the cluster only**. - Provides internal networking between pods. -- Example: A backend service used by a frontend within the same application stack. +- Example: A backend service used by a frontend within the same application + stack. 2. **NodePort** - Exposes a service on a static port across all cluster nodes. -- Mostly used for development purposes but not ideal for production due to limited network flexibility. +- Mostly used for development purposes but not ideal for production due to + limited network flexibility. 3. **LoadBalancer** -- Requests an external IP to expose the service outside your cluster. In K3s, this integrates with **MetalLB** to assign an IP from your private pool. +- Requests an external IP to expose the service outside your cluster. In K3s, + this integrates with **MetalLB** to assign an IP from your private pool. -> Tip: Minimize `LoadBalancer` usage by routing external traffic via an **Ingress Controller** for better efficiency. +> Tip: Minimize `LoadBalancer` usage by routing external traffic via an +> **Ingress Controller** for better efficiency. ## Ingress: The Gateway to Your Cluster -Ingress is responsible for **routing external HTTP / HTTPS traffic** to services within your cluster. It integrates seamlessly with **Traefik**, your Ingress Controller in K3s. +Ingress is responsible for **routing external HTTP / HTTPS traffic** to services +within your cluster. It integrates seamlessly with **Traefik**, your Ingress +Controller in K3s. ### How It Works: -1. Create your services (e.g., `ClusterIP` services for Node.js, backends, etc.). +1. Create your services (e.g., `ClusterIP` services for Node.js, backends, + etc.). 2. Define an Ingress resource: - - Map hostnames (e.g., `nodejs.example.com`) or path prefixes (e.g., `/api`) to specific services. 
+ - Map hostnames (e.g., `nodejs.example.com`) or path prefixes (e.g., `/api`) + to specific services. 3. Traefik manages incoming requests and routes them to the appropriate service. **Example Ingress Resource:** @@ -101,17 +121,18 @@ spec: ### Benefits of Ingress: -- Reduces the need for multiple `LoadBalancer` services, only Traefik’s load balancer requires an external IP. +- Reduces the need for multiple `LoadBalancer` services, only Traefik’s load + balancer requires an external IP. - Simplifies DNS-based routing for multiple services. ## Cross-Namespace Networking ### **Default Behavior:** -In K3s/Kubernetes, pods and services in one namespace can communicate with those in another **by default**. You can achieve this by: +In K3s/Kubernetes, pods and services in one namespace can communicate with those +in another **by default**. You can achieve this by: 1. Using DNS: - - `..svc.cluster.local` - Example: `http://postgres-service.database.svc.cluster.local` @@ -119,11 +140,14 @@ In K3s/Kubernetes, pods and services in one namespace can communicate with those ### **Restricting Cross-Namespace Communication** -To prevent unrestricted communication between namespaces, use **Network Policies** (see below). +To prevent unrestricted communication between namespaces, use **Network +Policies** (see below). ## Network Policies: Restricting Internal Communication -By default, Kubernetes allows all traffic between pods and across namespaces. To secure your cluster, you can leverage **Network Policies** to restrict ingress (incoming) and/or egress (outgoing) traffic. +By default, Kubernetes allows all traffic between pods and across namespaces. To +secure your cluster, you can leverage **Network Policies** to restrict ingress +(incoming) and/or egress (outgoing) traffic. ### **How Network Policies Work** @@ -152,11 +176,13 @@ spec: - Egress ``` -- Blocks all traffic to/from pods in the `default` namespace unless explicitly allowed. +- Blocks all traffic to/from pods in the `default` namespace unless explicitly + allowed. #### **Allow Specific Namespace Traffic** -Allow only traffic originating from pods in a specific namespace (e.g., `frontend` namespace): +Allow only traffic originating from pods in a specific namespace (e.g., +`frontend` namespace): ```yaml apiVersion: networking.k8s.io/v1 @@ -175,11 +201,13 @@ spec: role: frontend ``` -- In the `backend` namespace, only pods from the `frontend` namespace (labeled `role: frontend`) can communicate. +- In the `backend` namespace, only pods from the `frontend` namespace (labeled + `role: frontend`) can communicate. #### **Allow Specific Pod Communication** -Allow only a specific pod to communicate with another (e.g., frontend → backend): +Allow only a specific pod to communicate with another (e.g., frontend → +backend): ```yaml apiVersion: networking.k8s.io/v1 @@ -198,7 +226,8 @@ spec: app: frontend ``` -- Backend pods (`app: backend`) can only receive traffic from frontend pods (`app: frontend`). +- Backend pods (`app: backend`) can only receive traffic from frontend pods + (`app: frontend`). ## Useful Tools for Debugging Networking in K3s @@ -224,12 +253,15 @@ kubectl exec -it -- curl 4. **Network Policy Debugging** -- Use tools like **Cilium** (if installed) or **NetworkPolicy Viewer** addons for better visualization of applied policies. +- Use tools like **Cilium** (if installed) or **NetworkPolicy Viewer** addons + for better visualization of applied policies. 
## Best Practices for K3s Networking - Use **ClusterIP** for internal services and restrict `NodePort` services. -- Depend on **Ingress** for external HTTP/S access, reduce the use of multiple `LoadBalancer` services. +- Depend on **Ingress** for external HTTP/S access, reduce the use of multiple + `LoadBalancer` services. - Enforce a **default-deny policy** and gradually allow necessary traffic. - Use namespace labels and Network Policies to isolate and secure workloads. -- Monitor and audit your networking policies and Traefik configurations regularly. +- Monitor and audit your networking policies and Traefik configurations + regularly. diff --git a/docusaurus/docs/networking/mikrotik/common-scenarios.mdx b/docusaurus/docs/networking/mikrotik/common-scenarios.mdx index d3c49b6..b7cc9e4 100644 --- a/docusaurus/docs/networking/mikrotik/common-scenarios.mdx +++ b/docusaurus/docs/networking/mikrotik/common-scenarios.mdx @@ -3,6 +3,6 @@ sidebar_position: 6 title: Common Scenarios --- -import Scenarios from "@site/src/components/MikrotikNetworking/Scenarios"; +import Scenarios from '@site/src/components/MikrotikNetworking/Scenarios' -{" "} + diff --git a/docusaurus/docs/networking/mikrotik/configure-email-on-mikrotik.md b/docusaurus/docs/networking/mikrotik/configure-email-on-mikrotik.md index 9a389d4..146c370 100644 --- a/docusaurus/docs/networking/mikrotik/configure-email-on-mikrotik.md +++ b/docusaurus/docs/networking/mikrotik/configure-email-on-mikrotik.md @@ -2,13 +2,19 @@ title: Configure Email on MikroTik --- -If your public IP changes or similar events occur, it's useful to receive notifications. +If your public IP changes or similar events occur, it's useful to receive +notifications. -There are many ways to achieve this, such as calling a webhook to send a Slack or Discord message. The simplest method, however, is to send an email to yourself. This guide uses Gmail as an example. +There are many ways to achieve this, such as calling a webhook to send a Slack +or Discord message. The simplest method, however, is to send an email to +yourself. This guide uses Gmail as an example. -Before configuring email on RouterOS, generate an app password for your Gmail account. +Before configuring email on RouterOS, generate an app password for your Gmail +account. -You can do this by [generating an app password](https://myaccount.google.com/apppasswords). Store the password securely in a password manager like LastPass or 1Password. +You can do this by +[generating an app password](https://myaccount.google.com/apppasswords). Store +the password securely in a password manager like LastPass or 1Password. Next, configure the router’s SMTP settings: @@ -23,6 +29,10 @@ Test the configuration: /tool e-mail send to="your.email@gmail.com" subject="MikroTik Test" body="Hello from the router" ``` -This provides a general-purpose tool for sending emails from your MikroTik router. Use it for monitoring, alerts, or any scenario where email notifications are helpful, for example, when IP access is granted or denied, or when a specific event occurs. +This provides a general-purpose tool for sending emails from your MikroTik +router. Use it for monitoring, alerts, or any scenario where email notifications +are helpful, for example, when IP access is granted or denied, or when a +specific event occurs. -In the next section, we'll cover how to send an email when a particular event happens. +In the next section, we'll cover how to send an email when a particular event +happens. 
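+
+For reference, the SMTP settings mentioned above live under `/tool e-mail`.
+Treat the snippet below as an illustrative sketch rather than copy-paste
+configuration: the property names vary between RouterOS releases (older builds
+use `address=` and `start-tls=yes`, newer ones use `server=` and
+`tls=starttls`), and the addresses are placeholders.
+
+```bash
+# Illustrative only - check the property names against your RouterOS version
+/tool e-mail set server=smtp.gmail.com port=587 tls=starttls \
+    from="router@example.com" user="your.email@gmail.com" \
+    password="YOUR_GMAIL_APP_PASSWORD"
+```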
diff --git a/docusaurus/docs/networking/mikrotik/core-concepts.mdx b/docusaurus/docs/networking/mikrotik/core-concepts.mdx index 32bdafc..df758fa 100644 --- a/docusaurus/docs/networking/mikrotik/core-concepts.mdx +++ b/docusaurus/docs/networking/mikrotik/core-concepts.mdx @@ -3,6 +3,6 @@ sidebar_position: 2 title: Core Concepts --- -import CoreConcepts from "@site/src/components/MikrotikNetworking/CoreConcepts"; +import CoreConcepts from '@site/src/components/MikrotikNetworking/CoreConcepts' -{" "} + diff --git a/docusaurus/docs/networking/mikrotik/device-configuration.mdx b/docusaurus/docs/networking/mikrotik/device-configuration.mdx index c52e492..02aee0a 100644 --- a/docusaurus/docs/networking/mikrotik/device-configuration.mdx +++ b/docusaurus/docs/networking/mikrotik/device-configuration.mdx @@ -3,6 +3,6 @@ sidebar_position: 4 title: Device Configuration --- -import DeviceConfiguration from "@site/src/components/MikrotikNetworking/DeviceConfiguration"; +import DeviceConfiguration from '@site/src/components/MikrotikNetworking/DeviceConfiguration' -{" "} + diff --git a/docusaurus/docs/networking/mikrotik/dynamic-dns-with-cloudflare.md b/docusaurus/docs/networking/mikrotik/dynamic-dns-with-cloudflare.md index aa89c28..799a30b 100644 --- a/docusaurus/docs/networking/mikrotik/dynamic-dns-with-cloudflare.md +++ b/docusaurus/docs/networking/mikrotik/dynamic-dns-with-cloudflare.md @@ -4,22 +4,41 @@ title: DDNS Using Cloudflare ## MikroTik Scripting -After nearly two decades as programmers, I find MikroTik scripting to be one of RouterOS’s best features. It enables creativity and extensibility, allowing me to quickly write scripts whenever a new use case arises. The scripting language is simple, and if you have programming experience, it should feel straightforward. +After nearly two decades as programmers, I find MikroTik scripting to be one of +RouterOS’s best features. It enables creativity and extensibility, allowing me +to quickly write scripts whenever a new use case arises. The scripting language +is simple, and if you have programming experience, it should feel +straightforward. ## Why do we need a DDNS? -Without a static public IP address, setting our IP in Cloudflare (or any DNS provider) won’t work permanently. Our current public IP might work for now, but once our ISP changes it, our services will become inaccessible. To solve this, we use Dynamic DNS (DDNS): we create an A record in Cloudflare (e.g., something.example.com) and update it automatically whenever our public IP changes. This way, we can always point our ingress or other services to a consistent domain name. +Without a static public IP address, setting our IP in Cloudflare (or any DNS +provider) won’t work permanently. Our current public IP might work for now, but +once our ISP changes it, our services will become inaccessible. To solve this, +we use Dynamic DNS (DDNS): we create an A record in Cloudflare (e.g., +something.example.com) and update it automatically whenever our public IP +changes. This way, we can always point our ingress or other services to a +consistent domain name. In our setup, we use two methods: -- Utilize the `On Up` event under the `default` [PPP (Point-to-Point)](https://help.mikrotik.com/docs/spaces/ROS/pages/328072/PPP) profile -- Use the built-in [Scheduler](https://help.mikrotik.com/docs/spaces/ROS/pages/40992881/Scheduler) (essentially, a cron) to run every X minutes to update the DNS record in Cloudflare. 
+- Utilize the `On Up` event under the `default` + [PPP (Point-to-Point)](https://help.mikrotik.com/docs/spaces/ROS/pages/328072/PPP) + profile +- Use the built-in + [Scheduler](https://help.mikrotik.com/docs/spaces/ROS/pages/40992881/Scheduler) + (essentially, a cron) to run every X minutes to update the DNS record in + Cloudflare. -Before using the script below, we’ll need to set up a few things in our Cloudflare account. Specifically, we’ll need some tokens and IDs so the script can update our DNS record via the API. This setup should only take a few minutes. +Before using the script below, we’ll need to set up a few things in our +Cloudflare account. Specifically, we’ll need some tokens and IDs so the script +can update our DNS record via the API. This setup should only take a few +minutes. ## Get Our Cloudflare Credentials -Before configuring the router, let’s gather these four pieces of information from our Cloudflare account: +Before configuring the router, let’s gather these four pieces of information +from our Cloudflare account: - Our Zone ID. - The DNS Record Name we want to update (e.g., router.ourdomain.com). @@ -28,7 +47,8 @@ Before configuring the router, let’s gather these four pieces of information f **Step A: Create the DNS Record (if it doesn't exist)** -First, we need a placeholder 'A' record in Cloudflare that the script can target for updates. +First, we need a placeholder 'A' record in Cloudflare that the script can target +for updates. - Log in to our Cloudflare dashboard. - Go to the DNS settings for our domain. @@ -36,27 +56,39 @@ First, we need a placeholder 'A' record in Cloudflare that the script can target - Configure it as follows: - Type: A - Name: The subdomain we want to use (e.g., router, home, m920q). - - IPv4 address: Use a placeholder, such as our current public IP. The script will update this automatically as our IP changes. - - Proxy status: Our choice. If we want Cloudflare's protection (orange cloud), leave it Proxied. If we want a direct connection (e.g., for a VPN), set it to DNS only. Note our choice for later. + - IPv4 address: Use a placeholder, such as our current public IP. The script + will update this automatically as our IP changes. + - Proxy status: Our choice. If we want Cloudflare's protection (orange cloud), + leave it Proxied. If we want a direct connection (e.g., for a VPN), set it + to DNS only. Note our choice for later. - Click Save. **Step B: Get our Zone ID and API Token** -- Find our Zone ID: On the main `Overview page` for our domain in Cloudflare, scroll down. We'll find the Zone ID on the right-hand side. Copy it to a safe place. +- Find our Zone ID: On the main `Overview page` for our domain in Cloudflare, + scroll down. We'll find the Zone ID on the right-hand side. Copy it to a safe + place. - Create an API Token: - Click the user icon in the top right and go to My Profile. - Select the API Tokens tab on the left. - Click Create Token. - Find the Edit zone DNS template and click Use template. - - Make sure to assign the following two permissions: `Zone - Zone - Read` and `Zone - DNS - Edit`. In simple terms, our script needs to read data from our DNS and update DNS records when needed. - - Under Zone Resources, ensure we select the specific domain we want this token to control. + - Make sure to assign the following two permissions: `Zone - Zone - Read` and + `Zone - DNS - Edit`. In simple terms, our script needs to read data from our + DNS and update DNS records when needed. 
+ - Under Zone Resources, ensure we select the specific domain we want this + token to control. - Click Continue to summary, then Create Token. - - Cloudflare will display the token only once. Copy it immediately and store it safely. + - Cloudflare will display the token only once. Copy it immediately and store + it safely. **Step C: Get the DNS Record ID** -This ID is not visible on the dashboard. The easiest way to get it is with a command on our PC (not the router). Open a Command Prompt (Windows) or Terminal (Mac/Linux) and run the following, replacing the capitalized parts with our info: +This ID is not visible on the dashboard. The easiest way to get it is with a +command on our PC (not the router). Open a Command Prompt (Windows) or Terminal +(Mac/Linux) and run the following, replacing the capitalized parts with our +info: ```bash curl -X GET "https://api.cloudflare.com/client/v4/zones/OUR_ZONE_ID/dns_records?name=OUR_DNS_RECORD_NAME" \ @@ -124,16 +156,22 @@ Now, let's program the router. ``` -Edit the five configuration lines at the top with our Cloudflare information, and update the WAN interface as needed. +Edit the five configuration lines at the top with our Cloudflare information, +and update the WAN interface as needed. ## Logging -We’ll notice the script includes several log statements. Logging is a good engineering practice, so I recommend keeping them. +We’ll notice the script includes several log statements. Logging is a good +engineering practice, so I recommend keeping them. -Before proceeding to the next steps, let’s test the script we just created under `System > Scripts`. +Before proceeding to the next steps, let’s test the script we just created under +`System > Scripts`. - Open the `cloudflare-ddns` script, and click `Run Script` -- Open the `Log` menu item in the sidebar to check if everything is working correctly. If there are errors, they will appear in red. Most errors are related to permissions or similar issues, such as Cloudflare returning an HTTP status 400. Once everything works, we can proceed to the next steps. +- Open the `Log` menu item in the sidebar to check if everything is working + correctly. If there are errors, they will appear in red. Most errors are + related to permissions or similar issues, such as Cloudflare returning an HTTP + status 400. Once everything works, we can proceed to the next steps. **Step 2: Schedule the Script** @@ -142,18 +180,22 @@ Navigate to `System > Scheduler`. - Click + to add a new schedule. - Name it Run-Cloudflare-Update. - In the On Event box, type the script name: `cloudflare-ddns`. -- Set the interval, for example, to 00:05:00 (every 5 minutes), or adjust as needed. +- Set the interval, for example, to 00:05:00 (every 5 minutes), or adjust as + needed. - Click Apply and OK. ## Using the PPPoE Client "On-Up" Script -If your internet connection uses PPP (as mine does for Telenor ISP), we can use the `On Up` event, which triggers when the connection comes up. +If your internet connection uses PPP (as mine does for Telenor ISP), we can use +the `On Up` event, which triggers when the connection comes up. -Typically, our public IP changes when our connection goes down and comes back up. +Typically, our public IP changes when our connection goes down and comes back +up. - Navigate to the PPP menu on the left. - Go to the Profiles tab. -- Open the profile that our PPPoE interface uses. This is typically the one named default or default-encryption. +- Open the profile that our PPPoE interface uses. 
This is typically the one + named default or default-encryption. - In the Profile settings, find the field named `On Up`. - In this field, we'll run our previously defined script @@ -168,7 +210,8 @@ Typically, our public IP changes when our connection goes down and comes back up ## For Other Connection Types -For those of you who have a different type of internet connection, the same concept applies: +For those of you who have a different type of internet connection, the same +concept applies: **DHCP Client (Cable/Fiber)** @@ -176,13 +219,18 @@ If your WAN interface gets its IP via DHCP, we would: - Go to IP -> DHCP Client - Open our WAN DHCP client entry -- Run a script inside the Script field, e.g. `/system script run cloudflare-ddns` +- Run a script inside the Script field, e.g. + `/system script run cloudflare-ddns` ## Sending emails for monitoring purposes -If you have [configured email](./configure-email-on-mikrotik) as described earlier, you can receive notifications when certain events occur. Monitoring helps us understand patterns in our network, such as how often our public IP changes. +If you have [configured email](./configure-email-on-mikrotik) as described +earlier, you can receive notifications when certain events occur. Monitoring +helps us understand patterns in our network, such as how often our public IP +changes. -Let's extend the above `On Up` and `On Down` scripts to log and send an email when these events occur: +Let's extend the above `On Up` and `On Down` scripts to log and send an email +when these events occur: ```bash :log info "PPP UP - running DDNS update" @@ -199,4 +247,5 @@ And of course, when it goes down as well: /tool e-mail send to="hi@programmer.network" subject="PPP is DOWN" body="PPP connection lost" ``` -In simple terms, we will want to do this in the appropriate place depending on the connection type. +In simple terms, we will want to do this in the appropriate place depending on +the connection type. diff --git a/docusaurus/docs/networking/mikrotik/firewall-logic.mdx b/docusaurus/docs/networking/mikrotik/firewall-logic.mdx index 7a63e56..7bfa2ab 100644 --- a/docusaurus/docs/networking/mikrotik/firewall-logic.mdx +++ b/docusaurus/docs/networking/mikrotik/firewall-logic.mdx @@ -3,6 +3,6 @@ sidebar_position: 5 title: Firewall Logic --- -import FirewallLogic from "@site/src/components/MikrotikNetworking/FirewallLogic"; +import FirewallLogic from '@site/src/components/MikrotikNetworking/FirewallLogic' -{" "} + diff --git a/docusaurus/docs/networking/mikrotik/lenovo-m920q-roas.mdx b/docusaurus/docs/networking/mikrotik/lenovo-m920q-roas.mdx index f8cf7f8..58d9cb9 100644 --- a/docusaurus/docs/networking/mikrotik/lenovo-m920q-roas.mdx +++ b/docusaurus/docs/networking/mikrotik/lenovo-m920q-roas.mdx @@ -3,120 +3,203 @@ sidebar_position: 3 title: MikroTik RouterOS on Lenovo M920q --- -You can have the greatest computers in the world, with insane amounts of CPU, RAM, and storage. But without proper networking, or if your network stack is a bottleneck, none of it will matter. Who cares if your NVMe can transfer data at lightning speed if your network is slow? Who cares if your API is written in binary and can process 17 trillion requests in negative time if your network adds four seconds of latency? Simply put, without top-notch networking, everything else will feel like a third-rate setup. That’s why I decided to step up my game and build this awesome machine as my dedicated router, the brain of the entire operation. 
- -As I mentioned in the [Hardware Overview](../../hardware-raspberry-pi-setup/hardware.mdx) section, you don't have to use the same hardware I did. If you have a different router running MikroTik RouterOS, you can skip this section. However, if you're looking for an enterprise-grade router for your home network at a much lower cost than buying hardware from MikroTik or other vendors, this section is for you. - -Many homelab setups I’ve seen on YouTube underestimate the importance of proper networking. Regardless of CPU, RAM, or storage, devices must communicate efficiently. Clients connect to the router, which means it must handle many firewall rules, NAT, VPN, etc. +You can have the greatest computers in the world, with insane amounts of CPU, +RAM, and storage. But without proper networking, or if your network stack is a +bottleneck, none of it will matter. Who cares if your NVMe can transfer data at +lightning speed if your network is slow? Who cares if your API is written in +binary and can process 17 trillion requests in negative time if your network +adds four seconds of latency? Simply put, without top-notch networking, +everything else will feel like a third-rate setup. That’s why I decided to step +up my game and build this awesome machine as my dedicated router, the brain of +the entire operation. + +As I mentioned in the +[Hardware Overview](../../hardware-raspberry-pi-setup/hardware.mdx) section, you +don't have to use the same hardware I did. If you have a different router +running MikroTik RouterOS, you can skip this section. However, if you're looking +for an enterprise-grade router for your home network at a much lower cost than +buying hardware from MikroTik or other vendors, this section is for you. + +Many homelab setups I’ve seen on YouTube underestimate the importance of proper +networking. Regardless of CPU, RAM, or storage, devices must communicate +efficiently. Clients connect to the router, which means it must handle many +firewall rules, NAT, VPN, etc. There are four things you need for this setup: 1. [Lenovo M920q Mini PC](https://www.ebay.com/sch/i.html?_nkw=Lenovo+M920Q&_sacat=0&_from=R40&_trksid=p2332490.m570.l1313) - - Has a critical feature: a usable PCIe expansion slot, allowing installation of a server-grade network card. -2. [The Network Card](https://www.aliexpress.com/item/1005005920672631.html?spm=a2g0o.order_list.order_list_main.11.329f180254cPlG) (NIC) - - SFP+ model (Intel 82599ES chipset, X520-DA2) for direct fiber or DAC connectivity to your switch. -3. [PCIe Riser Card](https://www.aliexpress.com/item/1005007593015885.html?spm=a2g0o.order_list.order_list_main.5.329f180254cPlG) (to connect the NIC to the motherboard) - - Mandatory adapter needed to physically connect the network card to the Lenovo M920q's motherboard. + - Has a critical feature: a usable PCIe expansion slot, allowing installation + of a server-grade network card. +2. [The Network Card](https://www.aliexpress.com/item/1005005920672631.html?spm=a2g0o.order_list.order_list_main.11.329f180254cPlG) + (NIC) + - SFP+ model (Intel 82599ES chipset, X520-DA2) for direct fiber or DAC + connectivity to your switch. +3. [PCIe Riser Card](https://www.aliexpress.com/item/1005007593015885.html?spm=a2g0o.order_list.order_list_main.5.329f180254cPlG) + (to connect the NIC to the motherboard) + - Mandatory adapter needed to physically connect the network card to the + Lenovo M920q's motherboard. 4. 
[MikroTik RouterOS License](https://help.mikrotik.com/docs/spaces/ROS/pages/328149/RouterOS+license+keys) - - You'll need a Level 4 (P1) RouterOS license to unlock the full speed of your hardware. + - You'll need a Level 4 (P1) RouterOS license to unlock the full speed of + your hardware.
-import ImageGallery from "react-image-gallery"; +import ImageGallery from 'react-image-gallery'
-For this build, we'll install MikroTik RouterOS v7 directly on bare metal. This avoids virtualization headaches and ensures maximum hardware performance. We'll use a 90W or higher power adapter to ensure the CPU, SSD, and NIC have enough power, even under heavy load. +For this build, we'll install MikroTik RouterOS v7 directly on bare metal. This +avoids virtualization headaches and ensures maximum hardware performance. We'll +use a 90W or higher power adapter to ensure the CPU, SSD, and NIC have enough +power, even under heavy load. -Switching from the RB3011 to this build is a huge leap forward. The RB3011 would struggle with high connection counts or heavy traffic, but this setup handles those demands easily, making it ideal for a busy K3s cluster. Compared to expensive MikroTik CCRs, this Intel-powered box outperforms many for CPU-intensive tasks like VPNs or complex firewall rules. +Switching from the RB3011 to this build is a huge leap forward. The RB3011 would +struggle with high connection counts or heavy traffic, but this setup handles +those demands easily, making it ideal for a busy K3s cluster. Compared to +expensive MikroTik CCRs, this Intel-powered box outperforms many for +CPU-intensive tasks like VPNs or complex firewall rules. -We benefit from the flexibility and power of a general-purpose CPU, rather than being limited by a fixed-function router chip. For our services, this router removes the network as a bottleneck. APIs and applications will run at full speed, limited only by the server itself, not by the network path. +We benefit from the flexibility and power of a general-purpose CPU, rather than +being limited by a fixed-function router chip. For our services, this router +removes the network as a bottleneck. APIs and applications will run at full +speed, limited only by the server itself, not by the network path. ## Lenovo M920q BIOS -Before we can install RouterOS, we have to configure the BIOS on our Lenovo M920q. To save you time and effort, here are the BIOS settings you should enable or disable. +Before we can install RouterOS, we have to configure the BIOS on our Lenovo +M920q. To save you time and effort, here are the BIOS settings you should enable +or disable. -Generally, disable features you don't use. In this context, two clear candidates are `bluetooth` and the `wireless` card. I went further and removed the wireless card from the Lenovo M920q, as I won't be using it. +Generally, disable features you don't use. In this context, two clear candidates +are `bluetooth` and the `wireless` card. I went further and removed the wireless +card from the Lenovo M920q, as I won't be using it. Assuming you've updated the BIOS to the latest version, do the following: - Restart the PC and keep pressing the F1 key to enter the BIOS. -- Go to `Devices > USB Setup > Bluetooth` and set it to `disabled`. If you don't use the front USB ports, you can disable those too, leaving just one rear port enabled for keyboard or external screen setup. +- Go to `Devices > USB Setup > Bluetooth` and set it to `disabled`. If you don't + use the front USB ports, you can disable those too, leaving just one rear port + enabled for keyboard or external screen setup. -- Go to `Devices > Network Setup` and ensure all options except `Wireless LAN` are enabled. Disable `Wireless LAN`, this router won't connect to your switch via wireless. The `PXE` options must be enabled to install RouterOS via [Netinstall](https://help.mikrotik.com/docs/spaces/ROS/pages/24805390/Netinstall). 
+- Go to `Devices > Network Setup` and ensure all options except `Wireless LAN` + are enabled. Disable `Wireless LAN`, this router won't connect to your switch + via wireless. The `PXE` options must be enabled to install RouterOS via + [Netinstall](https://help.mikrotik.com/docs/spaces/ROS/pages/24805390/Netinstall). -- Go to `Power` and set `After Power Loss` to `Power On`. This is the most critical setting for all Mini PCs, ensuring the hardware automatically powers on after an outage. +- Go to `Power` and set `After Power Loss` to `Power On`. This is the most + critical setting for all Mini PCs, ensuring the hardware automatically powers + on after an outage. -- Go to `Startup` and make sure the `CSM` option is `Enabled` and `Boot Mode` is set to `Auto`. `CSM` stands for `Compatibility Support Module` and allows loading of non-UEFI operating systems. +- Go to `Startup` and make sure the `CSM` option is `Enabled` and `Boot Mode` is + set to `Auto`. `CSM` stands for `Compatibility Support Module` and allows + loading of non-UEFI operating systems. -- Go to `Startup > Primary Boot Sequence` and change the boot order. Since you'll install RouterOS using Netinstall (via ethernet), move `Network 1` to the top of the `Primary Boot Sequence` list. +- Go to `Startup > Primary Boot Sequence` and change the boot order. Since + you'll install RouterOS using Netinstall (via ethernet), move `Network 1` to + the top of the `Primary Boot Sequence` list. -At this point, your BIOS should be fully set up, and we can proceed to the next section: setting up MikroTik's Netinstall. For now, you can turn the PC off. +At this point, your BIOS should be fully set up, and we can proceed to the next +section: setting up MikroTik's Netinstall. For now, you can turn the PC off. ## Install MikroTik RouterOS on Lenovo M920q -From experience, I must emphasize: installing RouterOS via USB stick is not possible. I tried burning [RouterOS](https://mikrotik.com/software) to a USB stick using [Rufus](https://rufus.ie/en/), set up the BIOS, and booted into the USB. Starting the RouterOS installation, I got a `no cdrom found` error. +From experience, I must emphasize: installing RouterOS via USB stick is not +possible. I tried burning [RouterOS](https://mikrotik.com/software) to a USB +stick using [Rufus](https://rufus.ie/en/), set up the BIOS, and booted into the +USB. Starting the RouterOS installation, I got a `no cdrom found` error.

-I thought it might be a corrupted ISO or USB stick, so I tried different sticks and Rufus settings, but nothing worked. Eventually, with help from AI and forums, I concluded you must follow MikroTik's [Netinstall guide](https://help.mikrotik.com/docs/spaces/ROS/pages/24805390/Netinstall).
+I thought it might be a corrupted ISO or USB stick, so I tried different sticks
+and Rufus settings, but nothing worked. Eventually, with help from AI and
+forums, I concluded you must follow MikroTik's
+[Netinstall guide](https://help.mikrotik.com/docs/spaces/ROS/pages/24805390/Netinstall).

-In short, install RouterOS over the network. The USB process fails because, once installation starts, RouterOS looks for a CD-ROM instead of recognizing the USB drive.
+In short, install RouterOS over the network. The USB process fails because, once
+installation starts, RouterOS looks for a CD-ROM instead of recognizing the USB
+drive.

-Rather than duplicating all the content from MikroTik's original guide, simply follow the official [Netinstall guide](https://help.mikrotik.com/docs/spaces/ROS/pages/24805390/Netinstall). The installation process takes less than a minute, and the next time you boot, your Lenovo M920q will be ready to route.
+Rather than duplicating all the content from MikroTik's original guide, simply
+follow the official
+[Netinstall guide](https://help.mikrotik.com/docs/spaces/ROS/pages/24805390/Netinstall).
+The installation process takes less than a minute, and the next time you boot,
+your Lenovo M920q will be ready to route.

-`Before finalizing the setup`, make sure to return to your BIOS setup, go to `Startup > Primary Boot Sequence`, and set your SSD or NVMe (depending on your hardware) back as the primary boot option. If you skip this, your Lenovo will try to boot from the network every time, causing long delays before it attempts another boot method.
+`Before finalizing the setup`, make sure to return to your BIOS setup, go to
+`Startup > Primary Boot Sequence`, and set your SSD or NVMe (depending on your
+hardware) back as the primary boot option. If you skip this, your Lenovo will
+try to boot from the network every time, causing long delays before it attempts
+another boot method.

-Your text is clear and mostly well-written. Here’s a proofread version with minor improvements for clarity and flow:
-
 ## Purchase the MikroTik RouterOS License

-If everything has gone well up to this point, you’re ready for the final step: connect to your new router and purchase the license.
-
-Unless you plan to register a mini ISP company and sell internet, just like myself, you’ll want to get a RouterOS Level 4 license. You can compare the licenses at [RouterOS license key levels](https://help.mikrotik.com/docs/spaces/ROS/pages/328149/RouterOS+license+keys#RouterOSlicensekeys-RouterOSlicensekeylevels). In short, the main differences between level 4 and higher licenses are features like OVPN, tunnels, EoIP, user management, RADIUS, etc. For most users, these aren’t necessary.
-
-To purchase the license, create a MikroTik account at the [MikroTik Account](https://mikrotik.com/client/) page. Once your account is created, you’ll receive your initial login information via email (be sure to change your password after logging in).
-
-After purchasing the license, you’ll receive an email with the subject `MikroTik RouterOS Licensed Key`, which will contain your license in a `.key` file. 
Download the license, log in to your router (e.g., via Winbox), go to `System > License`, and click `Import Key...`. Once you import the key, your router will prompt you to restart. After restarting, your new level 4 license will be applied to your device. +If everything has gone well up to this point, you’re ready for the final step: +connect to your new router and purchase the license. + +Unless you plan to register a mini ISP company and sell internet, just like +myself, you’ll want to get a RouterOS Level 4 license. You can compare the +licenses at +[RouterOS license key levels](https://help.mikrotik.com/docs/spaces/ROS/pages/328149/RouterOS+license+keys#RouterOSlicensekeys-RouterOSlicensekeylevels). +In short, the main differences between level 4 and higher licenses are features +like OVPN, tunnels, EoIP, user management, RADIUS, etc. For most users, these +aren’t necessary. + +To purchase the license, create a MikroTik account at the +[MikroTik Account](https://mikrotik.com/client/) page. Once your account is +created, you’ll receive your initial login information via email (be sure to +change your password after logging in). + +After purchasing the license, you’ll receive an email with the subject +`MikroTik RouterOS Licensed Key`, which will contain your license in a `.key` +file. Download the license, log in to your router (e.g., via Winbox), go to +`System > License`, and click `Import Key...`. Once you import the key, your +router will prompt you to restart. After restarting, your new level 4 license +will be applied to your device. ## Moving the Configuration from an Old MikroTik Router -Until now, I was using an RB3011. I exported its configuration and, with minimal changes, imported it to the new Lenovo M920q router. This saved a lot of time, as I didn’t have to recreate everything from scratch. +Until now, I was using an RB3011. I exported its configuration and, with minimal +changes, imported it to the new Lenovo M920q router. This saved a lot of time, +as I didn’t have to recreate everything from scratch. -If your current (original) router, such as an RB3011 or similar, is still running, follow these steps: +If your current (original) router, such as an RB3011 or similar, is still +running, follow these steps: **Export the config on the RB3011:** @@ -127,18 +210,35 @@ If your current (original) router, such as an RB3011 or similar, is still runnin This will generate a file named `rb3011-backup.rsc`. -Download this file using Winbox, WebFig, or SCP. You’ll find it under the `Files` menu. +Download this file using Winbox, WebFig, or SCP. You’ll find it under the +`Files` menu. Open it in your preferred text editor (e.g., Neovim, VSCode, Emacs). -At this point, you’ll want to adjust the config to match the interfaces on the Lenovo M920q. In my case, I only needed to search for `sfp1` (from the RB3011) and replace it with `ether2`. This is because, after installing the [Intel X520-DA2](https://www.aliexpress.com/item/1005005920672631.html?spm=a2g0o.order_list.order_list_main.11.329f180254cPlG), MikroTik RouterOS identifies the two SFP+ ports as `ether2` and `ether3`. So, your router will appear to have three Ethernet ports: `ether1`, `ether2`, and `ether3`. Functionally, these are still SFP+ ports, only the naming is different. +At this point, you’ll want to adjust the config to match the interfaces on the +Lenovo M920q. In my case, I only needed to search for `sfp1` (from the RB3011) +and replace it with `ether2`. 
This is because, after installing the +[Intel X520-DA2](https://www.aliexpress.com/item/1005005920672631.html?spm=a2g0o.order_list.order_list_main.11.329f180254cPlG), +MikroTik RouterOS identifies the two SFP+ ports as `ether2` and `ether3`. So, +your router will appear to have three Ethernet ports: `ether1`, `ether2`, and +`ether3`. Functionally, these are still SFP+ ports, only the naming is +different. -Make the absolute minimum changes needed to get the new router up and running. Avoid making lots of adjustments at once, as this can make troubleshooting difficult (e.g., is the Lenovo working? Is the new SFP cable good?). +Make the absolute minimum changes needed to get the new router up and running. +Avoid making lots of adjustments at once, as this can make troubleshooting +difficult (e.g., is the Lenovo working? Is the new SFP cable good?). -Once you’ve made the minimal necessary changes, restore the backup onto your Lenovo M920q. +Once you’ve made the minimal necessary changes, restore the backup onto your +Lenovo M920q. -After confirming that the basic setup works, you can proceed with additional changes and experiment as needed. +After confirming that the basic setup works, you can proceed with additional +changes and experiment as needed. ## Keep Your Old Router Around as a Backup -If you’re not in desperate need of money and don’t have to sell your old router, I recommend keeping it. You can use it to practice MikroTik configurations and have a backup in case your Lenovo ever stops functioning. Since much of this setup is about Kubernetes and we’re calling it a `mini data center`, high availability (HA) should apply at every level, not just the Kubernetes nodes. So, stash your old router in a safe place as a backup plan. +If you’re not in desperate need of money and don’t have to sell your old router, +I recommend keeping it. You can use it to practice MikroTik configurations and +have a backup in case your Lenovo ever stops functioning. Since much of this +setup is about Kubernetes and we’re calling it a `mini data center`, high +availability (HA) should apply at every level, not just the Kubernetes nodes. +So, stash your old router in a safe place as a backup plan. 
diff --git a/docusaurus/docs/networking/mikrotik/network-overview.mdx b/docusaurus/docs/networking/mikrotik/network-overview.mdx index 4912f32..8dedf79 100644 --- a/docusaurus/docs/networking/mikrotik/network-overview.mdx +++ b/docusaurus/docs/networking/mikrotik/network-overview.mdx @@ -3,6 +3,6 @@ sidebar_position: 1 title: Network Overview --- -import NetworkOverview from "@site/src/components/MikrotikNetworking/NetworkOverview"; +import NetworkOverview from '@site/src/components/MikrotikNetworking/NetworkOverview' -{" "} + diff --git a/docusaurus/docs/networking/mikrotik/summary-and-checklist.mdx b/docusaurus/docs/networking/mikrotik/summary-and-checklist.mdx index b9bc847..004dc9a 100644 --- a/docusaurus/docs/networking/mikrotik/summary-and-checklist.mdx +++ b/docusaurus/docs/networking/mikrotik/summary-and-checklist.mdx @@ -3,6 +3,6 @@ sidebar_position: 7 title: Summary & Checklist --- -import Summary from "@site/src/components/MikrotikNetworking/Summary"; +import Summary from '@site/src/components/MikrotikNetworking/Summary' -{" "} + diff --git a/docusaurus/docs/networking/mikrotik/vlan-schema.mdx b/docusaurus/docs/networking/mikrotik/vlan-schema.mdx index b0d84ad..dc7f4bb 100644 --- a/docusaurus/docs/networking/mikrotik/vlan-schema.mdx +++ b/docusaurus/docs/networking/mikrotik/vlan-schema.mdx @@ -3,6 +3,6 @@ sidebar_position: 3 title: VLAN Schema --- -import VlanSchema from "@site/src/components/MikrotikNetworking/VlanSchema"; +import VlanSchema from '@site/src/components/MikrotikNetworking/VlanSchema' -{" "} + diff --git a/docusaurus/docs/networking/mikrotik/why-mikrotik.mdx b/docusaurus/docs/networking/mikrotik/why-mikrotik.mdx index 7ca4961..653754e 100644 --- a/docusaurus/docs/networking/mikrotik/why-mikrotik.mdx +++ b/docusaurus/docs/networking/mikrotik/why-mikrotik.mdx @@ -1,6 +1,6 @@ # Why Mikrotik? -import Alert from "@site/src/components/Alert/index.tsx"; +import Alert from '@site/src/components/Alert/index.tsx' `, double-check that MetalLB has the correct IP pool configuration and that the nodes/pods can reach the network you specified. +- If the `EXTERNAL-IP` shows as ``, double-check that MetalLB has the + correct IP pool configuration and that the nodes/pods can reach the network + you specified. --- -### Step 4: (Optional) Verify Traefik Ingress Functionality +### Step 5: (Optional) Verify Traefik Ingress Functionality -Test the Traefik ingress controller by creating an `Ingress` resource, which should be exposed externally via the LoadBalancer IP that MetalLB assigned. +Test the Traefik ingress controller by creating an `Ingress` resource, which +should be exposed externally via the LoadBalancer IP that MetalLB assigned. 1. Create a simple test ingress: @@ -227,16 +334,16 @@ metadata: namespace: default spec: rules: - - host: your.custom.domain # Or use an IP-based access - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: your-app-service # Replace this with a service that your app is running on - port: - number: 80 + - host: your.custom.domain # Or use an IP-based access + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: your-app-service # Replace this with a service that your app is running on + port: + number: 80 ``` 2. Apply the `Ingress` resource: @@ -245,23 +352,32 @@ spec: kubectl apply -f test-ingress.yaml ``` -3. Ensure your `DNS` points `your.custom.domain` to the external IP (or access it with the IP address directly). +3. 
Ensure your `DNS` points `your.custom.domain` to the external IP (or access + it with the IP address directly). -4. You should be able to access your Traefik-ingressed service by navigating to `http://your.custom.domain` or `http://` in a browser. +4. You should be able to access your Traefik-ingressed service by navigating to + `http://your.custom.domain` or `http://` in a browser. --- ### Additional Considerations: -- **DNS**: Ensure you have your DNS properly configured to point the hostname you use in your Ingress definition to the external IP provided by MetalLB. -- **SSL/TLS**: If you plan to use `HTTPS`, you'll want to configure SSL termination on Traefik. This typically involves configuring Traefik with either self-signed certificates, **ACME Let's Encrypt**, or another certificate management setup. -- **Firewall**: Make sure your network firewall policies (if any) allow access to external clients for the allocated IP range in your MetalLB configuration. + +- **DNS**: Ensure you have your DNS properly configured to point the hostname + you use in your Ingress definition to the external IP provided by MetalLB. +- **SSL/TLS**: If you plan to use `HTTPS`, you'll want to configure SSL + termination on Traefik. This typically involves configuring Traefik with + either self-signed certificates, **ACME Let's Encrypt**, or another + certificate management setup. +- **Firewall**: Make sure your network firewall policies (if any) allow access + to external clients for the allocated IP range in your MetalLB configuration. --- ### Troubleshooting: 1. **No External IP**: - - Make sure that MetalLB is configured correctly, and the IP range is valid in your local network. + - Make sure that MetalLB is configured correctly, and the IP range is valid + in your local network. - Verify that the MetalLB controller and speaker pods are running. ```bash @@ -275,8 +391,11 @@ kubectl apply -f test-ingress.yaml ``` 2. **Invalid IP Range**: - - Double-check that the IP range you’ve reserved for MetalLB does not overlap with a DHCP-pool range or any IP address that’s already in use on your local network. + - Double-check that the IP range you’ve reserved for MetalLB does not overlap + with a DHCP-pool range or any IP address that’s already in use on your + local network. 3. **Ingress Routing Issues**: - - Verify the `Ingress` resource, and ensure that the service names and ports match correctly with your application. - - Validate Traefik's logs for any issues related to routing. \ No newline at end of file + - Verify the `Ingress` resource, and ensure that the service names and ports + match correctly with your application. + - Validate Traefik's logs for any issues related to routing. diff --git a/docusaurus/docs/networking/understanding-network-components.md b/docusaurus/docs/networking/understanding-network-components.md index b0090ef..233fe1f 100644 --- a/docusaurus/docs/networking/understanding-network-components.md +++ b/docusaurus/docs/networking/understanding-network-components.md @@ -4,116 +4,186 @@ title: Kubernetes Networking #### What is Network Load Balancing? -Network load balancing is a critical process that distributes incoming network traffic across multiple backend servers or pods. This distribution ensures that no single server becomes overloaded, enhancing application response times, availability, and fault tolerance. It's like directing vehicles on a highway to different lanes to avoid congestion. 
+Network load balancing is a critical process that distributes incoming network +traffic across multiple backend servers or pods. This distribution ensures that +no single server becomes overloaded, enhancing application response times, +availability, and fault tolerance. It's like directing vehicles on a highway to +different lanes to avoid congestion. #### Load Balancer -A load balancer operates as a [gateway](https://en.wikipedia.org/wiki/Gateway_(telecommunications)) that receives incoming requests and decides how to distribute them across the available servers. This ensures [high availability](https://en.wikipedia.org/wiki/High_availability) and [reliability](https://en.wikipedia.org/wiki/Reliability_engineering) by balancing the load and increasing [failover](https://en.wikipedia.org/wiki/Failover) capabilities. +A load balancer operates as a +[gateway]() that +receives incoming requests and decides how to distribute them across the +available servers. This ensures +[high availability](https://en.wikipedia.org/wiki/High_availability) and +[reliability](https://en.wikipedia.org/wiki/Reliability_engineering) by +balancing the load and increasing +[failover](https://en.wikipedia.org/wiki/Failover) capabilities. #### MetalLB in Kubernetes -- **Purpose**: Designed for environments like Raspberry Pi clusters, MetalLB provides network load balancing for bare-metal Kubernetes settings that lack a built-in cloud LoadBalancer service. -- **Functionality**: MetalLB assigns external IP addresses to services, enabling your Kubernetes cluster to be externally accessible and allowing traffic to reach your cluster services efficiently. +- **Purpose**: Designed for environments like Raspberry Pi clusters, MetalLB + provides network load balancing for bare-metal Kubernetes settings that lack a + built-in cloud LoadBalancer service. +- **Functionality**: MetalLB assigns external IP addresses to services, enabling + your Kubernetes cluster to be externally accessible and allowing traffic to + reach your cluster services efficiently. #### Ingress Controllers (Traefik, NGINX) -- **Purpose**: Ingress controllers manage HTTP and HTTPS traffic, facilitating external access to internal Kubernetes services. -- **Functionality**: They route incoming requests based on specified rules such as domain names or URL paths. For instance: +- **Purpose**: Ingress controllers manage HTTP and HTTPS traffic, facilitating + external access to internal Kubernetes services. +- **Functionality**: They route incoming requests based on specified rules such + as domain names or URL paths. For instance: - Requests to `api.example.com` might be routed to a backend API service. - Requests to `www.example.com` could be directed to a frontend service. #### Port Forwarding -- **Purpose**: Port forwarding acts as a direct pathway from your local machine to a pod within the cluster, bypassing more complex routing like ingress. -- **Use Case**: It's particularly useful for development and debugging, allowing developers to connect directly to specific pods. +- **Purpose**: Port forwarding acts as a direct pathway from your local machine + to a pod within the cluster, bypassing more complex routing like ingress. +- **Use Case**: It's particularly useful for development and debugging, allowing + developers to connect directly to specific pods. ### Integrating [MetalLB](https://metallb.universe.tf/) and Ingress Controllers -[MetalLB](https://metallb.universe.tf/) is not mutually exclusive with ingress controllers. 
Instead, they can work together. [MetalLB](https://metallb.universe.tf/) can provide an external IP address for your services, allowing your ingress controller (like Traefik or NGINX) to route incoming traffic to various services in your cluster. +[MetalLB](https://metallb.universe.tf/) is not mutually exclusive with ingress +controllers. Instead, they can work together. +[MetalLB](https://metallb.universe.tf/) can provide an external IP address for +your services, allowing your ingress controller (like Traefik or NGINX) to route +incoming traffic to various services in your cluster. 1. **[MetalLB](https://metallb.universe.tf/)**: - - Provides external IPs to the LoadBalancer services, thereby making them accessible from outside the cluster. + - Provides external IPs to the LoadBalancer services, thereby making them + accessible from outside the cluster. 2. **Ingress Controller**: - - Utilizes the IPs provided by [MetalLB](https://metallb.universe.tf/) to manage routing of incoming HTTP/HTTPS requests. It's configured through ingress resources, which dictate traffic handling: - - **Domain-Based Routing**: Traffic can be directed to services based on the domain accessed. - - **Path-Based Routing**: Specific URL paths can point to distinct services. + - Utilizes the IPs provided by [MetalLB](https://metallb.universe.tf/) to + manage routing of incoming HTTP/HTTPS requests. It's configured through + ingress resources, which dictate traffic handling: + - **Domain-Based Routing**: Traffic can be directed to services based on + the domain accessed. + - **Path-Based Routing**: Specific URL paths can point to distinct + services. ### Kubernetes ClusterIP vs NodePort vs LoadBalancer vs Ingress? -Let's explore Kubernetes service types, ClusterIP, NodePort, LoadBalancer, and Ingress, and explain when to use each one and how they can work together. +Let's explore Kubernetes service types, ClusterIP, NodePort, LoadBalancer, and +Ingress, and explain when to use each one and how they can work together. #### ClusterIP **What It Is:** -- ClusterIP is the default Kubernetes service type that exposes a service on a cluster-internal IP. This service type is only accessible within the cluster. + +- ClusterIP is the default Kubernetes service type that exposes a service on a + cluster-internal IP. This service type is only accessible within the cluster. **Use Cases:** -- **Internal Communication**: Ideal for services that only need to communicate with other services within the cluster (e.g., microservices architecture). -- **Backend Services**: Suitable for databases or back-end services that should not be accessed directly from the outside. + +- **Internal Communication**: Ideal for services that only need to communicate + with other services within the cluster (e.g., microservices architecture). +- **Backend Services**: Suitable for databases or back-end services that should + not be accessed directly from the outside. **Pros:** -- Provides a simple way to manage internal services without exposing them to the outside. + +- Provides a simple way to manage internal services without exposing them to the + outside. - Reduces security risks by limiting external access. **Cons:** + - Not suitable for direct access from outside the cluster. #### NodePort **What It Is:** -- NodePort exposes a service on each node's IP at a specific port. It creates a static port on each node and forwards traffic to your service. + +- NodePort exposes a service on each node's IP at a specific port. 
It creates a + static port on each node and forwards traffic to your service. **Use Cases:** -- **Local Development**: Ideal for development environments or testing purposes where simple access is needed. -- **Small-Scale Applications**: When running a small or non-critical setup where direct node access is required. -- **Debugging**: Suitable for scenarios where quick access to a service from an external source is necessary for troubleshooting. + +- **Local Development**: Ideal for development environments or testing purposes + where simple access is needed. +- **Small-Scale Applications**: When running a small or non-critical setup where + direct node access is required. +- **Debugging**: Suitable for scenarios where quick access to a service from an + external source is necessary for troubleshooting. **Pros:** -- Simple to set up and does not require external infrastructure like a load balancer. + +- Simple to set up and does not require external infrastructure like a load + balancer. **Cons:** -- Requires manual management of ports, which can become complex in larger environments. + +- Requires manual management of ports, which can become complex in larger + environments. - Not ideal for high availability since traffic can overwhelm a single node. #### LoadBalancer **What It Is:** -- LoadBalancer creates an external load balancer in supported environments, assigning an external IP address to your service. MetalLB can simulate this in bare metal environments like those with Raspberry Pis. + +- LoadBalancer creates an external load balancer in supported environments, + assigning an external IP address to your service. MetalLB can simulate this in + bare metal environments like those with Raspberry Pis. **Use Cases:** -- **Production-Ready Applications**: When you need stable IP addresses and external access in production environments. -- **Auto-Scaling Needs**: In scenarios where you need automatic distribution of traffic across pods without manual management. + +- **Production-Ready Applications**: When you need stable IP addresses and + external access in production environments. +- **Auto-Scaling Needs**: In scenarios where you need automatic distribution of + traffic across pods without manual management. **Pros:** + - Provides a single, stable point of access. - Managed traffic distribution across multiple nodes. **Cons:** + - Can be costly in some cloud environments due to resource usage. -- Simpler than Ingress, hence does not support HTTP-level routing or SSL termination. +- Simpler than Ingress, hence does not support HTTP-level routing or SSL + termination. #### Ingress **What It Is:** -- Ingress manages external access to services within a cluster, typically HTTP/HTTPS. Ingress controllers handle routing rules and can offer additional functionality like SSL termination and host/path-based routing. + +- Ingress manages external access to services within a cluster, typically + HTTP/HTTPS. Ingress controllers handle routing rules and can offer additional + functionality like SSL termination and host/path-based routing. **Use Cases:** -- **Complex Applications**: Suitable for environments with multiple services that require sophisticated routing based on domains or paths. -- **Unified Entry Point**: When you want to consolidate access through a single, manageable endpoint. -- **Secure Connections**: Supports SSL termination, critical for secure communication over the web. 
+ +- **Complex Applications**: Suitable for environments with multiple services + that require sophisticated routing based on domains or paths. +- **Unified Entry Point**: When you want to consolidate access through a single, + manageable endpoint. +- **Secure Connections**: Supports SSL termination, critical for secure + communication over the web. **Pros:** + - Offers rich routing features and flexibility. - Can manage multiple services through a single gateway. **Cons:** + - Requires additional configuration and management. -- Initial setup can be complex and requires understanding of ingress rules and controllers. +- Initial setup can be complex and requires understanding of ingress rules and + controllers. ### Recommendations -- **Choose ClusterIP for**: Services only needing internal communication within the cluster. -- **Choose NodePort for**: Simplicity in small, non-production setups where each node's IP can handle incoming requests. -- **Choose LoadBalancer for**: Production needs in cloud environments or using MetalLB for external IP management in bare-metal scenarios. -- **Choose Ingress for**: Complex routing logic, SSL support, and environments requiring a unified interface for multiple services. \ No newline at end of file +- **Choose ClusterIP for**: Services only needing internal communication within + the cluster. +- **Choose NodePort for**: Simplicity in small, non-production setups where each + node's IP can handle incoming requests. +- **Choose LoadBalancer for**: Production needs in cloud environments or using + MetalLB for external IP management in bare-metal scenarios. +- **Choose Ingress for**: Complex routing logic, SSL support, and environments + requiring a unified interface for multiple services. diff --git a/docusaurus/docs/resources.md b/docusaurus/docs/resources.md index ab74ed6..724a6a6 100644 --- a/docusaurus/docs/resources.md +++ b/docusaurus/docs/resources.md @@ -2,11 +2,18 @@ title: Resources --- -I've been in this industry for over 20 years, and I can with certainty say that there's no better community than the one in this industry. Everytyhing that we build, all the new things we come up with, are the combination of the work of many people. In simple terms, we are all sharing knowledge with each other, and building up on top of each other's work. +I've been in this industry for over 20 years, and I can with certainty say that +there's no better community than the one in this industry. Everytyhing that we +build, all the new things we come up with, are the combination of the work of +many people. In simple terms, we are all sharing knowledge with each other, and +building up on top of each other's work. -This very guide is no exception of that. I came up with many ideas, but also, got inspired by the work of others. +This very guide is no exception of that. I came up with many ideas, but also, +got inspired by the work of others. -While I haven't been using the things from these resources directly, e.g. some might have only challenged me to think differently, I'll still share them for the sake of being transparent, aka, being a good citizen of the community. +While I haven't been using the things from these resources directly, e.g. some +might have only challenged me to think differently, I'll still share them for +the sake of being transparent, aka, being a good citizen of the community. The order is random, and I'll write them down as I go through my notes. 
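To make the ClusterIP/NodePort/LoadBalancer/Ingress comparison above more
concrete, here is a minimal sketch of the same hypothetical app exposed two
ways (the `demo-api` names are placeholders, not part of this repository's
exercises):

```yaml
# ClusterIP (default): reachable only from inside the cluster,
# e.g. by other pods or by an ingress controller.
apiVersion: v1
kind: Service
metadata:
  name: demo-api-internal
spec:
  type: ClusterIP
  selector:
    app: demo-api
  ports:
    - port: 80 # port other workloads in the cluster connect to
      targetPort: 3000 # container port of the app
---
# LoadBalancer: on this bare-metal setup, MetalLB assigns an external IP
# from its configured address pool to this service.
apiVersion: v1
kind: Service
metadata:
  name: demo-api-public
spec:
  type: LoadBalancer
  selector:
    app: demo-api
  ports:
    - port: 80
      targetPort: 3000
```

In practice, you would usually keep application services as `ClusterIP` and
expose only the ingress controller (Traefik) through a single `LoadBalancer`
service, letting host- and path-based Ingress rules fan traffic out from there.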
diff --git a/docusaurus/docs/storage/setup-longhorn-dashboard.md b/docusaurus/docs/storage/setup-longhorn-dashboard.md index c8e6e7a..1d84495 100644 --- a/docusaurus/docs/storage/setup-longhorn-dashboard.md +++ b/docusaurus/docs/storage/setup-longhorn-dashboard.md @@ -4,7 +4,9 @@ title: Expose Longhorn Dashboard using Traefik Ingress ### Identify the Longhorn Dashboard Service -When Longhorn is installed, it comes with a **Service** called `longhorn-frontend`. This service manages access to the Longhorn dashboard, which is used for monitoring and managing Longhorn volumes. +When Longhorn is installed, it comes with a **Service** called +`longhorn-frontend`. This service manages access to the Longhorn dashboard, +which is used for monitoring and managing Longhorn volumes. You can verify the service by running: @@ -12,7 +14,8 @@ You can verify the service by running: kubectl get svc -n longhorn-system ``` -Look for the **`longhorn-frontend`** service in the output, which typically looks like this: +Look for the **`longhorn-frontend`** service in the output, which typically +looks like this: ```plaintext NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE @@ -21,11 +24,13 @@ longhorn-frontend ClusterIP 10.43.75.105 80/TCP 4m ### Create an Ingress Resource to Expose the Dashboard -We will use **Traefik Ingress** to expose the Longhorn dashboard so that it is accessible via a browser. +We will use **Traefik Ingress** to expose the Longhorn dashboard so that it is +accessible via a browser. #### Ingress YAML Configuration -Create a YAML file (e.g., `longhorn-ingress.yaml`) with the following configuration: +Create a YAML file (e.g., `longhorn-ingress.yaml`) with the following +configuration: ```yaml apiVersion: networking.k8s.io/v1 @@ -34,25 +39,27 @@ metadata: name: longhorn-ingress namespace: longhorn-system spec: - ingressClassName: traefik # We use Traefik as the ingress controller + ingressClassName: traefik # We use Traefik as the ingress controller rules: - - host: longhorn.local.host # The domain by which we'll access Longhorn UI + - host: longhorn.local.host # The domain by which we'll access Longhorn UI http: paths: - path: / pathType: Prefix backend: service: - name: longhorn-frontend # Service managing Longhorn dashboard + name: longhorn-frontend # Service managing Longhorn dashboard port: - number: 80 # Service port where Longhorn UI runs + number: 80 # Service port where Longhorn UI runs ``` This configuration does the following: -- It tells Traefik to expose the **`longhorn-frontend`** service (which runs the dashboard) under the host `longhorn.local.host`. +- It tells Traefik to expose the **`longhorn-frontend`** service (which runs the + dashboard) under the host `longhorn.local.host`. -- HTTP traffic to this host will be routed to the Longhorn dashboard running on port 80. +- HTTP traffic to this host will be routed to the Longhorn dashboard running on + port 80. #### Apply the Ingress Resource: @@ -64,11 +71,16 @@ kubectl apply -f longhorn-ingress.yaml ### Configure Your `/etc/hosts` File for Local Access -Since this is for local testing, we need to map the domain `longhorn.local.host` to the IP address of your Kubernetes cluster. We'll achieve this by updating your **`/etc/hosts`** file to resolve requests for `longhorn.local.host` to your cluster node's IP. +Since this is for local testing, we need to map the domain `longhorn.local.host` +to the IP address of your Kubernetes cluster. 
We'll achieve this by updating +your **`/etc/hosts`** file to resolve requests for `longhorn.local.host` to your +cluster node's IP. #### Get the Cluster Node's IP: -Find the IP address of your cluster node (where Traefik or your load balancer is running). You can often use `kubectl get nodes -o wide` to retrieve the IP address. It might look something like this: +Find the IP address of your cluster node (where Traefik or your load balancer is +running). You can often use `kubectl get nodes -o wide` to retrieve the IP +address. It might look something like this: ```bash kubectl get nodes -o wide @@ -80,11 +92,13 @@ node-master Ready master,control-plane 12d v1.22.2 192.168.1.100 Ubuntu 20.04.3 LTS 5.4.0-89-generic docker://20.10.8 ``` -In this example, suppose Traefik is running on the master node, which has the IP `192.168.1.100`. +In this example, suppose Traefik is running on the master node, which has the IP +`192.168.1.100`. #### Update `/etc/hosts`: -Edit the `/etc/hosts` file on your local machine using a text editor (e.g., `vim`, `nano`, etc.). +Edit the `/etc/hosts` file on your local machine using a text editor (e.g., +`vim`, `nano`, etc.). ```bash sudo nano /etc/hosts @@ -96,15 +110,17 @@ Add the following entry, replacing **`192.168.1.100`** with your node's IP: 192.168.1.100 longhorn.local.host ``` -This entry ensures that when you open `http://longhorn.local.host` in your browser, it will route the traffic to your cluster. - +This entry ensures that when you open `http://longhorn.local.host` in your +browser, it will route the traffic to your cluster. ### Access the Longhorn Dashboard -Now that your ingress is configured and your `/etc/hosts` is updated, you should be able to access the Longhorn dashboard by navigating to: +Now that your ingress is configured and your `/etc/hosts` is updated, you should +be able to access the Longhorn dashboard by navigating to: ``` http://longhorn.local.host ``` -The Longhorn UI should load in your browser, allowing you to manage your Longhorn volumes and nodes. \ No newline at end of file +The Longhorn UI should load in your browser, allowing you to manage your +Longhorn volumes and nodes. diff --git a/docusaurus/docs/storage/setup-longhorn.md b/docusaurus/docs/storage/setup-longhorn.md index b29291f..1ffb181 100644 --- a/docusaurus/docs/storage/setup-longhorn.md +++ b/docusaurus/docs/storage/setup-longhorn.md @@ -4,17 +4,21 @@ title: Setup Longhorn #### Download the Longhorn Manifest YAMLs -Longhorn's manifest files are available in their GitHub repository. You can apply them directly to the Kubernetes cluster: +Longhorn's manifest files are available in their GitHub repository. You can +apply them directly to the Kubernetes cluster: ```bash kubectl apply -f https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/longhorn.yaml ``` -This command will pull the entire Longhorn deployment YAML, which configures everything Longhorn requires inside the `longhorn-system` namespace. +This command will pull the entire Longhorn deployment YAML, which configures +everything Longhorn requires inside the `longhorn-system` namespace. #### Monitor the Deployment Progress -After applying the manifest, you'll see various Kubernetes objects like Pods, Services, DaemonSets, and CRDs being created. You can monitor them with the following command: +After applying the manifest, you'll see various Kubernetes objects like Pods, +Services, DaemonSets, and CRDs being created. 
You can monitor them with the +following command: ```bash kubectl get all -n longhorn-system @@ -22,11 +26,13 @@ kubectl get all -n longhorn-system Especially watch the status of the Pods. -It will take a couple of minutes for all required components to pull the images from the Docker registry, configure themselves, and become ready. +It will take a couple of minutes for all required components to pull the images +from the Docker registry, configure themselves, and become ready. #### Verify Custom Resource Definitions (CRDs) -Longhorn uses Custom Resource Definitions (CRDs) for managing and storing information about volumes, nodes, and engines. +Longhorn uses Custom Resource Definitions (CRDs) for managing and storing +information about volumes, nodes, and engines. Check if the Longhorn CRDs have been installed properly: @@ -42,7 +48,8 @@ You should see a list of Longhorn-related CRDs like: - `replicas.longhorn.io` - and others. -These CRDs are the foundation of Longhorn's integration into your Kubernetes cluster. +These CRDs are the foundation of Longhorn's integration into your Kubernetes +cluster. #### Verify Longhorn Components (Pods, DaemonSet) @@ -59,35 +66,48 @@ You should see Longhorn pods running, like: - `longhorn-ui-{pod-name}` - `longhorn-driver-deployer-{pod-name}` -Additionally, verify that the `longhorn-manager` DaemonSet has pods on **every node** in your cluster, as it’s responsible for managing Longhorn processes on each node: +Additionally, verify that the `longhorn-manager` DaemonSet has pods on **every +node** in your cluster, as it’s responsible for managing Longhorn processes on +each node: ```bash kubectl get ds -n longhorn-system ``` -Check that the DaemonSet has `Desired` pods on all your nodes, and `Current` matches the desired pod count. +Check that the DaemonSet has `Desired` pods on all your nodes, and `Current` +matches the desired pod count. ### Accessing the Longhorn UI -Longhorn provides a web-based UI for managing your storage. To access it, you will need to expose its service. We cover this in the next section ["Expose Longhorn Dashboard using Traefik Ingress"](setup-longhorn-dashboard). +Longhorn provides a web-based UI for managing your storage. To access it, you +will need to expose its service. We cover this in the next section +["Expose Longhorn Dashboard using Traefik Ingress"](setup-longhorn-dashboard). ### Configure Nodes for Longhorn Storage -Longhorn automatically recognizes your Kubernetes nodes, but you may want to configure how disks on your nodes are used for storage. +Longhorn automatically recognizes your Kubernetes nodes, but you may want to +configure how disks on your nodes are used for storage. -You can do this through the **Longhorn UI** under the **Node & Disk** section. Here you can: +You can do this through the **Longhorn UI** under the **Node & Disk** section. +Here you can: - Determine how much space is allocated on each node. -- Specify custom directories for disk storage (e.g., `/mnt/disk` instead of default paths). -- Set replication factors (i.e., how many copies of a volume will be stored across nodes). +- Specify custom directories for disk storage (e.g., `/mnt/disk` instead of + default paths). +- Set replication factors (i.e., how many copies of a volume will be stored + across nodes). ### Test Longhorn - Creating a PVC -Let’s verify that Longhorn is working by creating a test Persistent Volume Claim (PVC). Here’s how you can create a StorageClass and a sample PVC. 
+Let’s verify that Longhorn is working by creating a test Persistent Volume Claim +(PVC). Here’s how you can create a StorageClass and a sample PVC. #### Create the Longhorn StorageClass -To create a `StorageClass` for Longhorn, you need to define one so that Longhorn can dynamically provision volumes. You can use the default settings, but feel free to customize, especially the number of replicas depending on how many nodes you have. +To create a `StorageClass` for Longhorn, you need to define one so that Longhorn +can dynamically provision volumes. You can use the default settings, but feel +free to customize, especially the number of replicas depending on how many nodes +you have. Create a file named `longhorn-storageclass.yaml`: @@ -98,8 +118,8 @@ metadata: name: longhorn provisioner: driver.longhorn.io parameters: - numberOfReplicas: "2" - staleReplicaTimeout: "30" + numberOfReplicas: '2' + staleReplicaTimeout: '30' allowVolumeExpansion: true reclaimPolicy: Retain volumeBindingMode: Immediate @@ -113,7 +133,8 @@ kubectl apply -f longhorn-storageclass.yaml #### Create a PVC Using Longhorn -Now create a sample Persistent Volume Claim (PVC) to test that Longhorn can provision volumes: +Now create a sample Persistent Volume Claim (PVC) to test that Longhorn can +provision volumes: Create a `longhorn-pvc.yaml` file: @@ -147,7 +168,8 @@ Once it’s `Bound`, you know Longhorn successfully provisioned your storage. #### Optionally Deploy a Pod Using the PVC -To further verify the PVC is working, you can deploy a simple pod (for example, the NGINX web server) that mounts the Longhorn volume: +To further verify the PVC is working, you can deploy a simple pod (for example, +the NGINX web server) that mounts the Longhorn volume: Create a simple `nginx-pod.yaml` file: @@ -158,15 +180,15 @@ metadata: name: nginx spec: containers: - - image: nginx - name: nginx - volumeMounts: - - name: data - mountPath: /usr/share/nginx/html + - image: nginx + name: nginx + volumeMounts: + - name: data + mountPath: /usr/share/nginx/html volumes: - - name: data - persistentVolumeClaim: - claimName: longhorn-pvc + - name: data + persistentVolumeClaim: + claimName: longhorn-pvc ``` Apply the pod: @@ -179,8 +201,11 @@ Once the pod is running, Longhorn storage is working as expected. ### Monitor Longhorn -Longhorn offers monitoring and management tools (both in the UI and via the CLI) to track the status of volumes, nodes, and replicas. +Longhorn offers monitoring and management tools (both in the UI and via the CLI) +to track the status of volumes, nodes, and replicas. Key areas to check: + - **Volumes**: Make sure volumes are healthy and properly replicated. -- **Replicas**: Ensure replicas are collaborating across your cluster nodes to ensure data redundancy. \ No newline at end of file +- **Replicas**: Ensure replicas are collaborating across your cluster nodes to + ensure data redundancy. diff --git a/docusaurus/docs/storage/understanding-longhorn-concepts.md b/docusaurus/docs/storage/understanding-longhorn-concepts.md index 267fa1a..c99358b 100644 --- a/docusaurus/docs/storage/understanding-longhorn-concepts.md +++ b/docusaurus/docs/storage/understanding-longhorn-concepts.md @@ -4,79 +4,117 @@ title: Kubernetes Storage ### StorageClass - What is it? -Think of a **StorageClass** in Kubernetes as a **"recipe"** or **blueprint** that dictates **how to create storage** for your application. 
+Think of a **StorageClass** in Kubernetes as a **"recipe"** or **blueprint** +that dictates **how to create storage** for your application. -In Kubernetes, you often need to store data (like database data, logs, etc.), and different applications might need different types of storage (some want fast, some want big, some want highly replicated storage). To solve this, Kubernetes uses **StorageClasses** to define how **storage should be provisioned**. +In Kubernetes, you often need to store data (like database data, logs, etc.), +and different applications might need different types of storage (some want +fast, some want big, some want highly replicated storage). To solve this, +Kubernetes uses **StorageClasses** to define how **storage should be +provisioned**. -- A **StorageClass** describes the **type of storage**: It can be based on the **speed**, **redundancy**, **storage provider/driver**, or **other properties**. -- Once defined, the StorageClass allows Kubernetes to automatically give the right kind of storage to any service (`Pods`) that asks for it. +- A **StorageClass** describes the **type of storage**: It can be based on the + **speed**, **redundancy**, **storage provider/driver**, or **other + properties**. +- Once defined, the StorageClass allows Kubernetes to automatically give the + right kind of storage to any service (`Pods`) that asks for it. -So, instead of worrying about *how* to create storage for your specific application, you just pick a StorageClass, and Kubernetes takes care of the rest (i.e., Kubernetes sends the request to the configured storage system). +So, instead of worrying about _how_ to create storage for your specific +application, you just pick a StorageClass, and Kubernetes takes care of the rest +(i.e., Kubernetes sends the request to the configured storage system). #### Simple Analogy for a StorageClass -Imagine you're at a **pizza restaurant.** You want to order a pizza, but you don't care about how the kitchen makes it, you just describe the type of pizza you want by **selecting a predefined option** on the menu: +Imagine you're at a **pizza restaurant.** You want to order a pizza, but you +don't care about how the kitchen makes it, you just describe the type of pizza +you want by **selecting a predefined option** on the menu: - Regular crust - Thin crust - Extra cheese -The kitchen (in this case, Kubernetes) **knows how to create** the pizza based on those instructions. +The kitchen (in this case, Kubernetes) **knows how to create** the pizza based +on those instructions. -In the Kubernetes world, the **"crust and cheese options"** represent different types of storage like Longhorn, AWS EBS, Google Persistent Disks, SSDs, etc. +In the Kubernetes world, the **"crust and cheese options"** represent different +types of storage like Longhorn, AWS EBS, Google Persistent Disks, SSDs, etc. ### PersistentVolumeClaim (PVC) - What does it do? -A **PersistentVolumeClaim (PVC)** is your way of asking for a specific amount of storage from Kubernetes. It’s kind of like saying, "**Hey, I need 10 GB of storage that I can use reliably and persistently**." +A **PersistentVolumeClaim (PVC)** is your way of asking for a specific amount of +storage from Kubernetes. It’s kind of like saying, "**Hey, I need 10 GB of +storage that I can use reliably and persistently**." -- A **PVC** is a request for storage: In the PVC, you specify **how much storage** you need (e.g., 10 GB or 50 GB), and **what kind of access** you need (e.g., read only or read/write). 
- -- The PVC gets "matched" to a **PersistentVolume** (an actual piece of storage) through the **StorageClass** you define. Once this happens, Kubernetes guarantees that the storage is reserved and available for your application (even if the pod is deleted or recreated). +- A **PVC** is a request for storage: In the PVC, you specify **how much + storage** you need (e.g., 10 GB or 50 GB), and **what kind of access** you + need (e.g., read only or read/write). +- The PVC gets "matched" to a **PersistentVolume** (an actual piece of storage) + through the **StorageClass** you define. Once this happens, Kubernetes + guarantees that the storage is reserved and available for your application + (even if the pod is deleted or recreated). -In simpler terms, imagine your PVC as a **rental request form**. You fill it out, specifying how much storage (like how much "house space" you need) and what type of house (StorageClass) you're asking for. Once Kubernetes finds matching storage (PersistentVolume), it gives you the key to that "house" (or disk) to use. +In simpler terms, imagine your PVC as a **rental request form**. You fill it +out, specifying how much storage (like how much "house space" you need) and what +type of house (StorageClass) you're asking for. Once Kubernetes finds matching +storage (PersistentVolume), it gives you the key to that "house" (or disk) to +use. -So, the **PVC connects you to that storage**, and you can now use it for your application's data. +So, the **PVC connects you to that storage**, and you can now use it for your +application's data. #### Simple Analogy for a PVC -Let's go back to the **pizza restaurant** analogy. Your **PVC** is kind of like saying: +Let's go back to the **pizza restaurant** analogy. Your **PVC** is kind of like +saying: -- "*I want a pizza that’s **12 inches large**, and it should be **thin crust**!*" +- "_I want a pizza that’s **12 inches large**, and it should be **thin + crust**!_" When you make this request (PVC), the restaurant (Kubernetes) will: -1. Look at its "menu" (StorageClasses) and find the right recipe or profile that matches your request. + +1. Look at its "menu" (StorageClasses) and find the right recipe or profile that + matches your request. 2. Bake a pizza based on that recipe (allocate PersistentVolume). -3. Serve it to you (PVC is *bound* to the actual storage). +3. Serve it to you (PVC is _bound_ to the actual storage). -So, whenever you create a **PVC**, it will “claim” a matching **PersistantVolume** from Kubernetes, ensuring that your "requested storage" is available and bound to you for the data needs for your app. +So, whenever you create a **PVC**, it will “claim” a matching +**PersistantVolume** from Kubernetes, ensuring that your "requested storage" is +available and bound to you for the data needs for your app. ### Putting It Together -1. **StorageClass** == A **blueprint (recipe)** that defines how to provision a specific type of storage (e.g., fast disk, replicated storage, etc.). - -2. **PersistentVolumeClaim (PVC)** == **A request** for storage. It says, "*I need X amount of storage handled in Y way*", and then Kubernetes matches it with the right type of storage based on the **StorageClass**. +1. **StorageClass** == A **blueprint (recipe)** that defines how to provision a + specific type of storage (e.g., fast disk, replicated storage, etc.). + +2. **PersistentVolumeClaim (PVC)** == **A request** for storage. 
It says, "_I + need X amount of storage handled in Y way_", and then Kubernetes matches it + with the right type of storage based on the **StorageClass**. ### Real Example -Let's say you're deploying a **MySQL database** in your Kubernetes cluster. It's going to need some disk space to store data. +Let's say you're deploying a **MySQL database** in your Kubernetes cluster. It's +going to need some disk space to store data. -1. First, you'll define a **StorageClass** to tell Kubernetes where the storage should come from and what kind it should be (e.g., using Longhorn for local replicated storage). +1. First, you'll define a **StorageClass** to tell Kubernetes where the storage + should come from and what kind it should be (e.g., using Longhorn for local + replicated storage). - ```yaml - apiVersion: storage.k8s.io/v1 - kind: StorageClass - metadata: - name: longhorn - provisioner: driver.longhorn.io - parameters: - numberOfReplicas: "2" - staleReplicaTimeout: "30" - allowVolumeExpansion: true - reclaimPolicy: Retain - volumeBindingMode: Immediate - ``` + ```yaml + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: longhorn + provisioner: driver.longhorn.io + parameters: + numberOfReplicas: '2' + staleReplicaTimeout: '30' + allowVolumeExpansion: true + reclaimPolicy: Retain + volumeBindingMode: Immediate + ``` -2. Next, you'll make a **PersistentVolumeClaim (PVC)** that asks for, say, **5 GB** of storage that uses this **longhorn** StorageClass. +2. Next, you'll make a **PersistentVolumeClaim (PVC)** that asks for, say, **5 + GB** of storage that uses this **longhorn** StorageClass. ```yaml apiVersion: v1 @@ -92,9 +130,14 @@ Let's say you're deploying a **MySQL database** in your Kubernetes cluster. It's storage: 5Gi ``` -Once the PVC is created, Kubernetes finds storage according to the `longhorn` recipe and gives you **5 GB** of storage. Now your MySQL pod can use that storage to save data files or your database. +Once the PVC is created, Kubernetes finds storage according to the `longhorn` +recipe and gives you **5 GB** of storage. Now your MySQL pod can use that +storage to save data files or your database. #### Summary: -- **StorageClass**: The blueprint that defines what type of storage to give when storage is requested (e.g., fast SSD storage, networked storage, etc.). -- **PersistentVolumeClaim (PVC)**: A request for a specific amount of storage based on the criteria defined in the StorageClass (like *"I need 10 GB of disk space on this class of storage!"*). \ No newline at end of file +- **StorageClass**: The blueprint that defines what type of storage to give when + storage is requested (e.g., fast SSD storage, networked storage, etc.). +- **PersistentVolumeClaim (PVC)**: A request for a specific amount of storage + based on the criteria defined in the StorageClass (like _"I need 10 GB of disk + space on this class of storage!"_). diff --git a/docusaurus/docs/welcome.md b/docusaurus/docs/welcome.md index 3c1de14..96deeba 100644 --- a/docusaurus/docs/welcome.md +++ b/docusaurus/docs/welcome.md @@ -10,20 +10,46 @@ import DataCenterOverview from "@site/src/components/DataCenterOverview"; Welcome! I'm glad you're here. Hail to the SEO overlords! -If we haven't met yet, my name is Alexander. I've been in IT for over 20 years and professionally active since 2007. Over the years, I've worked as a CISCO engineer, a senior full-stack developer, a startup co-founder, and, for the past few years, a team and tech lead. 
You can check out my [LinkedIn profile](https://www.linkedin.com/in/aleksandar-grbic-74670263/) if you want to know more about my background. +If we haven't met yet, my name is Alexander. I've been in IT for over 20 years +and professionally active since 2007. Over the years, I've worked as a CISCO +engineer, a senior full-stack developer, a startup co-founder, and, for the past +few years, a team and tech lead. You can check out my +[LinkedIn profile](https://www.linkedin.com/in/aleksandar-grbic-74670263/) if +you want to know more about my background. -I think it's important to share a bit about myself so you can decide if I'm the right person to guide you. +I think it's important to share a bit about myself so you can decide if I'm the +right person to guide you. -I hope this guide to building a home "mini data center", from hardware and networking to K3s and real workloads is helpful. If you have any questions or suggestions, feel free to reach out on GitHub or join me on one of my live streams. +I hope this guide to building a home "mini data center", from hardware and +networking to K3s and real workloads is helpful. If you have any questions or +suggestions, feel free to reach out on GitHub or join me on one of my live +streams. Now, let's get started. -With so much information available today, it's worth asking yourself, _"Why am I building this?"_ or _"Why am I learning these skills?"_ - -Why choose this project, setting up your own infrastructure, networking, and orchestration, when there are so many other things you could focus on? How will it help your career? Will it solve any real problems you're facing, or are you just doing it because it seems popular or trendy? These are the kinds of questions you should think about before committing to something new. - -Many of you might know me from [Twitch](https://www.twitch.tv/programmer_network) or [YouTube](https://www.youtube.com/@programmer-network). I prefer to keep things practical and honest. I'm not a fan of the influencer mindset where people recommend things just to fit their content strategy, especially when they don't actually use the tools or techniques they're promoting. - -So, when someone asks, _"Why should I build a home lab or learn Kubernetes?"_, it's a valid question. I could give you plenty of reasons, but not all of them might matter to you. Maybe this isn't the right time for you to dive into this. Maybe there are other skills that would be more useful for where you are in your career right now. - -It's important to understand the difference between things that are interesting to explore and things that are genuinely valuable to learn. Everyone has different needs depending on their goals, so take the time to think about whether this journey is what you need at this moment. +With so much information available today, it's worth asking yourself, _"Why am I +building this?"_ or _"Why am I learning these skills?"_ + +Why choose this project, setting up your own infrastructure, networking, and +orchestration, when there are so many other things you could focus on? How will +it help your career? Will it solve any real problems you're facing, or are you +just doing it because it seems popular or trendy? These are the kinds of +questions you should think about before committing to something new. + +Many of you might know me from +[Twitch](https://www.twitch.tv/programmer_network) or +[YouTube](https://www.youtube.com/@programmer-network). I prefer to keep things +practical and honest. 
I'm not a fan of the influencer mindset where people +recommend things just to fit their content strategy, especially when they don't +actually use the tools or techniques they're promoting. + +So, when someone asks, _"Why should I build a home lab or learn Kubernetes?"_, +it's a valid question. I could give you plenty of reasons, but not all of them +might matter to you. Maybe this isn't the right time for you to dive into this. +Maybe there are other skills that would be more useful for where you are in your +career right now. + +It's important to understand the difference between things that are interesting +to explore and things that are genuinely valuable to learn. Everyone has +different needs depending on their goals, so take the time to think about +whether this journey is what you need at this moment. diff --git a/docusaurus/docs/what-we-will-learn.md b/docusaurus/docs/what-we-will-learn.md index 92af6b5..5e336d8 100644 --- a/docusaurus/docs/what-we-will-learn.md +++ b/docusaurus/docs/what-we-will-learn.md @@ -2,11 +2,24 @@ title: Outcome --- -Whenever I'm learning something, I try to think about the outcome first. That's where I usually begin. It's similar to building a product. As Steve Jobs once said, "Start with the customer, and the problem, then work backwards to the technology." +Whenever I'm learning something, I try to think about the outcome first. That's +where I usually begin. It's similar to building a product. As Steve Jobs once +said, "Start with the customer, and the problem, then work backwards to the +technology." -I started learning Kubernetes because I wanted to enable myself to provision infrastructure with minimal effort. If I want to start building a new product and "ship fast, fail fast," infrastructure should be an afterthought. I don't want to spend time thinking about it. I should just write a few small YAML files, hit deploy, and be good to go. I shouldn't have to worry about networking, hard drives, CPUs, Logging etc. I just need an interface where I can say, "Hey, I want XYZ, give it to me. +I started learning Kubernetes because I wanted to enable myself to provision +infrastructure with minimal effort. If I want to start building a new product +and "ship fast, fail fast," infrastructure should be an afterthought. I don't +want to spend time thinking about it. I should just write a few small YAML +files, hit deploy, and be good to go. I shouldn't have to worry about +networking, hard drives, CPUs, Logging etc. I just need an interface where I can +say, "Hey, I want XYZ, give it to me. -With that in mind, this will be the outcome of this course: By the end, we'll have the ability to provision a full-stack application infrastructure in under five minutes. And to top it off, it'll cost us next to nothing, as the only expenses will be the initial investment in the bare-metal server, plus about 5 euros per month for electricity. +With that in mind, this will be the outcome of this course: By the end, we'll +have the ability to provision a full-stack application infrastructure in under +five minutes. And to top it off, it'll cost us next to nothing, as the only +expenses will be the initial investment in the bare-metal server, plus about 5 +euros per month for electricity. 
```mermaid %%{init: { 'theme': 'dark' } }%% diff --git a/docusaurus/docs/why-is-it-hard.md b/docusaurus/docs/why-is-it-hard.md index 1703795..3a6d790 100644 --- a/docusaurus/docs/why-is-it-hard.md +++ b/docusaurus/docs/why-is-it-hard.md @@ -2,16 +2,37 @@ title: The Challenges of Building a Mini Data Center --- -One of the hardest things about building a mini data center isn't any single technology or component. Many people often find themselves learning individual pieces - like networking, hardware setup, or Kubernetes - without seeing how everything fits together. And while that fragmented approach is challenging, it's not the biggest obstacle. +One of the hardest things about building a mini data center isn't any single +technology or component. Many people often find themselves learning individual +pieces - like networking, hardware setup, or Kubernetes - without seeing how +everything fits together. And while that fragmented approach is challenging, +it's not the biggest obstacle. -The biggest challenge is building something comprehensive and then letting it gather dust. In the context of our mini data center, if you end up going through this guide and then just abandoning your setup, you will find yourself forgetting crucial skills really fast - from MikroTik networking configurations to storage management and container orchestration. The advantage that I believe this entire guide enforces is that we are actually building an infrastructure that we intend to use daily. It's our mini data center that we are excited to maintain, covering everything from physical hardware and L2 networking to distributed storage, container orchestration, and running our applications. +The biggest challenge is building something comprehensive and then letting it +gather dust. In the context of our mini data center, if you end up going through +this guide and then just abandoning your setup, you will find yourself +forgetting crucial skills really fast - from MikroTik networking configurations +to storage management and container orchestration. The advantage that I believe +this entire guide enforces is that we are actually building an infrastructure +that we intend to use daily. It's our mini data center that we are excited to +maintain, covering everything from physical hardware and L2 networking to +distributed storage, container orchestration, and running our applications. -Clearly, you can also look at this complexity as a massive disadvantage, and to answer that concern, we really need to get back to [why](./why.md) and understand what we are getting out of this journey. +Clearly, you can also look at this complexity as a massive disadvantage, and to +answer that concern, we really need to get back to [why](./why.md) and +understand what we are getting out of this journey. 
So, to make this learning experience manageable, we need to ensure that: - We are building this as our primary infrastructure that we will actively use -- We understand that a mini data center requires regular maintenance across all layers - from hardware and networking to software - and we're enthusiastic about that -- We accept that components will fail at some point - whether it's a Raspberry Pi, a network switch, or a software deployment - and we see these as valuable learning opportunities -- We view this complete setup as an investment in our technical growth, keeping our full-stack infrastructure skills sharp -- We recognize that having hands-on experience with every layer of modern infrastructure, especially in the age of AI, helps us stay relevant and adaptable in the job market +- We understand that a mini data center requires regular maintenance across all + layers - from hardware and networking to software - and we're enthusiastic + about that +- We accept that components will fail at some point - whether it's a Raspberry + Pi, a network switch, or a software deployment - and we see these as valuable + learning opportunities +- We view this complete setup as an investment in our technical growth, keeping + our full-stack infrastructure skills sharp +- We recognize that having hands-on experience with every layer of modern + infrastructure, especially in the age of AI, helps us stay relevant and + adaptable in the job market diff --git a/docusaurus/docs/why.md b/docusaurus/docs/why.md index a75738b..4a16854 100644 --- a/docusaurus/docs/why.md +++ b/docusaurus/docs/why.md @@ -2,23 +2,44 @@ title: Why Build a Home Mini Data Center? --- -Answering this question honestly, for each of us individually, is fundamental because it will determine the outcome and likelihood of success or failure. If you're building something just because you heard about it from the latest content creator, or because "everyone seems to be doing it," you're already off to a poor start. - -What I've learned over the last two decades of my career is that if you're wondering whether you need something, you probably don't. These things usually come naturally as a result of the problems you encounter. The problem itself often guides you to the right tools, just like this one. - -Rather than getting too deep into philosophy, I'll share my personal reasons for building a complete mini data center at home - from setting up the physical rack and hardware (Raspberry Pis and Mini PCs), to configuring MikroTik networking equipment, and deploying Kubernetes: +Answering this question honestly, for each of us individually, is fundamental +because it will determine the outcome and likelihood of success or failure. If +you're building something just because you heard about it from the latest +content creator, or because "everyone seems to be doing it," you're already off +to a poor start. + +What I've learned over the last two decades of my career is that if you're +wondering whether you need something, you probably don't. These things usually +come naturally as a result of the problems you encounter. The problem itself +often guides you to the right tools, just like this one. 
+ +Rather than getting too deep into philosophy, I'll share my personal reasons for +building a complete mini data center at home - from setting up the physical rack +and hardware (Raspberry Pis and Mini PCs), to configuring MikroTik networking +equipment, and deploying Kubernetes: ### End-to-End Infrastructure Knowledge -As my career moves towards a CTO role, having **end-to-end** knowledge of infrastructure is essential - from physical hardware and networking to container orchestration and application deployment. Understanding the complete technology stack inside out will enable me to make better decisions and interface more effectively with stakeholders, investors, and engineering teams. +As my career moves towards a CTO role, having **end-to-end** knowledge of +infrastructure is essential - from physical hardware and networking to container +orchestration and application deployment. Understanding the complete technology +stack inside out will enable me to make better decisions and interface more +effectively with stakeholders, investors, and engineering teams. ### Efficient Infrastructure Provisioning -I want to be able to provision infrastructure in under 5 minutes while thinking as little about it as possible. My focus should be on building products, not managing infrastructure. Additionally, the infrastructure needs to remain **cost-efficient**. Right now, I'm paying **24 euros a month** for Hetzner, which equals **about 300 euros a year**. Within two years, the hardware for my own mini data center (which I've already paid for upfront) will have effectively paid for itself through the savings. +I want to be able to provision infrastructure in under 5 minutes while thinking +as little about it as possible. My focus should be on building products, not +managing infrastructure. Additionally, the infrastructure needs to remain +**cost-efficient**. Right now, I'm paying **24 euros a month** for Hetzner, +which equals **about 300 euros a year**. Within two years, the hardware for my +own mini data center (which I've already paid for upfront) will have effectively +paid for itself through the savings. ### Complete Control Over the Stack -Building a mini data center gives me full control over every aspect of my infrastructure: +Building a mini data center gives me full control over every aspect of my +infrastructure: - **Hardware**: Choice of Raspberry Pis and Mini PCs for different workloads - **Networking**: Custom MikroTik setup for advanced networking capabilities @@ -28,14 +49,33 @@ Building a mini data center gives me full control over every aspect of my infras ### Keeping My Skills Sharp -Maintaining a complete data center forces me to regularly **maintain** and upgrade various components - from hardware and networking to software and orchestration. This helps me retain and sharpen the technical skills I've developed over time but might not frequently use in my day-to-day work. By continuing to use these skills, I ensure they remain active and relevant. +Maintaining a complete data center forces me to regularly **maintain** and +upgrade various components - from hardware and networking to software and +orchestration. This helps me retain and sharpen the technical skills I've +developed over time but might not frequently use in my day-to-day work. By +continuing to use these skills, I ensure they remain active and relevant. 
### I see the future of infrastructure leaning towards on-premises -As hardware becomes more efficient and cheaper to acquire, I predict that the future of infrastructure won't be `cloud-only` as it may seem now. While cloud solutions like GCP, AWS, and Azure are incredibly powerful, they come with high costs (even though they often save your engineers time) and may raise privacy concerns for some companies. As fiber-optic internet becomes more globally accessible and hardware costs continue to decrease, I believe that more companies will be confident enough to shift back to hosting on their own infrastructure, especially since modern tools make it significantly easier to manage and scale. +As hardware becomes more efficient and cheaper to acquire, I predict that the +future of infrastructure won't be `cloud-only` as it may seem now. While cloud +solutions like GCP, AWS, and Azure are incredibly powerful, they come with high +costs (even though they often save your engineers time) and may raise privacy +concerns for some companies. As fiber-optic internet becomes more globally +accessible and hardware costs continue to decrease, I believe that more +companies will be confident enough to shift back to hosting on their own +infrastructure, especially since modern tools make it significantly easier to +manage and scale. ### What Are Your Reasons? -You now know my reasons for building a complete mini data center at home. It's essential for you to either find yourself in some of these points or come up with solid reasons of your own. I highly suggest having an internal discussion with yourself to figure out whether there's value for you in undertaking this journey. +You now know my reasons for building a complete mini data center at home. It's +essential for you to either find yourself in some of these points or come up +with solid reasons of your own. I highly suggest having an internal discussion +with yourself to figure out whether there's value for you in undertaking this +journey. -Learning new things should never be something we question; we should always strive to learn. The real question is: _Should I invest time in building and maintaining a mini data center_, or would my time be better spent learning another skill that will provide immediate value? +Learning new things should never be something we question; we should always +strive to learn. The real question is: _Should I invest time in building and +maintaining a mini data center_, or would my time be better spent learning +another skill that will provide immediate value? 
diff --git a/docusaurus/docusaurus.config.ts b/docusaurus/docusaurus.config.ts index 65a5062..7af60a8 100644 --- a/docusaurus/docusaurus.config.ts +++ b/docusaurus/docusaurus.config.ts @@ -77,8 +77,8 @@ const config: Config = { }, prism: { additionalLanguages: ["bash"], - theme: prismThemes.ultramin, - darkTheme: prismThemes.gruvboxMaterialDark, + theme: prismThemes.dracula, + darkTheme: prismThemes.nightOwl, }, colorMode: { defaultMode: "dark", // Set dark mode as the default diff --git a/docusaurus/package.json b/docusaurus/package.json index 5370e80..468d610 100644 --- a/docusaurus/package.json +++ b/docusaurus/package.json @@ -12,32 +12,36 @@ "serve": "docusaurus serve", "write-translations": "docusaurus write-translations", "write-heading-ids": "docusaurus write-heading-ids", - "typecheck": "tsc" + "typecheck": "tsc", + "format": "prettier --write \"docs/**/*.{md,mdx}\"", + "format:check": "prettier --check \"docs/**/*.{md,mdx}\"", + "format:docs": "prettier --write \"docs/**/*.{md,mdx}\"" }, "dependencies": { - "@docusaurus/core": "3.8.1", - "@docusaurus/plugin-ideal-image": "3.8.1", - "@docusaurus/preset-classic": "3.8.1", - "@docusaurus/theme-mermaid": "3.8.1", + "@docusaurus/core": "3.9.2", + "@docusaurus/plugin-ideal-image": "3.9.2", + "@docusaurus/preset-classic": "3.9.2", + "@docusaurus/theme-mermaid": "3.9.2", "@heroicons/react": "2.2.0", - "@mdx-js/react": "3.1.0", - "chart.js": "4.5.0", + "@mdx-js/react": "3.1.1", + "chart.js": "4.5.1", "clsx": "2.1.1", "prism-react-renderer": "2.4.1", - "react": "19.1.0", - "react-dom": "19.1.0", + "react": "19.2.3", + "react-dom": "19.2.3", "react-image-gallery": "1.4.0", - "react-tooltip": "5.29.1", - "sharp": "0.34.2" + "react-tooltip": "5.30.0", + "sharp": "0.32.6" }, "devDependencies": { - "@docusaurus/module-type-aliases": "3.8.1", - "@docusaurus/tsconfig": "3.8.1", - "@docusaurus/types": "3.8.1", - "@tailwindcss/postcss": "4.1.10", + "@docusaurus/module-type-aliases": "3.9.2", + "@docusaurus/tsconfig": "3.9.2", + "@docusaurus/types": "3.9.2", + "@tailwindcss/postcss": "4.1.18", "postcss": "8.5.6", - "tailwindcss": "4.1.10", - "typescript": "5.8.3" + "prettier": "3.7.4", + "tailwindcss": "4.1.18", + "typescript": "5.9.3" }, "browserslist": { "production": [ diff --git a/docusaurus/sidebars.ts b/docusaurus/sidebars.ts index 95cc01c..f2309c3 100644 --- a/docusaurus/sidebars.ts +++ b/docusaurus/sidebars.ts @@ -137,75 +137,58 @@ const sidebars: SidebarsConfig = { type: "category", label: "Kubernetes", items: [ + // 1. Getting Started (Setup) { type: "doc", label: "K3s Setup", id: "kubernetes/k3s-setup", }, - { - type: "doc", - label: "What Is Kubernetes", - id: "kubernetes/what-is-kubernetes", - }, - { - type: "doc", - label: "Anatomy of a kubectl Command", - id: "kubernetes/anatomy-of-kubectl-command", - }, - { - type: "doc", - label: "Anatomy of a Kubernetes YAML", - id: "kubernetes/anatomy-of-kubernetes-yaml", - }, - { - type: "doc", - label: "Kubernetes 80/20 Rule", - id: "kubernetes/kubernetes-80-20-rule", - }, - { - type: "doc", - label: "K3s Backup", - id: "kubernetes/k3s-backup", - }, - { - type: "doc", - label: "K3s Maintenance", - id: "kubernetes/k3s-maintenance", - }, + // 2. 
Core Concepts { type: "category", - label: "Storage", + label: "Core Concepts", items: [ { type: "doc", - label: "Understanding Longhorn Concepts", - id: "storage/understanding-longhorn-concepts", + label: "What Is Kubernetes", + id: "kubernetes/what-is-kubernetes", }, { type: "doc", - label: "Setup Longhorn", - id: "storage/setup-longhorn", + label: "Anatomy of a kubectl Command", + id: "kubernetes/anatomy-of-kubectl-command", }, { type: "doc", - label: "Setup Longhorn Dashboard", - id: "storage/setup-longhorn-dashboard", + label: "Anatomy of a Kubernetes YAML", + id: "kubernetes/anatomy-of-kubernetes-yaml", + }, + { + type: "doc", + label: "Kubernetes 80/20 Rule", + id: "kubernetes/kubernetes-80-20-rule", }, ], }, + // 3. Infrastructure Components { type: "category", - label: "Databases", + label: "Storage", items: [ { type: "doc", - label: "Databases Within Kubernetes", - id: "databases/databases-within-kubernetes", + label: "Understanding Longhorn Concepts", + id: "storage/understanding-longhorn-concepts", }, { type: "doc", - label: "Setup CloudNative PG", - id: "databases/setup-cloudnative-pg", + label: "Setup Longhorn", + id: "storage/setup-longhorn", + }, + { + type: "doc", + label: "Setup Longhorn Dashboard", + id: "storage/setup-longhorn-dashboard", }, ], }, @@ -237,7 +220,115 @@ const sidebars: SidebarsConfig = { }, { type: "category", - label: "Exercises", + label: "GitOps", + items: [ + { + type: "doc", + label: "Setup ArgoCD", + id: "kubernetes/setup-argocd", + }, + ], + }, + { + type: "category", + label: "Secrets Management", + items: [ + { + type: "doc", + label: "Setup Vault", + id: "kubernetes/setup-vault", + }, + ], + }, + // 4. Operations + { + type: "category", + label: "K3s Backup", + items: [ + { + type: "doc", + label: "Overview", + id: "kubernetes/k3s-backup", + }, + { + type: "doc", + label: "etcd Snapshots", + id: "kubernetes/k3s-backup-etcd", + }, + { + type: "doc", + label: "Longhorn Backups", + id: "kubernetes/k3s-backup-longhorn", + }, + { + type: "doc", + label: "Velero Backups", + id: "kubernetes/k3s-backup-velero", + }, + { + type: "doc", + label: "CloudNative PG Backups", + id: "kubernetes/k3s-backup-cloudnative-pg", + }, + { + type: "doc", + label: "Disaster Recovery", + id: "kubernetes/k3s-backup-disaster-recovery", + }, + ], + }, + { + type: "category", + label: "K3s Maintenance", + items: [ + { + type: "doc", + label: "Overview", + id: "kubernetes/k3s-maintenance", + }, + { + type: "doc", + label: "Updating K3s", + id: "kubernetes/k3s-maintenance-updates", + }, + { + type: "doc", + label: "Health Checks", + id: "kubernetes/k3s-maintenance-health", + }, + { + type: "doc", + label: "Troubleshooting", + id: "kubernetes/k3s-maintenance-troubleshooting", + }, + { + type: "doc", + label: "Node Management", + id: "kubernetes/k3s-maintenance-nodes", + }, + ], + }, + // 5. Applications + { + type: "category", + label: "Databases", + items: [ + { + type: "doc", + label: "Databases Within Kubernetes", + id: "databases/databases-within-kubernetes", + }, + { + type: "doc", + label: "Setup CloudNative PG", + id: "databases/setup-cloudnative-pg", + }, + ], + }, + // 6. 
Practice + { + type: "category", + label: "Practice", items: [ { type: "doc", diff --git a/docusaurus/src/components/ExplanationCard/index.tsx b/docusaurus/src/components/ExplanationCard/index.tsx index 45b35a0..f133b0e 100644 --- a/docusaurus/src/components/ExplanationCard/index.tsx +++ b/docusaurus/src/components/ExplanationCard/index.tsx @@ -1,5 +1,3 @@ -import { Tooltip } from "react-tooltip"; - const ExplanationCard = ({ section, styles, @@ -9,18 +7,18 @@ const ExplanationCard = ({ }) => (
-

+

{section.title}

- +

+ {section.description} +

); diff --git a/docusaurus/src/components/KubernetesYAMLAnatomy/configmap.ts b/docusaurus/src/components/KubernetesYAMLAnatomy/configmap.ts index f80060f..05e6ef2 100644 --- a/docusaurus/src/components/KubernetesYAMLAnatomy/configmap.ts +++ b/docusaurus/src/components/KubernetesYAMLAnatomy/configmap.ts @@ -27,27 +27,31 @@ export const sections = [ key: "apiVersion:", value: "v1", title: "apiVersion", - description: "The version of the Kubernetes API for ConfigMap resources.", + description: + "ConfigMaps use the core `v1` API version. They're part of Kubernetes' core resources, so no special API group is needed. ConfigMaps have been stable since Kubernetes 1.2.", }, { id: "kind", key: "kind:", value: "ConfigMap", title: "kind", - description: "Specifies the object type, here it is a ConfigMap.", + description: + "A ConfigMap stores non-sensitive configuration data as key-value pairs. Use it for environment variables, configuration files, or command-line arguments. ConfigMaps are mounted into Pods as volumes or environment variables.", }, { id: "metadata", key: "metadata:", value: `\n name: my-app-config`, title: "metadata", - description: "Metadata for the ConfigMap, such as its name.", + description: + "The ConfigMap name is referenced in Pod specs. Pods mount ConfigMaps via `spec.containers[].envFrom` or `spec.volumes[].configMap`. Changes to ConfigMap data don't automatically update running Pods—you may need to restart Pods or use a sidecar to watch for changes.", }, { id: "data", key: "data:", value: `\n APP_ENV: production\n LOG_LEVEL: info`, title: "data", - description: "Key-value pairs of configuration data.", + description: + "Key-value pairs of configuration data. Values are plain strings (no base64 encoding needed, unlike Secrets). You can also use `binaryData` for binary content. Maximum size is 1MB. Use YAML multiline strings (`|` or `>`) for multi-line values.", }, ]; diff --git a/docusaurus/src/components/KubernetesYAMLAnatomy/deployment.ts b/docusaurus/src/components/KubernetesYAMLAnatomy/deployment.ts index 19b57ce..874c260 100644 --- a/docusaurus/src/components/KubernetesYAMLAnatomy/deployment.ts +++ b/docusaurus/src/components/KubernetesYAMLAnatomy/deployment.ts @@ -58,7 +58,7 @@ export const sections = [ value: "apps/v1", title: "apiVersion", description: - "Which Kubernetes API version to use. Essential for compatibility.", + "The API group and version. For Deployments, always use `apps/v1`. Different resources use different API groups (e.g., `networking.k8s.io/v1` for Ingress, `v1` for Services). Use `kubectl api-resources` to discover available API versions.", }, { id: "kind", @@ -66,7 +66,7 @@ export const sections = [ value: "Deployment", title: "kind", description: - "The type of object to create (e.g., `Deployment`, `Service`).", + "The type of Kubernetes object. `Deployment` is a controller that manages Pod replicas. It ensures the desired number of Pods are running and handles rolling updates. Other common kinds include `Service`, `Ingress`, `ConfigMap`, and `Secret`.", }, { id: "metadata", @@ -75,7 +75,7 @@ export const sections = [ name: my-app-deployment`, title: "metadata", description: - "Unique identifiers for the object, like its `name` and `labels`.", + "Unique identifiers for the object. The `name` is required and must be unique within a namespace. `labels` help organize and select resources. `namespace` defaults to 'default' if omitted. 
`annotations` store non-identifying metadata.", }, { id: "spec", @@ -84,7 +84,7 @@ export const sections = [ replicas: 3`, title: "spec", description: - "The **desired state**. You tell Kubernetes what you want the object to look like.", + "The **desired state** of your Deployment. Kubernetes continuously reconciles the actual state with this desired state. If a Pod crashes, Kubernetes creates a new one to match `replicas: 3`. This is the reconciliation loop in action.", }, { id: "selector", @@ -94,7 +94,7 @@ export const sections = [ app: my-app`, title: "spec.selector", description: - "How a controller (like a Deployment) finds which Pods to manage. It matches the Pods' labels.", + "How the Deployment finds which Pods to manage. The labels here **must match** the labels in `spec.template.metadata.labels`. This is how Kubernetes knows which Pods belong to this Deployment. If labels don't match, the Deployment won't manage the Pods.", indent: 2, }, { @@ -103,7 +103,7 @@ export const sections = [ value: "", title: "spec.template", description: - "A blueprint for creating the Pods. It has its own `metadata` and `spec`.", + "A blueprint for creating Pods. This template is immutable—once a Pod is created, changes to the template don't affect existing Pods. To update running Pods, Kubernetes creates new ones with the updated template and terminates old ones (rolling update).", indent: 2, }, { @@ -113,7 +113,8 @@ export const sections = [ labels: app: my-app`, title: "spec.template.metadata", - description: "Metadata for the Pods created by the template.", + description: + "Labels applied to Pods created from this template. These labels must match `spec.selector.matchLabels` so the Deployment can find and manage these Pods. Services also use these labels to route traffic to the Pods.", indent: 4, }, { @@ -121,7 +122,8 @@ export const sections = [ key: "spec:", value: "", title: "spec.template.spec", - description: "Specification for the Pods created by the template.", + description: + "The Pod specification. This defines what runs inside each Pod: containers, volumes, environment variables, resource limits, and more. Each Pod created from this template will have this exact specification.", indent: 4, }, { @@ -134,7 +136,7 @@ export const sections = [ - containerPort: 80`, title: "spec.template.spec.containers", description: - "The heart of the Pod. A list of one or more containers to run, specifying the `image`, `ports`, etc.", + "The containers to run in each Pod. You can run multiple containers in a Pod (sidecar pattern). Common fields include `image` (required), `ports`, `env`, `resources` (CPU/memory limits), `livenessProbe`, and `readinessProbe`. Always specify resource limits in production.", indent: 6, }, ]; diff --git a/docusaurus/src/components/KubernetesYAMLAnatomy/index.tsx b/docusaurus/src/components/KubernetesYAMLAnatomy/index.tsx index 9f05863..1e7de77 100644 --- a/docusaurus/src/components/KubernetesYAMLAnatomy/index.tsx +++ b/docusaurus/src/components/KubernetesYAMLAnatomy/index.tsx @@ -53,21 +53,23 @@ export default function App() { />
- {/* Right Side: Explanations */} -
- {sections.map(section => ( + {/* Right Side: Explanation (single card) */} +
+ {highlightedSection ? ( handleHover(section)} + section={highlightedSection} + styles={sectionStyles[highlightedSection.id]} + isHighlighted={true} + onMouseEnter={() => handleHover(highlightedSection)} onMouseLeave={() => handleHover(null)} /> - ))} + ) : ( +
+

+ Hover over any section in the YAML code to see its explanation +

+
+ )}
diff --git a/docusaurus/src/components/KubernetesYAMLAnatomy/ingress.ts b/docusaurus/src/components/KubernetesYAMLAnatomy/ingress.ts index ffeae37..32a8b62 100644 --- a/docusaurus/src/components/KubernetesYAMLAnatomy/ingress.ts +++ b/docusaurus/src/components/KubernetesYAMLAnatomy/ingress.ts @@ -42,35 +42,40 @@ export const sections = [ key: "apiVersion:", value: "networking.k8s.io/v1", title: "apiVersion", - description: "The version of the Kubernetes API for Ingress resources.", + description: + "Ingress uses the `networking.k8s.io/v1` API group. Always use `v1` (not `v1beta1` which is deprecated). This API group contains networking-related resources like Ingress and NetworkPolicy.", }, { id: "kind", key: "kind:", value: "Ingress", title: "kind", - description: "Specifies the object type, here it is an Ingress.", + description: + "An Ingress exposes HTTP and HTTPS routes from outside the cluster to Services within the cluster. It requires an Ingress Controller (like Traefik, NGINX, or cloud-provider load balancers) to function. Think of it as a reverse proxy configuration.", }, { id: "metadata", key: "metadata:", value: `\n name: my-app-ingress`, title: "metadata", - description: "Metadata for the Ingress, such as its name.", + description: + "The Ingress name and optional annotations. Annotations are crucial for Ingress—they configure the Ingress Controller (e.g., `cert-manager.io/cluster-issuer: letsencrypt-prod` for TLS certificates).", }, { id: "spec", key: "spec:", value: "", title: "spec", - description: "The desired state of the Ingress resource.", + description: + "Defines routing rules and TLS configuration. The Ingress Controller reads this spec to configure routing. You can specify multiple hosts, paths, and TLS certificates here.", }, { id: "rules", key: "rules:", value: "", title: "spec.rules", - description: "Defines the rules for routing traffic.", + description: + "Routing rules for the Ingress. Each rule can match a specific hostname (optional) and define paths. If no host is specified, the rule matches all hosts. Rules are evaluated in order.", indent: 2, }, { @@ -78,7 +83,8 @@ export const sections = [ key: "http:", value: "", title: "spec.rules.http", - description: "HTTP-specific routing information.", + description: + "HTTP routing configuration for a rule. Contains the list of paths that should be routed to backend Services. You can also define TLS configuration at the rule level.", indent: 4, }, { @@ -86,7 +92,8 @@ export const sections = [ key: "paths:", value: `\n - path: /\n pathType: Prefix\n backend:\n service:\n name: my-app-service\n port:\n number: 80`, title: "spec.rules.http.paths", - description: "Defines the paths and backend services for the Ingress.", + description: + "Path-based routing rules. `path` is the URL path, `pathType` can be `Exact`, `Prefix`, or `ImplementationSpecific`. The `backend` references a Service by name and port. This routes `/` requests to `my-app-service:80`.", indent: 6, }, ]; diff --git a/docusaurus/src/components/KubernetesYAMLAnatomy/pvc.ts b/docusaurus/src/components/KubernetesYAMLAnatomy/pvc.ts index 82899b2..43acb62 100644 --- a/docusaurus/src/components/KubernetesYAMLAnatomy/pvc.ts +++ b/docusaurus/src/components/KubernetesYAMLAnatomy/pvc.ts @@ -37,7 +37,8 @@ export const sections = [ key: "apiVersion:", value: "v1", title: "apiVersion", - description: "The version of the Kubernetes API for PVC resources.", + description: + "PVCs use the core `v1` API version. They're part of Kubernetes' storage API. 
PVCs work with StorageClasses to dynamically provision PersistentVolumes (PVs) from available storage backends.", }, { id: "kind", @@ -45,28 +46,31 @@ export const sections = [ value: "PersistentVolumeClaim", title: "kind", description: - "Specifies the object type, here it is a PersistentVolumeClaim.", + "A PVC requests storage from a StorageClass. Think of it as a 'storage reservation'—you request 10GB, and Kubernetes provisions a PersistentVolume (PV) that matches your requirements. Pods mount PVCs as volumes to get persistent storage.", }, { id: "metadata", key: "metadata:", value: `\n name: my-app-pvc`, title: "metadata", - description: "Metadata for the PVC, such as its name.", + description: + "The PVC name is referenced in Pod specs via `spec.volumes[].persistentVolumeClaim.claimName`. When a Pod is deleted, the PVC (and its data) persists unless you explicitly delete it. This enables data persistence across Pod restarts.", }, { id: "spec", key: "spec:", value: "", title: "spec", - description: "The desired state of the PVC resource.", + description: + "Defines storage requirements: access mode (how many Pods can mount it), storage size, and optional StorageClass. If no StorageClass is specified, the cluster's default StorageClass is used. The PVC stays in 'Pending' until a matching PV is provisioned.", }, { id: "accessModes", key: "accessModes:", value: `\n - ReadWriteOnce`, title: "spec.accessModes", - description: "Defines how the volume can be mounted (e.g., ReadWriteOnce).", + description: + "Defines how the volume can be mounted. `ReadWriteOnce` (RWO) allows read-write by a single node (most common). `ReadOnlyMany` (ROX) allows read-only by many nodes. `ReadWriteMany` (RWX) allows read-write by many nodes (requires NFS-like storage). Choose based on your use case.", indent: 2, }, { @@ -74,7 +78,8 @@ export const sections = [ key: "resources:", value: `\n requests:\n storage: 1Gi`, title: "spec.resources", - description: "Specifies the amount of storage requested.", + description: + "Storage capacity request. Use standard Kubernetes size units: `Ki` (kibibytes), `Mi` (mebibytes), `Gi` (gibibytes), `Ti` (tebibytes). The actual provisioned size may be larger depending on the storage backend. 
You cannot shrink a PVC—only expand it (if the StorageClass supports it).", indent: 2, }, ]; diff --git a/docusaurus/src/components/KubernetesYAMLAnatomy/secret.ts b/docusaurus/src/components/KubernetesYAMLAnatomy/secret.ts index 0ad526e..91c5e39 100644 --- a/docusaurus/src/components/KubernetesYAMLAnatomy/secret.ts +++ b/docusaurus/src/components/KubernetesYAMLAnatomy/secret.ts @@ -1,66 +1,70 @@ export const sectionStyles = { apiVersion: { - keyColor: "text-blue-600 dark:text-blue-400", - cardColor: "border-2 border-blue-200 dark:border-blue-900", - titleColor: "text-blue-700 dark:text-blue-400", + keyColor: 'text-blue-600 dark:text-blue-400', + cardColor: 'border-2 border-blue-200 dark:border-blue-900', + titleColor: 'text-blue-700 dark:text-blue-400', }, kind: { - keyColor: "text-cyan-600 dark:text-cyan-400", - cardColor: "border-2 border-cyan-200 dark:border-cyan-900", - titleColor: "text-cyan-700 dark:text-cyan-400", + keyColor: 'text-cyan-600 dark:text-cyan-400', + cardColor: 'border-2 border-cyan-200 dark:border-cyan-900', + titleColor: 'text-cyan-700 dark:text-cyan-400', }, metadata: { - keyColor: "text-green-600 dark:text-green-400", - cardColor: "border-2 border-green-200 dark:border-green-900", - titleColor: "text-green-700 dark:text-green-400", + keyColor: 'text-green-600 dark:text-green-400', + cardColor: 'border-2 border-green-200 dark:border-green-900', + titleColor: 'text-green-700 dark:text-green-400', }, data: { - keyColor: "text-purple-600 dark:text-purple-400", - cardColor: "border-2 border-purple-200 dark:border-purple-900", - titleColor: "text-purple-700 dark:text-purple-400", + keyColor: 'text-purple-600 dark:text-purple-400', + cardColor: 'border-2 border-purple-200 dark:border-purple-900', + titleColor: 'text-purple-700 dark:text-purple-400', }, type: { - keyColor: "text-pink-600 dark:text-pink-400", - cardColor: "border-2 border-pink-200 dark:border-pink-900", - titleColor: "text-pink-700 dark:text-pink-400", + keyColor: 'text-pink-600 dark:text-pink-400', + cardColor: 'border-2 border-pink-200 dark:border-pink-900', + titleColor: 'text-pink-700 dark:text-pink-400', }, -}; +} export const sections = [ { - id: "apiVersion", - key: "apiVersion:", - value: "v1", - title: "apiVersion", - description: "The version of the Kubernetes API for Secret resources.", + id: 'apiVersion', + key: 'apiVersion:', + value: 'v1', + title: 'apiVersion', + description: + "Secrets use the core `v1` API version. Like ConfigMaps, they're part of Kubernetes' core resources. Secrets are base64-encoded but not encrypted—use external secret management (like Vault) for production encryption.", }, { - id: "kind", - key: "kind:", - value: "Secret", - title: "kind", - description: "Specifies the object type, here it is a Secret.", + id: 'kind', + key: 'kind:', + value: 'Secret', + title: 'kind', + description: + 'A Secret stores sensitive data like passwords, API keys, or TLS certificates. Secrets are base64-encoded (not encrypted) and should be treated carefully. Mount them into Pods as volumes or environment variables, similar to ConfigMaps.', }, { - id: "metadata", - key: "metadata:", + id: 'metadata', + key: 'metadata:', value: `\n name: my-app-secret`, - title: "metadata", - description: "Metadata for the Secret, such as its name.", + title: 'metadata', + description: + 'The Secret name is referenced in Pod specs via `spec.containers[].env[].valueFrom.secretKeyRef` or `spec.volumes[].secret`. 
Never commit Secrets to version control—use tools like Sealed Secrets, External Secrets Operator, or Vault for production.', }, { - id: "type", - key: "type:", - value: " Opaque", - title: "type", + id: 'type', + key: 'type:', + value: ' Opaque', + title: 'type', description: - "The type of Secret. 'Opaque' is the default for arbitrary user-defined data.", + 'The Secret type determines how Kubernetes handles the data. `Opaque` (default) is for arbitrary user data. Other types include `kubernetes.io/tls` (TLS certificates), `kubernetes.io/dockerconfigjson` (Docker registry credentials), and `kubernetes.io/basic-auth` (basic authentication).', }, { - id: "data", - key: "data:", + id: 'data', + key: 'data:', value: `\n PASSWORD: cGFzc3dvcmQ= # base64 for 'password'`, - title: "data", - description: "Key-value pairs of secret data, base64-encoded.", + title: 'data', + description: + "Key-value pairs of secret data, **base64-encoded**. Use `echo -n 'password' | base64` to encode values. For TLS certificates, use `kubernetes.io/tls` type with `tls.crt` and `tls.key` keys. Maximum size is 1MB. Use `stringData` for plain text (Kubernetes encodes it automatically).", }, -]; +] diff --git a/docusaurus/src/components/KubernetesYAMLAnatomy/service.ts b/docusaurus/src/components/KubernetesYAMLAnatomy/service.ts index 2ce3d0b..8f07349 100644 --- a/docusaurus/src/components/KubernetesYAMLAnatomy/service.ts +++ b/docusaurus/src/components/KubernetesYAMLAnatomy/service.ts @@ -37,14 +37,16 @@ export const sections = [ key: "apiVersion:", value: "v1", title: "apiVersion", - description: "The version of the Kubernetes API to use.", + description: + "Services use the core `v1` API version. This is part of Kubernetes' core API group, unlike Deployments which use `apps/v1`. Most basic resources (Pods, Services, ConfigMaps, Secrets) use `v1`.", }, { id: "kind", key: "kind:", value: "Service", title: "kind", - description: "Specifies the object type, in this case, a Service.", + description: + "A Service provides a stable network endpoint for Pods. It acts as a load balancer, distributing traffic to matching Pods. Even when Pods are recreated (new IPs), the Service IP stays the same, providing reliable service discovery.", }, { id: "metadata", @@ -53,7 +55,7 @@ export const sections = [ name: my-app-service`, title: "metadata", description: - "Data that helps uniquely identify the object, including a name.", + "The Service name is used by other Pods to connect via DNS. A Service named `my-app-service` in namespace `default` is accessible at `my-app-service.default.svc.cluster.local` or simply `my-app-service` from the same namespace.", }, { id: "spec", @@ -61,7 +63,7 @@ export const sections = [ value: "", title: "spec", description: - "The desired state of the Service, defining how it exposes an application.", + "Defines how the Service selects Pods and exposes them. The `selector` finds Pods, and `ports` defines how traffic is routed. Service types include ClusterIP (default, internal), NodePort, and LoadBalancer.", }, { id: "selector", @@ -70,7 +72,7 @@ export const sections = [ app: my-app`, title: "spec.selector", description: - "Selects the Pods to which this Service will route traffic, based on their labels.", + "Selects Pods by matching their labels. The Service finds all Pods with `app: my-app` and routes traffic to them. If Pods are recreated with the same labels, they're automatically included. 
The selector must match Pod labels exactly.", indent: 2, }, { @@ -82,7 +84,7 @@ export const sections = [ targetPort: 8080`, title: "spec.ports", description: - "Defines the port mapping. It forwards traffic from port 80 on the Service to port 8080 on the Pods.", + "Port mapping configuration. `port` is the Service port (what clients connect to), `targetPort` is the container port (where the app listens). `protocol` is usually TCP (default) or UDP. You can define multiple ports for services that expose multiple endpoints.", indent: 2, }, ]; diff --git a/docusaurus/src/components/MikrotikNetworking/CodeBlock/index.tsx b/docusaurus/src/components/MikrotikNetworking/CodeBlock/index.tsx index a92bbdd..7ee0d0f 100644 --- a/docusaurus/src/components/MikrotikNetworking/CodeBlock/index.tsx +++ b/docusaurus/src/components/MikrotikNetworking/CodeBlock/index.tsx @@ -16,7 +16,7 @@ const CodeBlock = ({ code }: CodeBlockProps) => { return (
- + {({ className, style, tokens, getLineProps, getTokenProps }) => (
             {tokens.map((line, i) => (
diff --git a/docusaurus/src/css/custom.css b/docusaurus/src/css/custom.css
index d0697bd..2211db3 100644
--- a/docusaurus/src/css/custom.css
+++ b/docusaurus/src/css/custom.css
@@ -11,29 +11,29 @@
 
 /* You can override the default Infima variables here. */
 :root {
-  --ifm-color-primary: #ffab00;
-  --ifm-color-primary-dark: #ffab00;
-  --ifm-color-primary-darker: #ffab00;
-  --ifm-color-primary-darkest: #ffab00;
-  --ifm-color-primary-light: #818cf8;
-  --ifm-color-primary-lighter: #a5b4fc;
-  --ifm-color-primary-lightest: #c7d2fe;
+  --ifm-color-primary: #326ce5;
+  --ifm-color-primary-dark: #2563eb;
+  --ifm-color-primary-darker: #1d4ed8;
+  --ifm-color-primary-darkest: #1e40af;
+  --ifm-color-primary-light: #60a5fa;
+  --ifm-color-primary-lighter: #93c5fd;
+  --ifm-color-primary-lightest: #bfdbfe;
   --ifm-code-font-size: 95%;
   --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1);
 }
 
 /* For readability concerns, you should choose a lighter palette in dark mode. */
 [data-theme="dark"] {
-  --ifm-color-primary: #ffab00;
-  --ifm-color-primary-dark: #ffab00;
-  --ifm-color-primary-darker: #ffab00;
-  --ifm-color-primary-darkest: #ffab00;
-  --ifm-color-primary-light: #818cf8;
-  --ifm-color-primary-lighter: #a5b4fc;
-  --ifm-color-primary-lightest: #c7d2fe;
-  --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3);
-  --ifm-background-color: #242323 !important;
-  --ifm-navbar-background-color: #242323 !important;
+  --ifm-color-primary: #3b82f6;
+  --ifm-color-primary-dark: #2563eb;
+  --ifm-color-primary-darker: #1d4ed8;
+  --ifm-color-primary-darkest: #1e40af;
+  --ifm-color-primary-light: #60a5fa;
+  --ifm-color-primary-lighter: #93c5fd;
+  --ifm-color-primary-lightest: #bfdbfe;
+  --docusaurus-highlighted-code-line-bg: rgba(59, 130, 246, 0.2);
+  --ifm-background-color: #1a1a1a !important;
+  --ifm-navbar-background-color: #1a1a1a !important;
 }
 
 .navbar__item.navbar__link svg {
@@ -51,3 +51,101 @@ ul:not(.menu__list) li {
 .table-of-contents li {
   list-style-type: none !important;
 }
+
+/* Code block styling - Dark theme */
+[data-theme="dark"] .prism-code,
+[data-theme="dark"] pre[class*="language-"],
+[data-theme="dark"] code[class*="language-"] {
+  background-color: #1e293b !important;
+  color: #e2e8f0 !important;
+}
+
+[data-theme="dark"] div[class*="codeBlockContainer"],
+[data-theme="dark"] div[class*="codeBlockContent"] {
+  background-color: #1e293b !important;
+}
+
+/* Syntax highlighting colors for dark theme - blue theme */
+[data-theme="dark"] .token.keyword,
+[data-theme="dark"] .token.operator,
+[data-theme="dark"] .token.boolean {
+  color: #60a5fa !important; /* Blue for keywords */
+}
+
+[data-theme="dark"] .token.string,
+[data-theme="dark"] .token.url {
+  color: #34d399 !important; /* Teal/cyan for strings */
+}
+
+[data-theme="dark"] .token.function,
+[data-theme="dark"] .token.class-name {
+  color: #93c5fd !important; /* Light blue for functions */
+}
+
+[data-theme="dark"] .token.comment {
+  color: #64748b !important; /* Gray for comments */
+}
+
+[data-theme="dark"] .token.number {
+  color: #fbbf24 !important; /* Amber for numbers */
+}
+
+[data-theme="dark"] .token.variable,
+[data-theme="dark"] .token.property {
+  color: #c084fc !important; /* Purple for variables */
+}
+
+[data-theme="dark"] .token.punctuation {
+  color: #cbd5e1 !important; /* Light gray for punctuation */
+}
+
+/* Code block styling - Light theme */
+:root:not([data-theme="dark"]) .prism-code,
+:root:not([data-theme="dark"]) pre[class*="language-"],
+:root:not([data-theme="dark"]) code[class*="language-"] {
+  background-color: #f8fafc !important;
+  color: #1e293b !important;
+}
+
+:root:not([data-theme="dark"]) div[class*="codeBlockContainer"],
+:root:not([data-theme="dark"]) div[class*="codeBlockContent"] {
+  background-color: #f8fafc !important;
+}
+
+/* Syntax highlighting colors for light theme - high contrast blue theme */
+:root:not([data-theme="dark"]) .token.keyword,
+:root:not([data-theme="dark"]) .token.operator,
+:root:not([data-theme="dark"]) .token.boolean {
+  color: #2563eb !important; /* Dark blue for keywords - high contrast */
+}
+
+:root:not([data-theme="dark"]) .token.string,
+:root:not([data-theme="dark"]) .token.url {
+  color: #059669 !important; /* Dark teal/green for strings - high contrast */
+}
+
+:root:not([data-theme="dark"]) .token.function,
+:root:not([data-theme="dark"]) .token.class-name {
+  color: #1d4ed8 !important; /* Darker blue for functions - high contrast */
+}
+
+:root:not([data-theme="dark"]) .token.comment {
+  color: #64748b !important; /* Medium gray for comments */
+}
+
+:root:not([data-theme="dark"]) .token.number {
+  color: #dc2626 !important; /* Red for numbers - high contrast */
+}
+
+:root:not([data-theme="dark"]) .token.variable,
+:root:not([data-theme="dark"]) .token.property {
+  color: #7c3aed !important; /* Dark purple for variables - high contrast */
+}
+
+:root:not([data-theme="dark"]) .token.punctuation {
+  color: #475569 !important; /* Dark gray for punctuation - high contrast */
+}
+
+:root:not([data-theme="dark"]) .token.plain {
+  color: #1e293b !important; /* Dark text for plain tokens - high contrast */
+}
diff --git a/docusaurus/static/img/programmer-network-logo.svg b/docusaurus/static/img/programmer-network-logo.svg
index 974027f..3d32fd4 100644
--- a/docusaurus/static/img/programmer-network-logo.svg
+++ b/docusaurus/static/img/programmer-network-logo.svg
@@ -1,9 +1,9 @@
-
+
     
     
-    
+    
         
+            repeatCount="indefinite" values="transparent;#1e40af">
     
 
\ No newline at end of file

From c2d482a24f83b665661291bfe55b7a97fd1920e4 Mon Sep 17 00:00:00 2001
From: Aleksandar Grbic 
Date: Mon, 12 Jan 2026 23:05:06 +0100
Subject: [PATCH 3/3] Refactor Docusaurus sidebars and enhance Kubernetes
 practice documentation

- Standardized quotes in sidebars.ts for consistency.
- Added new practice exercises for Kubernetes, covering ConfigMaps, Secrets, Persistent Volumes, Longhorn Storage, and CloudNative PG.
- Introduced a comprehensive guide for building a complete application stack, integrating various Kubernetes components.
- Improved organization and clarity in the practice documentation to facilitate learning.
---
 .../practice/01-basic-kubernetes.md           | 228 +++++++++
 .../practice/02-namespaces-and-resources.md   | 201 ++++++++
 .../practice/03-configmaps-and-secrets.md     | 256 ++++++++++
 .../practice/04-persistent-volumes.md         | 188 +++++++
 .../practice/05-longhorn-storage.md           | 217 ++++++++
 .../practice/06-services-and-networking.md    | 253 ++++++++++
 .../practice/07-cloudnative-pg-basics.md      | 228 +++++++++
 .../practice/08-cloudnative-pg-advanced.md    | 274 ++++++++++
 .../practice/10-complete-application.md       | 376 ++++++++++++++
 .../kubernetes/practice/getting-started.md    | 153 ++++++
 docusaurus/sidebars.ts                        | 477 ++++++++++--------
 11 files changed, 2644 insertions(+), 207 deletions(-)
 create mode 100644 docusaurus/docs/kubernetes/practice/01-basic-kubernetes.md
 create mode 100644 docusaurus/docs/kubernetes/practice/02-namespaces-and-resources.md
 create mode 100644 docusaurus/docs/kubernetes/practice/03-configmaps-and-secrets.md
 create mode 100644 docusaurus/docs/kubernetes/practice/04-persistent-volumes.md
 create mode 100644 docusaurus/docs/kubernetes/practice/05-longhorn-storage.md
 create mode 100644 docusaurus/docs/kubernetes/practice/06-services-and-networking.md
 create mode 100644 docusaurus/docs/kubernetes/practice/07-cloudnative-pg-basics.md
 create mode 100644 docusaurus/docs/kubernetes/practice/08-cloudnative-pg-advanced.md
 create mode 100644 docusaurus/docs/kubernetes/practice/10-complete-application.md
 create mode 100644 docusaurus/docs/kubernetes/practice/getting-started.md

diff --git a/docusaurus/docs/kubernetes/practice/01-basic-kubernetes.md b/docusaurus/docs/kubernetes/practice/01-basic-kubernetes.md
new file mode 100644
index 0000000..f035359
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/01-basic-kubernetes.md
@@ -0,0 +1,228 @@
+---
+title: 'Exercise 1: Basic Kubernetes Resources'
+---
+
+## Objective
+
+Learn to create and manage basic Kubernetes resources: Pods, Deployments, and
+Services. This is the foundation for everything else you'll do in Kubernetes.
+
+## Prerequisites
+
+- K3s cluster running
+- `kubectl` configured and working
+- Basic understanding of containers
+
+Verify your setup:
+
+```bash
+kubectl get nodes
+```
+
+You should see your cluster nodes listed.
+
+## Exercise: Deploy a Simple Web Application
+
+### Step 1: Create a Namespace
+
+Namespaces help organize your resources. Let's create one for this exercise.
+
+**Command:**
+
+```bash
+kubectl create namespace practice-01
+```
+
+**YAML Version:** Create `namespace.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-01
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f namespace.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get namespace practice-01
+```
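+
+Tip: if you'd rather not type `-n practice-01` on every command, you can make
+it the default namespace for your current kubectl context (the steps below
+still spell it out for clarity):
+
+```bash
+# All subsequent kubectl commands default to the practice-01 namespace
+kubectl config set-context --current --namespace=practice-01
+```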
+
+### Step 2: Create a Deployment
+
+A Deployment manages Pods and ensures they stay running. Let's deploy a simple
+nginx web server.
+
+**Command:**
+
+```bash
+kubectl create deployment hello-world \
+  --image=nginx \
+  --namespace=practice-01
+```
+
+**YAML Version:** Create `deployment.yaml`:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: hello-world
+  namespace: practice-01
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: hello-world
+  template:
+    metadata:
+      labels:
+        app: hello-world
+    spec:
+      containers:
+        - name: nginx
+          image: nginx:latest
+          ports:
+            - containerPort: 80
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f deployment.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get deployment hello-world -n practice-01
+kubectl get pods -n practice-01
+```
+
+Wait until the pod shows `Running` status.
+
+### Step 3: Expose with a Service
+
+Services provide a stable way to access your Pods. Let's create a ClusterIP
+service.
+
+**Command:**
+
+```bash
+kubectl expose deployment hello-world \
+  --type=ClusterIP \
+  --port=80 \
+  --namespace=practice-01
+```
+
+**YAML Version:** Create `service.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: hello-world
+  namespace: practice-01
+spec:
+  selector:
+    app: hello-world
+  ports:
+    - protocol: TCP
+      port: 80
+      targetPort: 80
+  type: ClusterIP
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f service.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get service hello-world -n practice-01
+```
+
+### Step 4: Test the Application
+
+Since ClusterIP services are only accessible within the cluster, we'll use port
+forwarding to test it.
+
+```bash
+kubectl port-forward deployment/hello-world 8080:80 -n practice-01
+```
+
+In another terminal, test it:
+
+```bash
+curl http://localhost:8080
+```
+
+You should see the nginx welcome page HTML. Press `Ctrl+C` to stop port
+forwarding.
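+
+You can also test from inside the cluster by hitting the Service's DNS name
+from a throwaway pod - a minimal sketch using `busybox` (the pod removes
+itself as soon as the command finishes):
+
+```bash
+# Fetch the Service by its cluster DNS name from within the cluster
+kubectl run curl-test --rm -it --restart=Never --image=busybox -n practice-01 -- \
+  wget -qO- http://hello-world.practice-01.svc.cluster.local
+```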
+
+## Verification
+
+Check that everything is working:
+
+```bash
+# Check deployment
+kubectl get deployment hello-world -n practice-01
+
+# Check pods
+kubectl get pods -n practice-01 -l app=hello-world
+
+# Check service
+kubectl get service hello-world -n practice-01
+
+# Check service endpoints
+kubectl get endpoints hello-world -n practice-01
+```
+
+All resources should show as ready and running.
+
+## Understanding What Happened
+
+- **Namespace**: Isolated environment for your resources
+- **Deployment**: Manages Pod lifecycle, ensures desired number of replicas
+- **Pod**: The actual container running nginx
+- **Service**: Provides stable network access to Pods, even if they restart
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-01
+```
+
+Or delete individual resources:
+
+```bash
+kubectl delete -f service.yaml
+kubectl delete -f deployment.yaml
+kubectl delete -f namespace.yaml
+```
+
+## Next Steps
+
+→ [Exercise 2: Namespaces and Resources](./02-namespaces-and-resources)
+
+## Additional Practice
+
+Try these variations:
+
+1. Scale the deployment to 3 replicas:
+   `kubectl scale deployment hello-world --replicas=3 -n practice-01`
+2. Check how Kubernetes distributes the Pods:
+   `kubectl get pods -n practice-01 -o wide`
+3. Delete one Pod and watch it get recreated (see the sketch below):
+   `kubectl delete pod <pod-name> -n practice-01`
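+
+A quick way to watch item 3 in action - run these in two terminals (deleting
+by label avoids having to copy the generated pod name):
+
+```bash
+# Terminal 1: watch pods come and go
+kubectl get pods -n practice-01 -w
+
+# Terminal 2: delete the pod by label; the Deployment recreates it immediately
+kubectl delete pod -l app=hello-world -n practice-01
+```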
diff --git a/docusaurus/docs/kubernetes/practice/02-namespaces-and-resources.md b/docusaurus/docs/kubernetes/practice/02-namespaces-and-resources.md
new file mode 100644
index 0000000..9045a9b
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/02-namespaces-and-resources.md
@@ -0,0 +1,201 @@
+---
+title: 'Exercise 2: Namespaces and Resources'
+---
+
+## Objective
+
+Learn how to organize and manage resources using namespaces, and understand
+resource management concepts like resource quotas and labels.
+
+## Prerequisites
+
+- Completed [Exercise 1: Basic Kubernetes](./01-basic-kubernetes)
+- Understanding of Pods, Deployments, and Services
+
+## Exercise: Organize Resources with Namespaces
+
+### Step 1: Create Multiple Namespaces
+
+Namespaces help you organize resources and apply policies. Let's create separate
+namespaces for different environments.
+
+**YAML Version:** Create `namespaces.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: development
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: staging
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: production
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f namespaces.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get namespaces
+```
+
+### Step 2: Deploy Applications to Different Namespaces
+
+Let's deploy the same application to different namespaces to see how namespaces
+provide isolation.
+
+**YAML Version:** Create `deployment-dev.yaml`:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: web-app
+  namespace: development
+  labels:
+    app: web-app
+    environment: development
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: web-app
+  template:
+    metadata:
+      labels:
+        app: web-app
+        environment: development
+    spec:
+      containers:
+        - name: nginx
+          image: nginx:latest
+          ports:
+            - containerPort: 80
+```
+
+**YAML Version:** Create `deployment-staging.yaml`:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: web-app
+  namespace: staging
+  labels:
+    app: web-app
+    environment: staging
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: web-app
+  template:
+    metadata:
+      labels:
+        app: web-app
+        environment: staging
+    spec:
+      containers:
+        - name: nginx
+          image: nginx:latest
+          ports:
+            - containerPort: 80
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f deployment-dev.yaml
+kubectl apply -f deployment-staging.yaml
+```
+
+### Step 3: Use Labels and Selectors
+
+Labels help you organize and select resources. Let's see how they work.
+
+**List pods by label:**
+
+```bash
+# All web-app pods
+kubectl get pods -l app=web-app --all-namespaces
+
+# Only development pods
+kubectl get pods -l environment=development --all-namespaces
+
+# Combined selector
+kubectl get pods -l app=web-app,environment=development -n development
+```
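+
+Labels aren't fixed at creation time - you can add and remove them on live
+objects. A small sketch (the `tier=frontend` label is just an example value):
+
+```bash
+# Add a label to the running Deployment, then query by it
+kubectl label deployment web-app tier=frontend -n development
+kubectl get deployments -l tier=frontend -n development
+
+# Remove it again (a trailing dash removes the label)
+kubectl label deployment web-app tier- -n development
+```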
+
+### Step 4: Resource Queries Across Namespaces
+
+See how namespaces isolate resources:
+
+```bash
+# Pods in specific namespace
+kubectl get pods -n development
+kubectl get pods -n staging
+
+# All pods across all namespaces
+kubectl get pods --all-namespaces
+
+# Deployments in all namespaces
+kubectl get deployments --all-namespaces
+```
+
+## Verification
+
+Verify namespace isolation:
+
+```bash
+# Check deployments in each namespace
+kubectl get deployments -n development
+kubectl get deployments -n staging
+
+# Verify pods are isolated
+kubectl get pods -n development
+kubectl get pods -n staging
+
+# No Services exist in this exercise, so both namespaces report no Endpoints -
+# a Service only ever selects Pods from its own namespace
+kubectl get endpoints -n development
+kubectl get endpoints -n staging
+```
+
+## Understanding What Happened
+
+- **Namespaces**: Provide logical separation and isolation
+- **Labels**: Key-value pairs for organizing and selecting resources
+- **Selectors**: Used to match resources by labels
+- **Isolation**: Names are scoped per namespace, so identically named resources
+  can coexist; network traffic between namespaces is not blocked unless you add
+  NetworkPolicies
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace development
+kubectl delete namespace staging
+kubectl delete namespace production
+```
+
+## Next Steps
+
+→ [Exercise 3: ConfigMaps and Secrets](./03-configmaps-and-secrets)
+
+## Additional Practice
+
+1. Create a namespace with a specific label:
+   `kubectl create namespace test --dry-run=client -o yaml | kubectl label --local -f - team=backend -o yaml | kubectl apply -f -`
+2. List all namespaces with a label: `kubectl get namespaces -l team=backend`
+3. Try to reach a pod in another namespace - by Pod IP it works (namespaces
+   don't block network traffic by default); for a stable name you'd need a
+   Service and its fully qualified DNS name, e.g.
+   `web-app.staging.svc.cluster.local`
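+
+The objective also mentions resource quotas. Here is a minimal sketch you can
+apply to the `development` namespace and experiment with (the limits are
+arbitrary example values - tune them to your cluster):
+
+```yaml
+apiVersion: v1
+kind: ResourceQuota
+metadata:
+  name: dev-quota
+  namespace: development
+spec:
+  hard:
+    pods: '10' # at most 10 Pods in the namespace
+    requests.cpu: '2' # total CPU requests capped at 2 cores
+    requests.memory: 4Gi # total memory requests capped at 4Gi
+```
+
+Apply it with `kubectl apply -f quota.yaml` and check usage with
+`kubectl describe resourcequota dev-quota -n development`. Note that once CPU
+and memory are quota-limited, new Pods must declare resource requests or the
+API server rejects them.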
diff --git a/docusaurus/docs/kubernetes/practice/03-configmaps-and-secrets.md b/docusaurus/docs/kubernetes/practice/03-configmaps-and-secrets.md
new file mode 100644
index 0000000..8c54534
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/03-configmaps-and-secrets.md
@@ -0,0 +1,256 @@
+---
+title: 'Exercise 3: ConfigMaps and Secrets'
+---
+
+## Objective
+
+Learn how to manage configuration data and sensitive information using
+ConfigMaps and Secrets. These are essential for making applications configurable
+and secure.
+
+## Prerequisites
+
+- Completed
+  [Exercise 2: Namespaces and Resources](./02-namespaces-and-resources)
+- Understanding of Deployments and Pods
+
+## Exercise: Configure an Application
+
+### Step 1: Create a ConfigMap
+
+ConfigMaps store non-sensitive configuration data. Let's create one for
+application settings. If you use the command variant, create the namespace
+first with `kubectl create namespace practice-03`; the YAML version below
+creates it for you.
+
+**Command:**
+
+```bash
+kubectl create configmap app-config \
+  --from-literal=database_host=postgres.example.com \
+  --from-literal=database_port=5432 \
+  --from-literal=app_name=my-app \
+  -n practice-03
+```
+
+**YAML Version:** Create `configmap.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-03
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: app-config
+  namespace: practice-03
+data:
+  database_host: postgres.example.com
+  database_port: '5432'
+  app_name: my-app
+  config.properties: |
+    server.port=8080
+    logging.level=INFO
+    feature.flag.enabled=true
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f configmap.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get configmap app-config -n practice-03
+kubectl describe configmap app-config -n practice-03
+```
+
+### Step 2: Create a Secret
+
+Secrets store sensitive data like passwords and API keys. Let's create one for
+database credentials.
+
+**Command:**
+
+```bash
+kubectl create secret generic db-credentials \
+  --from-literal=username=admin \
+  --from-literal=password=secretpassword123 \
+  -n practice-03
+```
+
+**YAML Version:** Create `secret.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: db-credentials
+  namespace: practice-03
+type: Opaque
+stringData:
+  username: admin
+  password: secretpassword123
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f secret.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get secret db-credentials -n practice-03
+```
+
+Note: Secrets are base64 encoded. To view (decode) them:
+
+```bash
+kubectl get secret db-credentials -n practice-03 -o jsonpath='{.data.password}' | base64 -d
+echo
+```
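+
+The manifest above uses `stringData`, so Kubernetes encodes the values for
+you. If you use the `data` field instead, encode each value yourself before
+pasting it in:
+
+```bash
+# -n prevents a trailing newline from being encoded into the value
+echo -n 'secretpassword123' | base64
+```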
+
+### Step 3: Use ConfigMap and Secret in a Deployment
+
+Now let's create a deployment that uses both the ConfigMap and Secret.
+
+**YAML Version:** Create `deployment.yaml`:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: configured-app
+  namespace: practice-03
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: configured-app
+  template:
+    metadata:
+      labels:
+        app: configured-app
+    spec:
+      containers:
+        - name: app
+          image: busybox
+          command: ['/bin/sh']
+          args:
+            - -c
+            - |
+              echo "App Name: $APP_NAME"
+              echo "DB Host: $DB_HOST"
+              echo "DB Port: $DB_PORT"
+              echo "DB User: $DB_USERNAME"
+              echo "Config file contents:"
+              cat /etc/config/config.properties
+              sleep 3600
+          env:
+            # From ConfigMap
+            - name: APP_NAME
+              valueFrom:
+                configMapKeyRef:
+                  name: app-config
+                  key: app_name
+            - name: DB_HOST
+              valueFrom:
+                configMapKeyRef:
+                  name: app-config
+                  key: database_host
+            - name: DB_PORT
+              valueFrom:
+                configMapKeyRef:
+                  name: app-config
+                  key: database_port
+            # From Secret
+            - name: DB_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: db-credentials
+                  key: username
+            - name: DB_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: db-credentials
+                  key: password
+          volumeMounts:
+            - name: config-volume
+              mountPath: /etc/config
+      volumes:
+        - name: config-volume
+          configMap:
+            name: app-config
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f deployment.yaml
+```
+
+**Wait for pod to be ready:**
+
+```bash
+kubectl wait --for=condition=ready pod -l app=configured-app -n practice-03 --timeout=60s
+```
+
+### Step 4: Verify Configuration
+
+Check that the pod is using the configuration:
+
+```bash
+kubectl logs -l app=configured-app -n practice-03
+```
+
+You should see the environment variables and config file contents printed.
+
+## Verification
+
+Verify everything is configured correctly:
+
+```bash
+# Check ConfigMap exists
+kubectl get configmap app-config -n practice-03
+
+# Check Secret exists (note: values are encoded)
+kubectl get secret db-credentials -n practice-03
+
+# Check pod is using the config
+kubectl get pods -n practice-03
+kubectl logs -l app=configured-app -n practice-03
+
+# Check environment variables in the pod
+kubectl exec -it deployment/configured-app -n practice-03 -- env | grep -E "APP_NAME|DB_"
+```
+
+## Understanding What Happened
+
+- **ConfigMap**: Stores non-sensitive configuration as key-value pairs or files
+- **Secret**: Stores sensitive data (base64 encoded by default)
+- **Environment Variables**: Injected from ConfigMap/Secret using `valueFrom`
+- **Volume Mounts**: ConfigMaps can be mounted as files in containers
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-03
+```
+
+## Next Steps
+
+→ [Exercise 4: Persistent Volumes](./04-persistent-volumes)
+
+## Additional Practice
+
+1. Update the ConfigMap and see if the pod picks up changes (hint: environment
+   variables never refresh in a running pod - restart it with
+   `kubectl rollout restart deployment/configured-app -n practice-03`; the
+   mounted config file is updated by the kubelet after a short delay)
+2. Create a Secret from a file:
+   `kubectl create secret generic file-secret --from-file=./secret-file.txt -n practice-03`
+3. Mount a Secret as a volume instead of environment variables (see the sketch
+   below)
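+
+For item 3, a minimal sketch of a Pod that mounts the Secret as files (the pod
+name and mount path are example values):
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: secret-volume-demo
+  namespace: practice-03
+spec:
+  containers:
+    - name: app
+      image: busybox
+      # Each Secret key becomes a file, e.g. /etc/credentials/password
+      command: ['sh', '-c', 'ls -l /etc/credentials && sleep 3600']
+      volumeMounts:
+        - name: credentials
+          mountPath: /etc/credentials
+          readOnly: true
+  volumes:
+    - name: credentials
+      secret:
+        secretName: db-credentials
+```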
diff --git a/docusaurus/docs/kubernetes/practice/04-persistent-volumes.md b/docusaurus/docs/kubernetes/practice/04-persistent-volumes.md
new file mode 100644
index 0000000..c15e796
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/04-persistent-volumes.md
@@ -0,0 +1,188 @@
+---
+title: 'Exercise 4: Persistent Volumes'
+---
+
+## Objective
+
+Learn how to use PersistentVolumeClaims (PVCs) to provide storage for
+applications. Understand the difference between ephemeral and persistent
+storage.
+
+## Prerequisites
+
+- Completed [Exercise 3: ConfigMaps and Secrets](./03-configmaps-and-secrets)
+- Understanding of Deployments and Pods
+
+## Exercise: Create an Application with Persistent Storage
+
+### Step 1: Create a PersistentVolumeClaim
+
+A PVC requests storage from the cluster. The cluster provisions it using a
+StorageClass. We use `longhorn` here; if you haven't installed Longhorn yet,
+the `local-path` StorageClass that ships with K3s also works for this exercise
+(adjust `storageClassName` accordingly).
+
+**YAML Version:** Create `namespace-pvc.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-04
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: app-storage
+  namespace: practice-04
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+  storageClassName: longhorn
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f namespace-pvc.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get pvc -n practice-04
+```
+
+Wait until the PVC shows `Bound` status. This means storage has been
+provisioned.
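+
+Behind the scenes the provisioner creates a PersistentVolume and binds it to
+your claim. You can inspect the pair like this (PV names are generated, so
+yours will differ):
+
+```bash
+# The claim and the PersistentVolume it is bound to
+kubectl get pvc app-storage -n practice-04
+kubectl get pv | grep practice-04/app-storage
+```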
+
+### Step 2: Create a Deployment Using the PVC
+
+Now let's create a deployment that uses this persistent storage.
+
+**YAML Version:** Create `deployment.yaml`:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: storage-app
+  namespace: practice-04
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: storage-app
+  template:
+    metadata:
+      labels:
+        app: storage-app
+    spec:
+      containers:
+        - name: app
+          image: busybox
+          command: ['/bin/sh']
+          args:
+            - -c
+            - |
+              echo "Writing to persistent storage..." > /data/message.txt
+              echo "Current time: $(date)" >> /data/message.txt
+              echo "Data persisted!" >> /data/message.txt
+              cat /data/message.txt
+              sleep 3600
+          volumeMounts:
+            - name: storage
+              mountPath: /data
+      volumes:
+        - name: storage
+          persistentVolumeClaim:
+            claimName: app-storage
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f deployment.yaml
+```
+
+**Wait for pod:**
+
+```bash
+kubectl wait --for=condition=ready pod -l app=storage-app -n practice-04 --timeout=60s
+```
+
+### Step 3: Verify Data Persistence
+
+Let's verify that data is being written to persistent storage:
+
+```bash
+# Check the pod logs
+kubectl logs -l app=storage-app -n practice-04
+
+# Check the data in the pod
+kubectl exec -it deployment/storage-app -n practice-04 -- cat /data/message.txt
+```
+
+### Step 4: Test Data Persistence
+
+The real test of persistent storage is that data survives pod restarts. Let's
+test this:
+
+```bash
+# Delete the pod (it will be recreated by the Deployment)
+kubectl delete pod -l app=storage-app -n practice-04
+
+# Wait for new pod
+kubectl wait --for=condition=ready pod -l app=storage-app -n practice-04 --timeout=60s
+
+# Check if data still exists
+kubectl exec -it deployment/storage-app -n practice-04 -- cat /data/message.txt
+```
+
+The data should still be there! This proves the storage is persistent.
+
+## Verification
+
+Verify persistent storage is working:
+
+```bash
+# Check PVC status
+kubectl get pvc -n practice-04
+
+# Check pod is using the volume
+kubectl describe pod -l app=storage-app -n practice-04 | grep -A 5 "Volumes:"
+
+# Verify data persistence after pod restart
+kubectl delete pod -l app=storage-app -n practice-04
+sleep 10
+kubectl exec -it deployment/storage-app -n practice-04 -- ls -la /data
+```
+
+## Understanding What Happened
+
+- **PersistentVolumeClaim (PVC)**: Requests storage from the cluster
+- **StorageClass**: Defines how storage is provisioned (we used `longhorn`)
+- **Volume Mount**: Makes the storage available inside the container
+- **Persistence**: Data survives pod restarts and deletions
+
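+To see which PersistentVolume backs the claim, inspect the PVC and the
+cluster-wide PV list:
+
+```bash
+# List PersistentVolumes; one should be bound to practice-04/app-storage
+kubectl get pv
+
+# The "Volume" field in the output names the bound PV
+kubectl describe pvc app-storage -n practice-04
+```
+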
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-04
+```
+
+This will also delete the PVC and the underlying volume.
+
+## Next Steps
+
+→ [Exercise 5: Longhorn Storage](./05-longhorn-storage)
+
+## Additional Practice
+
+1. Check available StorageClasses: `kubectl get storageclass`
+2. Create a PVC with a different size: Change `storage: 1Gi` to `storage: 2Gi`
+3. Try mounting the same PVC in multiple pods (hint: ReadWriteOnce means the
+   volume can only be mounted by a single node at a time, so pods on different
+   nodes cannot share it)
diff --git a/docusaurus/docs/kubernetes/practice/05-longhorn-storage.md b/docusaurus/docs/kubernetes/practice/05-longhorn-storage.md
new file mode 100644
index 0000000..7233ffa
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/05-longhorn-storage.md
@@ -0,0 +1,217 @@
+---
+title: 'Exercise 5: Longhorn Storage'
+---
+
+## Objective
+
+Learn how to use Longhorn for distributed, replicated storage in your K3s
+cluster. Understand Longhorn-specific features and volume management.
+
+## Prerequisites
+
+- Completed [Exercise 4: Persistent Volumes](./04-persistent-volumes)
+- Longhorn installed in your cluster (check with
+  `kubectl get pods -n longhorn-system`)
+
+## Exercise: Use Longhorn for Distributed Storage
+
+### Step 1: Verify Longhorn Installation
+
+First, let's make sure Longhorn is running:
+
+```bash
+# Check Longhorn pods
+kubectl get pods -n longhorn-system
+
+# Check StorageClass
+kubectl get storageclass longhorn
+```
+
+If Longhorn isn't installed, you'll need to install it first (see
+[Longhorn Setup](../k3s-backup-longhorn) documentation).
+
+### Step 2: Create a PVC Using Longhorn
+
+Longhorn provides the `longhorn` StorageClass. Let's create a PVC that uses it
+explicitly.
+
+**YAML Version:** Create `longhorn-pvc.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-05
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: longhorn-volume
+  namespace: practice-05
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: longhorn
+  resources:
+    requests:
+      storage: 2Gi
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f longhorn-pvc.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get pvc -n practice-05
+```
+
+Wait for `Bound` status.
+
+### Step 3: Check Longhorn Volume
+
+Longhorn creates a volume resource you can inspect:
+
+```bash
+# List Longhorn volumes
+kubectl get volumes -n longhorn-system
+
+# Describe the volume (replace with your volume name)
+kubectl get volumes -n longhorn-system -o name | head -1 | xargs kubectl describe -n longhorn-system
+```
+
+### Step 4: Create a Stateful Application
+
+Let's create a database-like application that benefits from persistent storage.
+
+**YAML Version:** Create `stateful-app.yaml`:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: database-app
+  namespace: practice-05
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: database-app
+  template:
+    metadata:
+      labels:
+        app: database-app
+    spec:
+      containers:
+        - name: db
+          image: postgres:15-alpine
+          env:
+            - name: POSTGRES_DB
+              value: myapp
+            - name: POSTGRES_USER
+              value: admin
+            - name: POSTGRES_PASSWORD
+              value: password123
+          ports:
+            - containerPort: 5432
+          volumeMounts:
+            - name: data
+              mountPath: /var/lib/postgresql/data
+      volumes:
+        - name: data
+          persistentVolumeClaim:
+            claimName: longhorn-volume
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f stateful-app.yaml
+```
+
+**Wait for pod:**
+
+```bash
+kubectl wait --for=condition=ready pod -l app=database-app -n practice-05 --timeout=120s
+```
+
+### Step 5: Test Data Persistence
+
+Let's create some data and verify it persists:
+
+```bash
+# Create a test database
+kubectl exec -it deployment/database-app -n practice-05 -- \
+  psql -U admin -d myapp -c "CREATE TABLE test (id SERIAL, data TEXT);"
+
+# Insert some data
+kubectl exec -it deployment/database-app -n practice-05 -- \
+  psql -U admin -d myapp -c "INSERT INTO test (data) VALUES ('This data should persist');"
+
+# Verify data exists
+kubectl exec -it deployment/database-app -n practice-05 -- \
+  psql -U admin -d myapp -c "SELECT * FROM test;"
+```
+
+### Step 6: Test Longhorn Replication
+
+Longhorn replicates data across nodes. Let's verify the volume is replicated:
+
+```bash
+# Check volume replication status in Longhorn
+kubectl get volumes -n longhorn-system
+
+# Check Longhorn UI (if accessible)
+# kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80
+```
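+
+To dig into replica counts and health, the Longhorn CRDs expose this directly;
+exact field names may vary slightly between Longhorn versions:
+
+```bash
+# List the individual replica objects Longhorn created for each volume
+kubectl get replicas.longhorn.io -n longhorn-system
+
+# Show the desired replica count and current state per volume
+kubectl get volumes.longhorn.io -n longhorn-system \
+  -o custom-columns=NAME:.metadata.name,REPLICAS:.spec.numberOfReplicas,STATE:.status.state,ROBUSTNESS:.status.robustness
+```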
+
+## Verification
+
+Verify Longhorn storage is working correctly:
+
+```bash
+# Check PVC is bound
+kubectl get pvc -n practice-05
+
+# Check Longhorn volume exists
+kubectl get volumes -n longhorn-system
+
+# Verify data persists after pod restart
+kubectl delete pod -l app=database-app -n practice-05
+sleep 15
+kubectl exec -it deployment/database-app -n practice-05 -- \
+  psql -U admin -d myapp -c "SELECT * FROM test;"
+```
+
+## Understanding What Happened
+
+- **Longhorn StorageClass**: Provides distributed, replicated storage
+- **Volume Replication**: Longhorn replicates data across multiple nodes for
+  redundancy
+- **Persistent Storage**: Data survives pod restarts and node failures
+- **Stateful Applications**: Perfect for databases and applications that need
+  persistent data
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-05
+```
+
+This will delete the PVC and the Longhorn volume.
+
+## Next Steps
+
+→ [Exercise 6: Services and Networking](./06-services-and-networking)
+
+## Additional Practice
+
+1. Check Longhorn volume details:
+   `kubectl get volumes -n longhorn-system -o yaml`
+2. Create multiple PVCs and see how Longhorn manages them
+3. Check Longhorn UI to see volume replication status (if you have access to it)
diff --git a/docusaurus/docs/kubernetes/practice/06-services-and-networking.md b/docusaurus/docs/kubernetes/practice/06-services-and-networking.md
new file mode 100644
index 0000000..e9a9a1c
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/06-services-and-networking.md
@@ -0,0 +1,253 @@
+---
+title: 'Exercise 6: Services and Networking'
+---
+
+## Objective
+
+Learn about different Kubernetes Service types and networking patterns.
+Understand when to use ClusterIP, NodePort, and LoadBalancer services.
+
+## Prerequisites
+
+- Completed [Exercise 5: Longhorn Storage](./05-longhorn-storage)
+- Understanding of Deployments and Pods
+- MetalLB installed (for LoadBalancer type) - check with
+  `kubectl get pods -n metallb-system`
+
+## Exercise: Expose Applications with Different Service Types
+
+### Step 1: Create a Web Application
+
+Let's create a simple web application to expose with different service types.
+
+**YAML Version:** Create `app.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-06
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: web-app
+  namespace: practice-06
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: web-app
+  template:
+    metadata:
+      labels:
+        app: web-app
+    spec:
+      containers:
+        - name: nginx
+          image: nginx:latest
+          ports:
+            - containerPort: 80
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f app.yaml
+```
+
+**Wait for pods:**
+
+```bash
+kubectl wait --for=condition=ready pod -l app=web-app -n practice-06 --timeout=60s
+```
+
+### Step 2: ClusterIP Service (Default)
+
+ClusterIP services are only accessible within the cluster. This is the default
+service type.
+
+**YAML Version:** Create `service-clusterip.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: web-app-clusterip
+  namespace: practice-06
+spec:
+  type: ClusterIP
+  selector:
+    app: web-app
+  ports:
+    - port: 80
+      targetPort: 80
+      protocol: TCP
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f service-clusterip.yaml
+```
+
+**Test (via port-forward from your local machine):**
+
+```bash
+# Port forward to access from local machine
+kubectl port-forward svc/web-app-clusterip 8080:80 -n practice-06
+
+# In another terminal
+curl http://localhost:8080
+```
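+
+To test it genuinely from inside the cluster, spin up a throwaway pod in the
+same namespace and resolve the Service by its DNS name (a common pattern; the
+temporary pod name is arbitrary):
+
+```bash
+# Run a one-off busybox pod, curl the Service by name, then clean it up
+kubectl run tmp-client --rm -it --image=busybox --restart=Never -n practice-06 \
+  -- wget -qO- http://web-app-clusterip
+```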
+
+### Step 3: NodePort Service
+
+NodePort services expose the application on a port on each node, making it
+accessible from outside the cluster.
+
+**YAML Version:** Create `service-nodeport.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: web-app-nodeport
+  namespace: practice-06
+spec:
+  type: NodePort
+  selector:
+    app: web-app
+  ports:
+    - port: 80
+      targetPort: 80
+      nodePort: 30080
+      protocol: TCP
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f service-nodeport.yaml
+```
+
+**Verify:**
+
+```bash
+kubectl get svc web-app-nodeport -n practice-06
+```
+
+Note the `30080` port. You can access the service at `http://<node-ip>:30080`
+from outside the cluster, using the IP of any node.
+
+### Step 4: LoadBalancer Service
+
+LoadBalancer services get an external IP (via MetalLB in our setup) and are
+accessible from outside the cluster.
+
+**YAML Version:** Create `service-loadbalancer.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: web-app-loadbalancer
+  namespace: practice-06
+spec:
+  type: LoadBalancer
+  selector:
+    app: web-app
+  ports:
+    - port: 80
+      targetPort: 80
+      protocol: TCP
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f service-loadbalancer.yaml
+```
+
+**Wait for external IP:**
+
+```bash
+kubectl get svc web-app-loadbalancer -n practice-06 -w
+```
+
+Once you see an `EXTERNAL-IP`, you can access the service at that IP from
+outside the cluster.
+
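+For example, once MetalLB has assigned an address (shown under `EXTERNAL-IP`),
+you can hit it directly:
+
+```bash
+# Replace <external-ip> with the address shown by the command above
+curl http://<external-ip>
+```
+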
+### Step 5: Compare Service Types
+
+Let's see the differences:
+
+```bash
+# List all services
+kubectl get svc -n practice-06
+
+# Check endpoints (should be the same for all)
+kubectl get endpoints -n practice-06
+
+# Describe each service to see details
+kubectl describe svc web-app-clusterip -n practice-06
+kubectl describe svc web-app-nodeport -n practice-06
+kubectl describe svc web-app-loadbalancer -n practice-06
+```
+
+## Verification
+
+Verify all service types are working:
+
+```bash
+# Check all services
+kubectl get svc -n practice-06
+
+# Verify endpoints (all should point to the same pods)
+kubectl get endpoints -n practice-06
+
+# Test ClusterIP (port forward)
+kubectl port-forward svc/web-app-clusterip 8080:80 -n practice-06 &
+curl http://localhost:8080
+
+# Test NodePort (if you have node IP)
+# curl http://<node-ip>:30080
+
+# Test LoadBalancer (if external IP assigned)
+# curl http://<external-ip>
+```
+
+## Understanding What Happened
+
+- **ClusterIP**: Internal only, accessed via port-forward or from within cluster
+- **NodePort**: Exposed on each node at a specific port (30000-32767 range)
+- **LoadBalancer**: Gets external IP via MetalLB, accessible from outside
+- **Service Selectors**: All services route to the same pods via label selectors
+- **Endpoints**: Kubernetes automatically creates endpoints for services
+
+## When to Use Each Type
+
+- **ClusterIP**: Default for internal services, microservices communication
+- **NodePort**: Quick external access for development/testing
+- **LoadBalancer**: Production external access, integrates with ingress
+  controllers
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-06
+```
+
+## Next Steps
+
+→ [Exercise 7: CloudNative PG Basics](./07-cloudnative-pg-basics)
+
+## Additional Practice
+
+1. Create a service that selects pods by multiple labels
+2. Check service endpoints:
+   `kubectl get endpoints <service-name> -n practice-06`
+3. Scale the deployment and watch endpoints update automatically
+4. Try accessing the LoadBalancer service from outside the cluster
diff --git a/docusaurus/docs/kubernetes/practice/07-cloudnative-pg-basics.md b/docusaurus/docs/kubernetes/practice/07-cloudnative-pg-basics.md
new file mode 100644
index 0000000..8b6371c
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/07-cloudnative-pg-basics.md
@@ -0,0 +1,228 @@
+---
+title: 'Exercise 7: CloudNative PG Basics'
+---
+
+## Objective
+
+Learn how to create and manage PostgreSQL clusters using CloudNative PG. This
+exercise introduces database management in Kubernetes.
+
+## Prerequisites
+
+- Completed [Exercise 6: Services and Networking](./06-services-and-networking)
+- CloudNative PG operator installed (check with
+  `kubectl get pods -n cnpg-system`)
+- Longhorn storage available
+
+Verify CloudNative PG is installed:
+
+```bash
+kubectl get pods -n cnpg-system
+```
+
+You should see the CloudNative PG operator (controller manager) pod running.
+
+## Exercise: Create a PostgreSQL Cluster
+
+### Step 1: Create Namespace and Secret
+
+PostgreSQL needs credentials. Let's create a secret for the superuser.
+
+**YAML Version:** Create `postgres-setup.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-07
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: postgres-credentials
+  namespace: practice-07
+type: Opaque
+stringData:
+  username: postgres
+  password: mysecurepassword123
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f postgres-setup.yaml
+```
+
+### Step 2: Create PostgreSQL Cluster
+
+Now let's create a PostgreSQL cluster with CloudNative PG.
+
+**YAML Version:** Create `postgres-cluster.yaml`:
+
+```yaml
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: practice-postgres
+  namespace: practice-07
+spec:
+  instances: 1
+  imageName: ghcr.io/cloudnative-pg/postgresql:15
+  primaryUpdateMethod: switchover
+  storage:
+    size: 2Gi
+    storageClass: longhorn
+  superuserSecret:
+    name: postgres-credentials
+  bootstrap:
+    initdb:
+      database: practice_db
+      owner: app_user
+      secret:
+        name: postgres-credentials
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f postgres-cluster.yaml
+```
+
+**Wait for cluster to be ready:**
+
+```bash
+kubectl wait --for=condition=ready cluster practice-postgres -n practice-07 --timeout=300s
+```
+
+This may take a few minutes as PostgreSQL initializes.
+
+### Step 3: Verify Cluster Status
+
+Check that the cluster is running:
+
+```bash
+# Check cluster status
+kubectl get cluster practice-postgres -n practice-07
+
+# Check pods
+kubectl get pods -n practice-07 -l cnpg.io/cluster=practice-postgres
+
+# Check services (CloudNative PG creates multiple services)
+kubectl get svc -n practice-07
+```
+
+You should see:
+
+- `practice-postgres-r` - Read service (any instance, primary or replica)
+- `practice-postgres-ro` - Read-only service (replicas only)
+- `practice-postgres-rw` - Read-write service (primary only)
+
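+An application running in the cluster would normally point at the read-write
+service rather than a specific pod. Assuming the credentials from
+`postgres-credentials`, a connection would look roughly like this (the password
+is a placeholder; inside the same namespace the short service name also works):
+
+```bash
+# Example: connect through the read-write Service (always routes to the primary)
+psql "postgresql://app_user:<password>@practice-postgres-rw.practice-07.svc.cluster.local:5432/practice_db"
+```
+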
+### Step 4: Connect to the Database
+
+Let's connect to the database and create some data.
+
+**Get the primary pod name:**
+
+```bash
+PRIMARY_POD=$(kubectl get pods -n practice-07 -l cnpg.io/cluster=practice-postgres,role=primary -o jsonpath='{.items[0].metadata.name}')
+echo $PRIMARY_POD
+```
+
+**Connect to PostgreSQL:**
+
+```bash
+kubectl exec -it $PRIMARY_POD -n practice-07 -- psql -U postgres -d practice_db
+```
+
+**In the PostgreSQL prompt, run:**
+
+```sql
+-- Create a table
+CREATE TABLE users (
+    id SERIAL PRIMARY KEY,
+    name VARCHAR(100),
+    email VARCHAR(100)
+);
+
+-- Insert some data
+INSERT INTO users (name, email) VALUES
+    ('Alice', 'alice@example.com'),
+    ('Bob', 'bob@example.com');
+
+-- Query the data
+SELECT * FROM users;
+
+-- Exit
+\q
+```
+
+### Step 5: Use the Read-Write Service
+
+CloudNative PG provides services for different access patterns. Let's use the
+read-write service.
+
+**Port forward to the read-write service:**
+
+```bash
+kubectl port-forward svc/practice-postgres-rw 5432:5432 -n practice-07
+```
+
+**In another terminal, connect (if you have psql locally):**
+
+```bash
+# If you have psql installed locally
+psql -h localhost -U postgres -d practice_db
+```
+
+Or use kubectl exec as shown above.
+
+## Verification
+
+Verify the PostgreSQL cluster is working:
+
+```bash
+# Check cluster is ready
+kubectl get cluster practice-postgres -n practice-07
+
+# Check pods are running
+kubectl get pods -n practice-07
+
+# Check services
+kubectl get svc -n practice-07
+
+# Verify data exists
+PRIMARY_POD=$(kubectl get pods -n practice-07 -l role=primary -o jsonpath='{.items[0].metadata.name}')
+kubectl exec -it $PRIMARY_POD -n practice-07 -- \
+  psql -U postgres -d practice_db -c "SELECT * FROM users;"
+```
+
+## Understanding What Happened
+
+- **CloudNative PG Cluster**: Manages PostgreSQL instances in Kubernetes
+- **Storage**: Uses Longhorn for persistent storage
+- **Services**: Automatically creates read, read-only, and read-write services
+- **High Availability**: Can scale to multiple instances (we used 1 for
+  simplicity)
+- **Secrets**: Credentials managed via Kubernetes secrets
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-07
+```
+
+This will delete the cluster and all associated resources.
+
+## Next Steps
+
+→ [Exercise 8: CloudNative PG Advanced](./08-cloudnative-pg-advanced)
+
+## Additional Practice
+
+1. Scale the cluster to 3 instances: Update `instances: 3` in the cluster spec
+2. Check the different service types and understand when to use each
+3. Create a new database in the cluster (see the example below)
+4. Check cluster status:
+   `kubectl describe cluster practice-postgres -n practice-07`
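+
+For item 3, one way to create an extra database is through the primary pod (the
+database name here is just an example):
+
+```bash
+PRIMARY_POD=$(kubectl get pods -n practice-07 -l role=primary -o jsonpath='{.items[0].metadata.name}')
+kubectl exec -it $PRIMARY_POD -n practice-07 -- \
+  psql -U postgres -c "CREATE DATABASE practice_db_two;"
+```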
diff --git a/docusaurus/docs/kubernetes/practice/08-cloudnative-pg-advanced.md b/docusaurus/docs/kubernetes/practice/08-cloudnative-pg-advanced.md
new file mode 100644
index 0000000..13c3a65
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/08-cloudnative-pg-advanced.md
@@ -0,0 +1,274 @@
+---
+title: 'Exercise 8: CloudNative PG Advanced'
+---
+
+## Objective
+
+Learn advanced CloudNative PG features including multi-instance clusters,
+backups, and connecting applications to databases.
+
+## Prerequisites
+
+- Completed [Exercise 7: CloudNative PG Basics](./07-cloudnative-pg-basics)
+- Understanding of PostgreSQL basics
+- CloudNative PG operator installed
+
+## Exercise: Multi-Instance Cluster and Application Integration
+
+### Step 1: Create a High-Availability PostgreSQL Cluster
+
+Let's create a 3-instance cluster for high availability.
+
+**YAML Version:** Create `ha-postgres-cluster.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-08
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: postgres-credentials
+  namespace: practice-08
+type: Opaque
+stringData:
+  username: postgres
+  password: mysecurepassword123
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: ha-postgres
+  namespace: practice-08
+spec:
+  instances: 3
+  imageName: ghcr.io/cloudnative-pg/postgresql:15
+  primaryUpdateMethod: switchover
+  storage:
+    size: 2Gi
+    storageClass: longhorn
+  superuserSecret:
+    name: postgres-credentials
+  bootstrap:
+    initdb:
+      database: app_db
+      owner: app_user
+      secret:
+        name: postgres-credentials
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f ha-postgres-cluster.yaml
+```
+
+**Wait for cluster:**
+
+```bash
+kubectl wait --for=condition=ready cluster ha-postgres -n practice-08 --timeout=300s
+```
+
+**Verify HA setup:**
+
+```bash
+# Check all instances are running
+kubectl get pods -n practice-08 -l cnpg.io/cluster=ha-postgres
+
+# Check which is primary
+kubectl get pods -n practice-08 -l cnpg.io/cluster=ha-postgres,role=primary
+
+# Check replicas
+kubectl get pods -n practice-08 -l cnpg.io/cluster=ha-postgres,role=replica
+```
+
+### Step 2: Create an Application That Uses the Database
+
+Let's create a simple application that connects to the database.
+
+**YAML Version:** Create `app-with-db.yaml`:
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: app-config
+  namespace: practice-08
+data:
+  DATABASE_HOST: ha-postgres-rw
+  DATABASE_PORT: '5432'
+  DATABASE_NAME: app_db
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: app-db-credentials
+  namespace: practice-08
+type: Opaque
+stringData:
+  DATABASE_USER: app_user
+  DATABASE_PASSWORD: mysecurepassword123
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: app
+  namespace: practice-08
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: web-app
+  template:
+    metadata:
+      labels:
+        app: web-app
+    spec:
+      containers:
+        - name: app
+          image: postgres:15-alpine
+          command: ['/bin/sh']
+          args:
+            - -c
+            - |
+              echo "Connecting to database..."
+              echo "Host: $DATABASE_HOST"
+              echo "Port: $DATABASE_PORT"
+              echo "Database: $DATABASE_NAME"
+              echo "User: $DATABASE_USER"
+              # Test connection
+              PGPASSWORD=$DATABASE_PASSWORD psql -h $DATABASE_HOST -p $DATABASE_PORT -U $DATABASE_USER -d $DATABASE_NAME -c "SELECT version();"
+              echo "Connection successful!"
+              sleep 3600
+          envFrom:
+            - configMapRef:
+                name: app-config
+            - secretRef:
+                name: app-db-credentials
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: app
+  namespace: practice-08
+spec:
+  selector:
+    app: web-app
+  ports:
+    - port: 80
+      targetPort: 80
+  type: ClusterIP
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f app-with-db.yaml
+```
+
+**Check app logs:**
+
+```bash
+kubectl logs -l app=web-app -n practice-08
+```
+
+You should see the database connection information and a successful connection
+message.
+
+### Step 3: Test High Availability
+
+Let's test what happens when the primary fails.
+
+**Get primary pod:**
+
+```bash
+PRIMARY_POD=$(kubectl get pods -n practice-08 -l role=primary -o jsonpath='{.items[0].metadata.name}')
+echo "Primary pod: $PRIMARY_POD"
+```
+
+**Create some data:**
+
+```bash
+kubectl exec -it $PRIMARY_POD -n practice-08 -- \
+  psql -U postgres -d app_db -c "CREATE TABLE IF NOT EXISTS test_data (id SERIAL, data TEXT);"
+
+kubectl exec -it $PRIMARY_POD -n practice-08 -- \
+  psql -U postgres -d app_db -c "INSERT INTO test_data (data) VALUES ('This should survive failover');"
+```
+
+**Simulate primary failure (delete the primary pod):**
+
+```bash
+kubectl delete pod $PRIMARY_POD -n practice-08
+```
+
+**Watch CloudNative PG promote a new primary:**
+
+```bash
+kubectl get pods -n practice-08 -l cnpg.io/cluster=ha-postgres -w
+```
+
+After a few moments, a replica will be promoted to primary.
+
+**Verify data survived:**
+
+```bash
+NEW_PRIMARY=$(kubectl get pods -n practice-08 -l role=primary -o jsonpath='{.items[0].metadata.name}')
+kubectl exec -it $NEW_PRIMARY -n practice-08 -- \
+  psql -U postgres -d app_db -c "SELECT * FROM test_data;"
+```
+
+The data should still be there!
+
+## Verification
+
+Verify the HA cluster and application integration:
+
+```bash
+# Check cluster status
+kubectl get cluster ha-postgres -n practice-08
+
+# Check all instances
+kubectl get pods -n practice-08 -l cnpg.io/cluster=ha-postgres
+
+# Verify application can connect
+kubectl logs -l app=web-app -n practice-08
+
+# Check data persistence after failover
+kubectl get pods -n practice-08 -l role=primary
+PRIMARY=$(kubectl get pods -n practice-08 -l role=primary -o jsonpath='{.items[0].metadata.name}')
+kubectl exec -it $PRIMARY -n practice-08 -- \
+  psql -U postgres -d app_db -c "SELECT COUNT(*) FROM test_data;"
+```
+
+## Understanding What Happened
+
+- **High Availability**: 3-instance cluster provides redundancy
+- **Automatic Failover**: CloudNative PG promotes a replica when primary fails
+- **Service Abstraction**: Using `ha-postgres-rw` service means apps don't need
+  to know which pod is primary
+- **Data Persistence**: Data survives pod failures and failovers
+- **Application Integration**: Apps connect via services, not direct pod access
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-08
+```
+
+## Next Steps
+
+→ [Exercise 10: Complete Application](./10-complete-application)
+
+## Additional Practice
+
+1. Scale the cluster to 5 instances and observe replication
+2. Check cluster events: `kubectl get events -n practice-08`
+3. Manually trigger a switchover:
+   `kubectl cnpg promote <cluster-name> <instance-name> -n practice-08` (if the cnpg plugin is
+   installed)
+4. Monitor replication lag between primary and replicas
diff --git a/docusaurus/docs/kubernetes/practice/10-complete-application.md b/docusaurus/docs/kubernetes/practice/10-complete-application.md
new file mode 100644
index 0000000..9ea89c4
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/10-complete-application.md
@@ -0,0 +1,376 @@
+---
+title: 'Exercise 10: Complete Application'
+---
+
+## Objective
+
+Build a complete, production-like application stack using everything you've
+learned: Deployments, Services, ConfigMaps, Secrets, Persistent Volumes,
+Longhorn, and CloudNative PG.
+
+## Prerequisites
+
+- Completed all previous exercises (01-08)
+- Understanding of all concepts covered so far
+- CloudNative PG and Longhorn installed
+
+## Exercise: Full-Stack Application
+
+We'll build a complete application with:
+
+- Web frontend (nginx)
+- Backend API (simple app)
+- PostgreSQL database (CloudNative PG)
+- Persistent storage (Longhorn)
+- Configuration management (ConfigMaps/Secrets)
+- Services for networking
+
+### Step 1: Create Namespace and Database
+
+**YAML Version:** Create `01-database.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: practice-complete
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: postgres-credentials
+  namespace: practice-complete
+type: Opaque
+stringData:
+  username: postgres
+  password: securepassword123
+---
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: app-database
+  namespace: practice-complete
+spec:
+  instances: 1
+  imageName: ghcr.io/cloudnative-pg/postgresql:15
+  primaryUpdateMethod: switchover
+  storage:
+    size: 5Gi
+    storageClass: longhorn
+  superuserSecret:
+    name: postgres-credentials
+  bootstrap:
+    initdb:
+      database: myapp
+      owner: app_user
+      secret:
+        name: postgres-credentials
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f 01-database.yaml
+```
+
+**Wait for database:**
+
+```bash
+kubectl wait --for=condition=ready cluster app-database -n practice-complete --timeout=300s
+```
+
+### Step 2: Create Application Configuration
+
+**YAML Version:** Create `02-config.yaml`:
+
+```yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: app-config
+  namespace: practice-complete
+data:
+  APP_NAME: 'Complete Practice App'
+  APP_ENV: 'production'
+  DATABASE_HOST: 'app-database-rw'
+  DATABASE_PORT: '5432'
+  DATABASE_NAME: 'myapp'
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: app-secrets
+  namespace: practice-complete
+type: Opaque
+stringData:
+  DATABASE_USER: 'app_user'
+  DATABASE_PASSWORD: 'securepassword123'
+  API_KEY: 'secret-api-key-12345'
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f 02-config.yaml
+```
+
+### Step 3: Create Backend API
+
+**YAML Version:** Create `03-backend.yaml`:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: backend-api
+  namespace: practice-complete
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: backend
+  template:
+    metadata:
+      labels:
+        app: backend
+    spec:
+      containers:
+        - name: api
+          image: postgres:15-alpine
+          command: ['/bin/sh']
+          args:
+            - -c
+            - |
+              echo "Backend API Starting..."
+              echo "Database: $DATABASE_HOST:$DATABASE_PORT/$DATABASE_NAME"
+              echo "API Key: $API_KEY"
+              # Simulate API server
+              while true; do
+                echo "$(date): API server running, connected to database"
+                PGPASSWORD=$DATABASE_PASSWORD psql -h $DATABASE_HOST -p $DATABASE_PORT -U $DATABASE_USER -d $DATABASE_NAME -c "SELECT NOW();" 2>/dev/null || echo "Database connection check..."
+                sleep 30
+              done
+          envFrom:
+            - configMapRef:
+                name: app-config
+            - secretRef:
+                name: app-secrets
+          ports:
+            - containerPort: 8080
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: backend-api
+  namespace: practice-complete
+spec:
+  selector:
+    app: backend
+  ports:
+    - port: 8080
+      targetPort: 8080
+  type: ClusterIP
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f 03-backend.yaml
+```
+
+### Step 4: Create Frontend with Persistent Storage
+
+**YAML Version:** Create `04-frontend.yaml`:
+
+```yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: frontend-storage
+  namespace: practice-complete
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: longhorn
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: frontend
+  namespace: practice-complete
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: frontend
+  template:
+    metadata:
+      labels:
+        app: frontend
+    spec:
+      containers:
+        - name: nginx
+          image: nginx:latest
+          ports:
+            - containerPort: 80
+          volumeMounts:
+            - name: storage
+              mountPath: /usr/share/nginx/html/data
+            - name: config
+              # Mounted outside /etc/nginx/conf.d so nginx's default server config stays intact
+              mountPath: /etc/app-config
+          env:
+            - name: BACKEND_URL
+              value: 'http://backend-api:8080'
+      volumes:
+        - name: storage
+          persistentVolumeClaim:
+            claimName: frontend-storage
+        - name: config
+          configMap:
+            name: app-config
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: frontend
+  namespace: practice-complete
+spec:
+  selector:
+    app: frontend
+  ports:
+    - port: 80
+      targetPort: 80
+  type: LoadBalancer
+```
+
+**Apply:**
+
+```bash
+kubectl apply -f 04-frontend.yaml
+```
+
+**Wait for LoadBalancer IP:**
+
+```bash
+kubectl get svc frontend -n practice-complete -w
+```
+
+### Step 5: Initialize Database Schema
+
+**Create database schema:**
+
+```bash
+PRIMARY_POD=$(kubectl get pods -n practice-complete -l cnpg.io/cluster=app-database,role=primary -o jsonpath='{.items[0].metadata.name}')
+
+kubectl exec -it $PRIMARY_POD -n practice-complete -- \
+  psql -U postgres -d myapp -c "
+    CREATE TABLE IF NOT EXISTS users (
+      id SERIAL PRIMARY KEY,
+      name VARCHAR(100),
+      email VARCHAR(100),
+      created_at TIMESTAMP DEFAULT NOW()
+    );
+
+    CREATE TABLE IF NOT EXISTS posts (
+      id SERIAL PRIMARY KEY,
+      user_id INTEGER REFERENCES users(id),
+      title VARCHAR(200),
+      content TEXT,
+      created_at TIMESTAMP DEFAULT NOW()
+    );
+
+    INSERT INTO users (name, email) VALUES
+      ('Alice', 'alice@example.com'),
+      ('Bob', 'bob@example.com');
+
+    INSERT INTO posts (user_id, title, content) VALUES
+      (1, 'First Post', 'This is my first post!'),
+      (2, 'Hello World', 'Hello from Bob!');
+  "
+```
+
+## Verification
+
+Verify the complete application stack:
+
+```bash
+# Check all components
+kubectl get all -n practice-complete
+
+# Check database cluster
+kubectl get cluster app-database -n practice-complete
+
+# Check persistent volumes
+kubectl get pvc -n practice-complete
+
+# Check services
+kubectl get svc -n practice-complete
+
+# Check backend logs
+kubectl logs -l app=backend -n practice-complete
+
+# Verify database data
+PRIMARY_POD=$(kubectl get pods -n practice-complete -l role=primary -o jsonpath='{.items[0].metadata.name}')
+kubectl exec -it $PRIMARY_POD -n practice-complete -- \
+  psql -U postgres -d myapp -c "SELECT * FROM users;"
+kubectl exec -it $PRIMARY_POD -n practice-complete -- \
+  psql -U postgres -d myapp -c "SELECT * FROM posts;"
+
+# Test frontend (if LoadBalancer IP assigned)
+# curl http://<external-ip>
+```
+
+## Understanding What Happened
+
+You've built a complete application stack with:
+
+- **Database**: CloudNative PG managing PostgreSQL with persistent storage
+- **Backend**: API service connecting to database via ConfigMap/Secret
+- **Frontend**: Web server with persistent storage for static files
+- **Networking**: Services connecting components
+- **Storage**: Longhorn providing persistent volumes
+- **Configuration**: ConfigMaps and Secrets managing app config
+- **High Availability**: Multiple replicas for frontend and backend
+
+This is a production-like setup!
+
+## Cleanup
+
+Remove all resources:
+
+```bash
+kubectl delete namespace practice-complete
+```
+
+This will clean up everything: database, applications, storage, and
+configuration.
+
+## Congratulations! 🎉
+
+You've completed the practice exercises! You now understand:
+
+- Basic Kubernetes resources
+- Namespace organization
+- Configuration management
+- Persistent storage with Longhorn
+- Database management with CloudNative PG
+- Complete application stacks
+
+## Next Steps
+
+- Explore [K3s Maintenance](../k3s-maintenance) guides
+- Learn about [Backup Strategies](../k3s-backup)
+- Check out [ArgoCD Setup](../setup-argocd) for GitOps
+- Review [Common Kubernetes Commands](../common-kubernetes-commands) reference
+
+## Additional Practice Ideas
+
+1. Add an Ingress resource to route traffic to the frontend (sketch below)
+2. Scale the backend to 5 replicas and observe load distribution
+3. Create a backup of the database using CloudNative PG backups
+4. Add monitoring and logging
+5. Implement health checks and readiness probes
+6. Set up resource limits and requests
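+
+For idea 1, a minimal Ingress sketch routing to the `frontend` Service could
+look like this (the hostname is a placeholder, and `traefik` is assumed as the
+ingress class since K3s ships with Traefik by default):
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: frontend
+  namespace: practice-complete
+spec:
+  ingressClassName: traefik
+  rules:
+    - host: frontend.example.local # placeholder hostname
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: frontend
+                port:
+                  number: 80
+```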
diff --git a/docusaurus/docs/kubernetes/practice/getting-started.md b/docusaurus/docs/kubernetes/practice/getting-started.md
new file mode 100644
index 0000000..8cf66c5
--- /dev/null
+++ b/docusaurus/docs/kubernetes/practice/getting-started.md
@@ -0,0 +1,153 @@
+---
+title: Practice Makes Perfect 🥷🏻🚀
+---
+
+At this point, your Raspberry Pis should be configured, and you should have a
+basic understanding of Kubernetes. Most importantly, you know why you're
+learning all of this. Now, let's move into the practical side of things by using
+[`kubectl`](https://kubernetes.io/docs/reference/kubectl/) (pronounced
+"kube-control").
+
+Until we start using tools like [`helm`](https://helm.sh/),
+[`kubectl`](https://kubernetes.io/docs/reference/kubectl/) will be your best
+friend. As I've mentioned before in previous sections or during my
+[live streams](https://www.twitch.tv/programmer_network), we should add tools
+and abstractions only **once** the work becomes repetitive and frustrating.
+
+In this case, we aren't going to use [`helm`](https://helm.sh/) until we've
+learned how to use [`kubectl`](https://kubernetes.io/docs/reference/kubectl/)
+thoroughly and memorized the key commands. Mastering the basics will help us
+build a strong foundation and make it clear when it's time to introduce new
+abstractions.
+
+## Learning Path Overview
+
+This practice section is organized into a progressive learning path that starts
+with basic Kubernetes concepts and gradually introduces the infrastructure
+components you'll use in production. Each exercise builds on the previous one,
+so it's best to follow them in order.
+
+### Prerequisites
+
+Before starting these exercises, make sure you have:
+
+- K3s cluster running and accessible
+- `kubectl` configured and connected to your cluster
+- Basic understanding of containers and YAML
+- Access to your cluster via `kubectl get nodes` (should show your nodes)
+
+### How to Use This Guide
+
+1. **Start with Beginner exercises** - Even if you have some experience, these
+   establish the foundation
+2. **Complete exercises in order** - Each builds on concepts from previous ones
+3. **Practice both ways** - Try commands first, then create YAML files
+4. **Verify your work** - Always run the verification steps
+5. **Clean up** - Remove resources after each exercise to keep your cluster
+   clean
+
+## Practice Exercises
+
+### Beginner Level
+
+Start here if you're new to Kubernetes. These exercises cover the fundamental
+concepts you'll use everywhere.
+
+1. **[Basic Kubernetes Resources](./01-basic-kubernetes)** - Pods, Deployments,
+   and Services
+2. **[Namespaces and Resources](./02-namespaces-and-resources)** - Organizing
+   your cluster
+3. **[ConfigMaps and Secrets](./03-configmaps-and-secrets)** - Managing
+   configuration and sensitive data
+
+### Intermediate Level
+
+Once you're comfortable with basics, these exercises introduce storage and
+networking concepts.
+
+4. **[Persistent Volumes](./04-persistent-volumes)** - Understanding storage in
+   Kubernetes
+5. **[Longhorn Storage](./05-longhorn-storage)** - Using Longhorn for persistent
+   storage
+6. **[Services and Networking](./06-services-and-networking)** - Different
+   service types and networking patterns
+
+### Advanced Level
+
+These exercises combine everything you've learned and introduce database
+management and complete application stacks.
+
+7. **[CloudNative PG Basics](./07-cloudnative-pg-basics)** - Creating and
+   managing PostgreSQL clusters
+8. **[CloudNative PG Advanced](./08-cloudnative-pg-advanced)** - Database
+   management, backups, and advanced features
+9. **[Complete Application](./10-complete-application)** - Full-stack
+   application with database, storage, and services
+
+## Progression Guide
+
+### Level 1: Basic Kubernetes (Exercises 01-03)
+
+**What you'll learn:**
+
+- Creating and managing Pods, Deployments, and Services
+- Working with namespaces
+- Managing configuration with ConfigMaps and Secrets
+
+**Infrastructure components:** None yet - pure Kubernetes basics
+
+**Time estimate:** 1-2 hours
+
+### Level 2: Storage and Networking (Exercises 04-06)
+
+**What you'll learn:**
+
+- PersistentVolumeClaims and storage concepts
+- Using Longhorn for distributed storage
+- Different service types and networking patterns
+
+**Infrastructure components:** Longhorn
+
+**Time estimate:** 2-3 hours
+
+### Level 3: Databases and Advanced Topics (Exercises 07-10)
+
+**What you'll learn:**
+
+- Managing PostgreSQL with CloudNative PG
+- Database backups and recovery
+- Building complete application stacks
+
+**Infrastructure components:** CloudNative PG, Longhorn
+
+**Time estimate:** 3-4 hours
+
+## Tips for Success
+
+1. **Don't skip verification steps** - They help you understand what's happening
+2. **Read error messages carefully** - Kubernetes error messages are usually
+   helpful
+3. **Use `kubectl describe`** - When something doesn't work, describe the
+   resource to see what's wrong
+4. **Clean up after each exercise** - Keeps your cluster manageable and helps
+   you learn cleanup patterns
+5. **Experiment** - Once you complete an exercise, try modifying it to see what
+   happens
+
+## Getting Help
+
+If you get stuck:
+
+1. Check the resource status: `kubectl get <resource-type> -n <namespace>`
+2. Describe the resource:
+   `kubectl describe <resource-type> <resource-name> -n <namespace>`
+3. Check events: `kubectl get events -n <namespace>`
+4. Review logs: `kubectl logs <pod-name> -n <namespace>`
+
+## Ready to Start?
+
+Begin with [Exercise 1: Basic Kubernetes Resources](./01-basic-kubernetes) and
+work through the exercises in order. Remember, the goal isn't to rush through
+them—it's to build a solid understanding of how Kubernetes works in practice.
+
+Good luck, and have fun! 🚀
diff --git a/docusaurus/sidebars.ts b/docusaurus/sidebars.ts
index f2309c3..cdad1f2 100644
--- a/docusaurus/sidebars.ts
+++ b/docusaurus/sidebars.ts
@@ -1,4 +1,4 @@
-import type { SidebarsConfig } from "@docusaurus/plugin-content-docs";
+import type { SidebarsConfig } from '@docusaurus/plugin-content-docs'
 
 // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...)
 
@@ -15,407 +15,470 @@ import type { SidebarsConfig } from "@docusaurus/plugin-content-docs";
 const sidebars: SidebarsConfig = {
   tutorialSidebar: [
     {
-      type: "category",
-      label: "Welcome",
+      type: 'category',
+      label: 'Welcome',
       items: [
         {
-          type: "doc",
-          label: "Welcome",
-          id: "welcome",
+          type: 'doc',
+          label: 'Welcome',
+          id: 'welcome',
         },
         {
-          type: "doc",
-          label: "Why?",
-          id: "why",
+          type: 'doc',
+          label: 'Why?',
+          id: 'why',
         },
         {
-          type: "doc",
+          type: 'doc',
           label: "The reason why it's hard",
-          id: "why-is-it-hard",
+          id: 'why-is-it-hard',
         },
         {
-          type: "doc",
-          label: "The outcome",
-          id: "what-we-will-learn",
+          type: 'doc',
+          label: 'The outcome',
+          id: 'what-we-will-learn',
         },
       ],
     },
     {
-      type: "category",
-      label: "Hardware",
+      type: 'category',
+      label: 'Hardware',
       items: [
         {
-          type: "doc",
-          label: "Components",
-          id: "hardware-raspberry-pi-setup/hardware",
+          type: 'doc',
+          label: 'Components',
+          id: 'hardware-raspberry-pi-setup/hardware',
         },
         {
-          type: "category",
-          label: "Setup",
+          type: 'category',
+          label: 'Setup',
           items: [
             {
-              type: "doc",
-              label: "Before We Start",
-              id: "hardware-raspberry-pi-setup/before-we-start",
+              type: 'doc',
+              label: 'Before We Start',
+              id: 'hardware-raspberry-pi-setup/before-we-start',
             },
             {
-              type: "doc",
-              label: "Raspberry Pis",
-              id: "hardware-raspberry-pi-setup/raspberry-pi-setup",
+              type: 'doc',
+              label: 'Raspberry Pis',
+              id: 'hardware-raspberry-pi-setup/raspberry-pi-setup',
             },
             {
-              type: "doc",
-              label: "Mini PCs",
-              id: "hardware-raspberry-pi-setup/mini-pcs-setup",
+              type: 'doc',
+              label: 'Mini PCs',
+              id: 'hardware-raspberry-pi-setup/mini-pcs-setup',
             },
           ],
         },
       ],
     },
     {
-      type: "category",
-      label: "Mikrotik",
+      type: 'category',
+      label: 'Mikrotik',
       items: [
         {
-          type: "doc",
-          label: "Why Mikrotik?",
-          id: "networking/mikrotik/why-mikrotik",
+          type: 'doc',
+          label: 'Why Mikrotik?',
+          id: 'networking/mikrotik/why-mikrotik',
         },
         {
-          type: "doc",
-          label: "Network Overview",
-          id: "networking/mikrotik/network-overview",
+          type: 'doc',
+          label: 'Network Overview',
+          id: 'networking/mikrotik/network-overview',
         },
         {
-          type: "doc",
-          label: "Core Concepts",
-          id: "networking/mikrotik/core-concepts",
+          type: 'doc',
+          label: 'Core Concepts',
+          id: 'networking/mikrotik/core-concepts',
         },
         {
-          type: "doc",
-          label: "MikroTik RouterOS on Lenovo M920q",
-          id: "networking/mikrotik/lenovo-m920q-roas",
+          type: 'doc',
+          label: 'MikroTik RouterOS on Lenovo M920q',
+          id: 'networking/mikrotik/lenovo-m920q-roas',
         },
         {
-          type: "doc",
-          label: "VLAN Schema",
-          id: "networking/mikrotik/vlan-schema",
+          type: 'doc',
+          label: 'VLAN Schema',
+          id: 'networking/mikrotik/vlan-schema',
         },
         {
-          type: "doc",
-          label: "Device Configuration",
-          id: "networking/mikrotik/device-configuration",
+          type: 'doc',
+          label: 'Device Configuration',
+          id: 'networking/mikrotik/device-configuration',
         },
         {
-          type: "doc",
-          label: "Firewall Logic",
-          id: "networking/mikrotik/firewall-logic",
+          type: 'doc',
+          label: 'Firewall Logic',
+          id: 'networking/mikrotik/firewall-logic',
         },
         {
-          type: "doc",
-          label: "Configure Email",
-          id: "networking/mikrotik/configure-email-on-mikrotik",
+          type: 'doc',
+          label: 'Configure Email',
+          id: 'networking/mikrotik/configure-email-on-mikrotik',
         },
         {
-          type: "doc",
-          label: "Dynamic DNS with Cloudflare",
-          id: "networking/mikrotik/dynamic-dns-with-cloudflare",
+          type: 'doc',
+          label: 'Dynamic DNS with Cloudflare',
+          id: 'networking/mikrotik/dynamic-dns-with-cloudflare',
         },
         {
-          type: "doc",
-          label: "Common Scenarios",
-          id: "networking/mikrotik/common-scenarios",
+          type: 'doc',
+          label: 'Common Scenarios',
+          id: 'networking/mikrotik/common-scenarios',
         },
         {
-          type: "doc",
-          label: "Summary & Checklist",
-          id: "networking/mikrotik/summary-and-checklist",
+          type: 'doc',
+          label: 'Summary & Checklist',
+          id: 'networking/mikrotik/summary-and-checklist',
         },
       ],
     },
     {
-      type: "category",
-      label: "Kubernetes",
+      type: 'category',
+      label: 'Kubernetes',
       items: [
         // 1. Getting Started (Setup)
         {
-          type: "doc",
-          label: "K3s Setup",
-          id: "kubernetes/k3s-setup",
+          type: 'doc',
+          label: 'K3s Setup',
+          id: 'kubernetes/k3s-setup',
         },
         // 2. Core Concepts
         {
-          type: "category",
-          label: "Core Concepts",
+          type: 'category',
+          label: 'Core Concepts',
           items: [
             {
-              type: "doc",
-              label: "What Is Kubernetes",
-              id: "kubernetes/what-is-kubernetes",
+              type: 'doc',
+              label: 'What Is Kubernetes',
+              id: 'kubernetes/what-is-kubernetes',
             },
             {
-              type: "doc",
-              label: "Anatomy of a kubectl Command",
-              id: "kubernetes/anatomy-of-kubectl-command",
+              type: 'doc',
+              label: 'Anatomy of a kubectl Command',
+              id: 'kubernetes/anatomy-of-kubectl-command',
             },
             {
-              type: "doc",
-              label: "Anatomy of a Kubernetes YAML",
-              id: "kubernetes/anatomy-of-kubernetes-yaml",
+              type: 'doc',
+              label: 'Anatomy of a Kubernetes YAML',
+              id: 'kubernetes/anatomy-of-kubernetes-yaml',
             },
             {
-              type: "doc",
-              label: "Kubernetes 80/20 Rule",
-              id: "kubernetes/kubernetes-80-20-rule",
+              type: 'doc',
+              label: 'Kubernetes YAML Structure',
+              id: 'kubernetes/kubernetes-yml-structure',
+            },
+            {
+              type: 'doc',
+              label: 'Common Kubernetes Commands',
+              id: 'kubernetes/common-kubernetes-commands',
+            },
+            {
+              type: 'doc',
+              label: 'Kubernetes 80/20 Rule',
+              id: 'kubernetes/kubernetes-80-20-rule',
             },
           ],
         },
         // 3. Infrastructure Components
         {
-          type: "category",
-          label: "Storage",
+          type: 'category',
+          label: 'Storage',
           items: [
             {
-              type: "doc",
-              label: "Understanding Longhorn Concepts",
-              id: "storage/understanding-longhorn-concepts",
+              type: 'doc',
+              label: 'Understanding Longhorn Concepts',
+              id: 'storage/understanding-longhorn-concepts',
             },
             {
-              type: "doc",
-              label: "Setup Longhorn",
-              id: "storage/setup-longhorn",
+              type: 'doc',
+              label: 'Setup Longhorn',
+              id: 'storage/setup-longhorn',
             },
             {
-              type: "doc",
-              label: "Setup Longhorn Dashboard",
-              id: "storage/setup-longhorn-dashboard",
+              type: 'doc',
+              label: 'Setup Longhorn Dashboard',
+              id: 'storage/setup-longhorn-dashboard',
             },
           ],
         },
         {
-          type: "category",
-          label: "Networking",
+          type: 'category',
+          label: 'Networking',
           items: [
             {
-              type: "doc",
-              label: "Kubernetes Networking Explained",
-              id: "networking/kubernetes-networking-explained",
+              type: 'doc',
+              label: 'Kubernetes Networking Explained',
+              id: 'networking/kubernetes-networking-explained',
             },
             {
-              type: "doc",
-              label: "Understanding Network Components",
-              id: "networking/understanding-network-components",
+              type: 'doc',
+              label: 'Understanding Network Components',
+              id: 'networking/understanding-network-components',
             },
             {
-              type: "doc",
-              label: "Expose Traefik Dashboard Inside the K3s Cluster",
-              id: "networking/expose-traefik-dashboard-inside-the-k3s-cluster",
+              type: 'doc',
+              label: 'Expose Traefik Dashboard Inside the K3s Cluster',
+              id: 'networking/expose-traefik-dashboard-inside-the-k3s-cluster',
             },
             {
-              type: "doc",
-              label: "Setup MetalLB",
-              id: "networking/setup-metallb",
+              type: 'doc',
+              label: 'Setup MetalLB',
+              id: 'networking/setup-metallb',
             },
           ],
         },
         {
-          type: "category",
-          label: "GitOps",
+          type: 'category',
+          label: 'GitOps',
           items: [
             {
-              type: "doc",
-              label: "Setup ArgoCD",
-              id: "kubernetes/setup-argocd",
+              type: 'doc',
+              label: 'Setup ArgoCD',
+              id: 'kubernetes/setup-argocd',
             },
           ],
         },
         {
-          type: "category",
-          label: "Secrets Management",
+          type: 'category',
+          label: 'Secrets Management',
           items: [
             {
-              type: "doc",
-              label: "Setup Vault",
-              id: "kubernetes/setup-vault",
+              type: 'doc',
+              label: 'Setup Vault',
+              id: 'kubernetes/setup-vault',
             },
           ],
         },
         // 4. Operations
         {
-          type: "category",
-          label: "K3s Backup",
+          type: 'category',
+          label: 'K3s Backup',
           items: [
             {
-              type: "doc",
-              label: "Overview",
-              id: "kubernetes/k3s-backup",
+              type: 'doc',
+              label: 'Overview',
+              id: 'kubernetes/k3s-backup',
             },
             {
-              type: "doc",
-              label: "etcd Snapshots",
-              id: "kubernetes/k3s-backup-etcd",
+              type: 'doc',
+              label: 'etcd Snapshots',
+              id: 'kubernetes/k3s-backup-etcd',
             },
             {
-              type: "doc",
-              label: "Longhorn Backups",
-              id: "kubernetes/k3s-backup-longhorn",
+              type: 'doc',
+              label: 'Longhorn Backups',
+              id: 'kubernetes/k3s-backup-longhorn',
             },
             {
-              type: "doc",
-              label: "Velero Backups",
-              id: "kubernetes/k3s-backup-velero",
+              type: 'doc',
+              label: 'Velero Backups',
+              id: 'kubernetes/k3s-backup-velero',
             },
             {
-              type: "doc",
-              label: "CloudNative PG Backups",
-              id: "kubernetes/k3s-backup-cloudnative-pg",
+              type: 'doc',
+              label: 'CloudNative PG Backups',
+              id: 'kubernetes/k3s-backup-cloudnative-pg',
             },
             {
-              type: "doc",
-              label: "Disaster Recovery",
-              id: "kubernetes/k3s-backup-disaster-recovery",
+              type: 'doc',
+              label: 'Disaster Recovery',
+              id: 'kubernetes/k3s-backup-disaster-recovery',
             },
           ],
         },
         {
-          type: "category",
-          label: "K3s Maintenance",
+          type: 'category',
+          label: 'K3s Maintenance',
           items: [
             {
-              type: "doc",
-              label: "Overview",
-              id: "kubernetes/k3s-maintenance",
+              type: 'doc',
+              label: 'Overview',
+              id: 'kubernetes/k3s-maintenance',
             },
             {
-              type: "doc",
-              label: "Updating K3s",
-              id: "kubernetes/k3s-maintenance-updates",
+              type: 'doc',
+              label: 'Updating K3s',
+              id: 'kubernetes/k3s-maintenance-updates',
             },
             {
-              type: "doc",
-              label: "Health Checks",
-              id: "kubernetes/k3s-maintenance-health",
+              type: 'doc',
+              label: 'Health Checks',
+              id: 'kubernetes/k3s-maintenance-health',
             },
             {
-              type: "doc",
-              label: "Troubleshooting",
-              id: "kubernetes/k3s-maintenance-troubleshooting",
+              type: 'doc',
+              label: 'Troubleshooting',
+              id: 'kubernetes/k3s-maintenance-troubleshooting',
             },
             {
-              type: "doc",
-              label: "Node Management",
-              id: "kubernetes/k3s-maintenance-nodes",
+              type: 'doc',
+              label: 'Node Management',
+              id: 'kubernetes/k3s-maintenance-nodes',
             },
           ],
         },
         // 5. Applications
         {
-          type: "category",
-          label: "Databases",
+          type: 'category',
+          label: 'Databases',
           items: [
             {
-              type: "doc",
-              label: "Databases Within Kubernetes",
-              id: "databases/databases-within-kubernetes",
+              type: 'doc',
+              label: 'Databases Within Kubernetes',
+              id: 'databases/databases-within-kubernetes',
             },
             {
-              type: "doc",
-              label: "Setup CloudNative PG",
-              id: "databases/setup-cloudnative-pg",
+              type: 'doc',
+              label: 'Setup CloudNative PG',
+              id: 'databases/setup-cloudnative-pg',
             },
           ],
         },
         // 6. Practice
         {
-          type: "category",
-          label: "Practice",
+          type: 'category',
+          label: 'Practice',
           items: [
             {
-              type: "doc",
-              label: "Kubernetes YML Structure",
-              id: "kubernetes/kubernetes-yml-structure",
-            },
-            {
-              type: "doc",
-              label: "Getting Started With Kubernetes",
-              id: "kubernetes/getting-started-with-kubernetes",
-            },
-            {
-              type: "doc",
-              label: "Common Kubernetes Commands",
-              id: "kubernetes/common-kubernetes-commands",
+              type: 'doc',
+              label: 'Getting Started',
+              id: 'kubernetes/practice/getting-started',
+            },
+            {
+              type: 'category',
+              label: 'Beginner',
+              items: [
+                {
+                  type: 'doc',
+                  label: 'Basic Kubernetes',
+                  id: 'kubernetes/practice/basic-kubernetes',
+                },
+                {
+                  type: 'doc',
+                  label: 'Namespaces & Resources',
+                  id: 'kubernetes/practice/namespaces-and-resources',
+                },
+                {
+                  type: 'doc',
+                  label: 'ConfigMaps & Secrets',
+                  id: 'kubernetes/practice/configmaps-and-secrets',
+                },
+              ],
+            },
+            {
+              type: 'category',
+              label: 'Intermediate',
+              items: [
+                {
+                  type: 'doc',
+                  label: 'Persistent Volumes',
+                  id: 'kubernetes/practice/persistent-volumes',
+                },
+                {
+                  type: 'doc',
+                  label: 'Longhorn Storage',
+                  id: 'kubernetes/practice/longhorn-storage',
+                },
+                {
+                  type: 'doc',
+                  label: 'Services & Networking',
+                  id: 'kubernetes/practice/services-and-networking',
+                },
+              ],
+            },
+            {
+              type: 'category',
+              label: 'Advanced',
+              items: [
+                {
+                  type: 'doc',
+                  label: 'CloudNative PG Basics',
+                  id: 'kubernetes/practice/cloudnative-pg-basics',
+                },
+                {
+                  type: 'doc',
+                  label: 'CloudNative PG Advanced',
+                  id: 'kubernetes/practice/cloudnative-pg-advanced',
+                },
+                {
+                  type: 'doc',
+                  label: 'Complete Application',
+                  id: 'kubernetes/practice/complete-application',
+                },
+              ],
             },
           ],
         },
       ],
     },
     {
-      type: "category",
-      label: "Tools",
+      type: 'category',
+      label: 'Tools',
       items: [
         {
-          type: "category",
-          label: "Automation",
+          type: 'category',
+          label: 'Automation',
           items: [
             {
-              type: "doc",
-              label: "Ansible",
-              id: "ansible/automation-with-ansible",
+              type: 'doc',
+              label: 'Ansible',
+              id: 'ansible/automation-with-ansible',
             },
           ],
         },
       ],
     },
   ],
-};
+}
 
 // Recursively number categories and doc items
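+// e.g. a top-level 'Tools' category containing an 'Automation' subcategory with an
+// 'Ansible' doc becomes 'N. Tools' → 'N.1. Automation' → 'N.1.1. Ansible', where N is
+// the category's position in the sidebar.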
-function numberSidebar(items, prefix = "") {
-  let count = 1;
+function numberSidebar(items, prefix = '') {
+  let count = 1
 
-  return items.map(item => {
-    const number = `${prefix}${count}`;
-    count++;
+  return items.map((item) => {
+    const number = `${prefix}${count}`
+    count++
 
     // Handle categories
-    if (item.type === "category") {
-      const newLabel = `${number}. ${item.label}`;
-      const numberedItems = numberSidebar(item.items, number + ".");
+    if (item.type === 'category') {
+      const newLabel = `${number}. ${item.label}`
+      const numberedItems = numberSidebar(item.items, number + '.')
       return {
         ...item,
         label: newLabel,
         items: numberedItems,
-      };
+      }
     }
 
     // Handle string items (doc IDs)
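+    // (Docusaurus sidebars accept bare doc-ID strings as shorthand; wrap them
+    //  in doc objects so they can carry a numbered label.)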
-    if (typeof item === "string") {
+    if (typeof item === 'string') {
       return {
-        type: "doc",
+        type: 'doc',
         id: item,
         label: `${number}. ${humanizeId(item)}`,
-      };
+      }
     }
 
     // Handle doc objects
-    if (item.type === "doc") {
+    if (item.type === 'doc') {
       return {
         ...item,
         label: `${number}. ${item.label || humanizeId(item.id)}`,
-      };
+      }
     }
 
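+    // Any other item types (e.g. links) are passed through unchanged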
-    return item;
-  });
+    return item
+  })
 }
 
 // Helper to turn 'quick-start/basic' → 'Basic'
 function humanizeId(id) {
-  const parts = id.split("/");
-  const last = parts[parts.length - 1];
-  return last.replace(/-/g, " ").replace(/\b\w/g, l => l.toUpperCase());
+  const parts = id.split('/')
+  const last = parts[parts.length - 1]
+  return last.replace(/-/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase())
 }
 
 export default {
   tutorialSidebar: numberSidebar(sidebars.tutorialSidebar),
-};
+}