From 3999016b78858c2d13baca51b002e1e8d2c19706 Mon Sep 17 00:00:00 2001
From: Pim Kunis
Date: Thu, 29 May 2025 12:37:50 +0200
Subject: [PATCH] Purge Longhorn

---
 docs/longhorn.md              |  98 ---------------------
 globals.nix                   |   2 -
 modules/bootstrap-default.nix |  42 ---------
 modules/default.nix           |   1 -
 modules/dummy-types.nix       |  14 ---
 modules/longhorn-volume.nix   | 157 ----------------------------------
 6 files changed, 314 deletions(-)
 delete mode 100644 docs/longhorn.md
 delete mode 100644 modules/longhorn-volume.nix

diff --git a/docs/longhorn.md b/docs/longhorn.md
deleted file mode 100644
index 3230543..0000000
--- a/docs/longhorn.md
+++ /dev/null
@@ -1,98 +0,0 @@
-# Longhorn notes
-
-## Troubleshooting
-
-```
-Multi-Attach error for volume "prowlarr" Volume is already exclusively attached to one node and can't be attached to another
-```
-
-I solved the above problem like this:
-```
-❯ kubectl get volumeattachments | grep prowlarr
-csi-f13ee1f46a4acc0d7e4abe8a3c993c7e043e9a55cd7573bda3499085654b493a   driver.longhorn.io   prowlarr   lewis   true   3m38s
-❯ kubectl delete volumeattachments csi-f13ee1f46a4acc0d7e4abe8a3c993c7e043e9a55cd7573bda3499085654b493a
-❯ kubectl rollout restart -n media deployment prowlarr
-```
-
-```
-driver name driver.longhorn.io not found in the list of registered CSI drivers
-```
-
-I solved this by restarting k3s:
-```
-systemctl restart k3s
-```
-
-## Migration from NFS to Longhorn
-
-1. Delete the workload, and delete the PV and PVC that use NFS.
-2. Create Longhorn volumes as described below.
-3. Copy the NFS data from lewis.dmz to local disk.
-4. Spin up a temporary pod and mount the Longhorn volume(s) in it:
-   ```nix
-   {
-     pods.testje.spec = {
-       containers.testje = {
-         image = "nginx";
-
-         volumeMounts = [
-           {
-             name = "uploads";
-             mountPath = "/hedgedoc/public/uploads";
-           }
-         ];
-       };
-
-       volumes = {
-         uploads.persistentVolumeClaim.claimName = "hedgedoc-uploads";
-       };
-     };
-   }
-   ```
-5. Use `kubectl cp` to copy the data from the local disk to the pod.
-6. Be sure to set the group ownership of the mount to the correct GID.
-7. Delete the temporary pod.
-8. Create the workload with updated volume mounts.
-9. Delete the data from local disk.
-
-## Creation of new Longhorn volumes
-
-While it might seem handy to use a K8s StorageClass for Longhorn, we do *not* want to use one.
-If you use a StorageClass, a PV and a Longhorn volume are provisioned automatically.
-These get the name `pvc-<UID>`, where the UID of the PVC is random.
-This makes it hard to restore a backup to a Longhorn volume with the correct name.
-
-Instead, we want to create the Longhorn volumes manually via the web UI.
-Then, we can create the PV and PVC as usual using our K8s provisioning tool (e.g. Kubectl/Kubenix).
-
-Follow these steps to create a volume:
-1. Using the Longhorn web UI, create a new Longhorn volume, keeping the following in mind:
-   - The size can be somewhat more than what we expect to reasonably use. We use storage over-provisioning, so the total size of all volumes can exceed the real disk size.
-   - The number of replicas should be 2.
-2. Enable the "backup-nfs" recurring job for the Longhorn volume.
-3. Disable the "default" recurring job group for the Longhorn volume.
-4. Create the PV, PVC and workload as usual (see the sketch below).
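-
-A minimal sketch of step 4, using the `lab.longhornVolumes` option from
-`modules/longhorn-volume.nix`; the size and namespace here are made-up examples:
-```nix
-{
-  # Declares one PV bound to the manually created Longhorn volume
-  # "hedgedoc-uploads" (the volumeHandle is the attribute name),
-  # plus a matching PVC.
-  lab.longhornVolumes.hedgedoc-uploads = {
-    storage = "5Gi";        # assumed size; match the volume created in step 1
-    namespace = "hedgedoc"; # assumed namespace of the claiming workload
-  };
-}
-```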
-
-## Disaster recovery using Longhorn backups
-
-Backing up Longhorn volumes is very easy, but restoring them is trickier.
-We consider here the case where all our machines are wiped, and all we have left is Longhorn backups.
-To restore a backup, perform the following actions:
-1. Restore the latest snapshot in the relevant Longhorn backup, keeping the following in mind:
-   - The name should remain the same (i.e. the one chosen at Longhorn volume creation).
-   - The number of replicas should be 2.
-   - Disable recurring jobs.
-2. Enable the "backup-nfs" recurring job for the Longhorn volume.
-3. Disable the "default" recurring job group for the Longhorn volume.
-4. Create the PV, PVC and workload as usual.
-
-## Recovering Longhorn volumes without a Kubernetes cluster
-
-1. Navigate to the Longhorn backupstore location (`/mnt/longhorn/persistent/longhorn-backup/backupstore/volumes` for us).
-2. Find the directory for the desired volume: `ls **/**`.
-3. Determine the last backup for the volume: `cat volume.cfg | jq '.LastBackupName'`.
-4. Find the blocks that form the volume, and their order: `cat backups/<backup name>.cfg | jq '.Blocks'`.
-5. Extract each block using lz4: `lz4 -d blocks/XX/YY/XXYY.blk block`.
-6. Append the blocks in order to form the file system: `cat block1 block2 block3 > volume.img` (a consolidated sketch of steps 3-6 follows this list).
-7. Lastly, fix the size of the image: simply append zeros to the end until the file is long enough that `fsck.ext4` no longer complains.
-8. Mount the image: `mount -o loop volume.img /mnt/volume`.
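-
-A consolidated sketch of steps 3-6 as one shell session. The backup config
-filename and the `.Blocks` field layout are assumptions based on the structure
-above, so verify them against the actual backupstore:
-```
-cd <directory of the desired volume>  # found in step 2
-last=$(jq -r '.LastBackupName' volume.cfg)
-
-# Append every block of that backup, in the listed order, to the image.
-jq -r '.Blocks[].BlockChecksum' "backups/backup_${last}.cfg" |
-while read -r blk; do
-  lz4 -dc "blocks/${blk:0:2}/${blk:2:2}/${blk}.blk" >> volume.img
-done
-```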
diff --git a/globals.nix b/globals.nix
index d021288..155119f 100644
--- a/globals.nix
+++ b/globals.nix
@@ -33,7 +33,6 @@ _: {
   };
 
   routerPublicIPv4 = "89.220.7.89";
-  #routerPublicIPv6 = "2a0d:6e00:1a77::1";
   bind9Ipv6 = "2a0d:6e00:1a77:30::134";
 
   # Load balancer IPv4
@@ -46,7 +45,6 @@ _: {
   dnsmasqIPv4 = "192.168.30.135";
   jellyseerrIPv4 = "192.168.30.137";
   syncthingIPv4 = "192.168.30.138";
-  longhornIPv4 = "192.168.30.139";
   radarrIPv4 = "192.168.30.140";
   prowlarrIPv4 = "192.168.30.141";
   sonarrIPv4 = "192.168.30.142";
diff --git a/modules/bootstrap-default.nix b/modules/bootstrap-default.nix
index 7c1cdaa..dcacb10 100644
--- a/modules/bootstrap-default.nix
+++ b/modules/bootstrap-default.nix
@@ -15,28 +15,9 @@
         chart = nixhelm.chartsDerivations.${system}.metallb.metallb;
         includeCRDs = true;
       };
-
-      longhorn = {
-        chart = nixhelm.chartsDerivations.${system}.longhorn.longhorn;
-        includeCRDs = true;
-
-        values = {
-          persistence.defaultClassReplicaCount = 2;
-          service.ui.type = "LoadBalancer";
-
-          defaultSettings = {
-            defaultDataPath = "/mnt/longhorn";
-            storageMinimalAvailablePercentage = 0;
-            allowRecurringJobWhileVolumeDetached = true;
-            backupTarget = "nfs://lewis.dmz:/mnt/longhorn/persistent/longhorn-backup";
-          };
-        };
-      };
     };
 
     resources = {
-      services.longhorn-frontend.spec.loadBalancerIP = globals.longhornIPv4;
-
       namespaces = {
         static-websites = {};
         freshrss = {};
@@ -67,32 +48,9 @@
         })
         globals.nodeLabels;
 
-      recurringJobs.backup-nfs.spec = {
-        cron = "0 1 * * *"; # One o'clock at night
-        task = "backup";
-        retain = 2; # We don't need many, as we also make Borg backups.
-        concurrency = 1;
-      };
-
-      backuptargets.backup.spec = {
-        backupTargetURL = "nfs://lewis.dmz:/mnt/longhorn/persistent/longhorn-backup";
-        pollInterval = "5m0s";
-      };
-
       ipAddressPools.main.spec.addresses = ["192.168.30.128-192.168.30.200" "2a0d:6e00:1a77:30::2-2a0d:6e00:1a77:30:ffff:ffff:ffff:fffe"];
       l2Advertisements.main.metadata = {};
     };
   };
-
-  lab = {
-    tailscaleIngresses.tailscale-longhorn = {
-      host = "longhorn";
-
-      service = {
-        name = "longhorn-frontend";
-        portName = "http";
-      };
-    };
-  };
 };
}
diff --git a/modules/default.nix b/modules/default.nix
index 9a11292..9409b69 100644
--- a/modules/default.nix
+++ b/modules/default.nix
@@ -2,7 +2,6 @@
   imports = [
     ./inbucket.nix
     ./tailscale-ingress.nix
-    ./longhorn-volume.nix
     ./ingress.nix
     ./dummy-types.nix
     ./dnsmasq.nix
diff --git a/modules/dummy-types.nix b/modules/dummy-types.nix
index d54dd1f..14daf85 100644
--- a/modules/dummy-types.nix
+++ b/modules/dummy-types.nix
@@ -32,25 +32,11 @@
       kind = "ClusterIssuer";
     };
 
-    recurringJob = {
-      attrName = "recurringJobs";
-      group = "longhorn.io";
-      version = "v1beta1";
-      kind = "RecurringJob";
-    };
-
     middlewares = {
       attrName = "middlewares";
       group = "traefik.io";
       version = "v1alpha1";
       kind = "Middleware";
     };
-
-    backuptargets = {
-      attrName = "backuptargets";
-      group = "longhorn.io";
-      version = "v1beta1";
-      kind = "BackupTarget";
-    };
   };
 }
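For context: dummy types like the two removed above are what let kubenix address
Longhorn's custom resources by attribute name. A sketch of what the removed
`recurringJob` entry enabled elsewhere in the config; the job name and schedule
here are hypothetical:

    # Rendered as a longhorn.io/v1beta1 RecurringJob custom resource.
    kubernetes.resources.recurringJobs.snapshot-daily.spec = {
      cron = "0 3 * * *"; # hypothetical schedule
      task = "snapshot";
      retain = 7;
      concurrency = 1;
    };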
diff --git a/modules/longhorn-volume.nix b/modules/longhorn-volume.nix
deleted file mode 100644
index db08037..0000000
--- a/modules/longhorn-volume.nix
+++ /dev/null
@@ -1,157 +0,0 @@
-{
-  lib,
-  config,
-  ...
-}: let
-  longhornVolumeOpts = _: {
-    options = {
-      storage = lib.mkOption {
-        type = lib.types.str;
-      };
-
-      namespace = lib.mkOption {
-        type = lib.types.str;
-        default = "default";
-      };
-    };
-  };
-
-  longhornPVOpts = _: {
-    options = {
-      storage = lib.mkOption {
-        type = lib.types.str;
-      };
-    };
-  };
-
-  longhornPVCOpts = {name, ...}: {
-    options = {
-      volumeName = lib.mkOption {
-        type = lib.types.str;
-        default = name;
-      };
-
-      # TODO: ideally we take this from the longhornPV so we don't duplicate this information.
-      storage = lib.mkOption {
-        type = lib.types.str;
-      };
-    };
-  };
-in {
-  options = {
-    lab.longhornVolumes = lib.mkOption {
-      type = with lib.types; attrsOf (submodule longhornVolumeOpts);
-      default = {};
-    };
-
-    lab.longhorn = {
-      persistentVolume = lib.mkOption {
-        type = with lib.types; attrsOf (submodule longhornPVOpts);
-        default = {};
-      };
-
-      persistentVolumeClaim = lib.mkOption {
-        type = with lib.types; attrsOf (submodule longhornPVCOpts);
-        default = {};
-      };
-    };
-  };
-
-  config = {
-    kubernetes.resources = {
-      persistentVolumes =
-        lib.mergeAttrs
-        (builtins.mapAttrs
-          (name: longhornVolume: {
-            spec = {
-              accessModes = ["ReadWriteOnce"];
-              capacity.storage = longhornVolume.storage;
-              persistentVolumeReclaimPolicy = "Delete";
-              volumeMode = "Filesystem";
-
-              claimRef = {
-                inherit name;
-                inherit (longhornVolume) namespace;
-              };
-
-              csi = {
-                driver = "driver.longhorn.io";
-                fsType = "ext4";
-                volumeHandle = name;
-
-                volumeAttributes = {
-                  dataLocality = "disabled";
-                  fromBackup = "";
-                  fsType = "ext4";
-                  numberOfReplicas = "2";
-                  staleReplicaTimeout = "30";
-                  unmapMarkSnapChainRemoved = "ignored";
-
-                  recurringJobSelector = lib.generators.toYAML {} [
-                    {
-                      name = "backup-nfs";
-                      isGroup = false;
-                    }
-                  ];
-                };
-              };
-            };
-          })
-          config.lab.longhornVolumes)
-        (builtins.mapAttrs
-          (name: longhornPV: {
-            spec = {
-              accessModes = ["ReadWriteOnce"];
-              capacity.storage = longhornPV.storage;
-              persistentVolumeReclaimPolicy = "Delete";
-              volumeMode = "Filesystem";
-
-              csi = {
-                driver = "driver.longhorn.io";
-                fsType = "ext4";
-                volumeHandle = name;
-
-                volumeAttributes = {
-                  dataLocality = "disabled";
-                  fromBackup = "";
-                  fsType = "ext4";
-                  numberOfReplicas = "2";
-                  staleReplicaTimeout = "30";
-                  unmapMarkSnapChainRemoved = "ignored";
-
-                  recurringJobSelector = lib.generators.toYAML {} [
-                    {
-                      name = "backup-nfs";
-                      isGroup = false;
-                    }
-                  ];
-                };
-              };
-            };
-          })
-          config.lab.longhorn.persistentVolume);
-
-      persistentVolumeClaims =
-        lib.mergeAttrs
-        (builtins.mapAttrs
-          (_name: longhornVolume: {
-            spec = {
-              accessModes = ["ReadWriteOnce"];
-              resources.requests.storage = longhornVolume.storage;
-              storageClassName = "";
-            };
-          })
-          config.lab.longhornVolumes)
-        (builtins.mapAttrs
-          (_name: longhornPVC: {
-            spec = {
-              accessModes = ["ReadWriteOnce"];
-              resources.requests.storage = longhornPVC.storage;
-              storageClassName = "";
-              inherit (longhornPVC) volumeName;
-            };
-          })
-          config.lab.longhorn.persistentVolumeClaim);
-    };
-  };
-}
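For reference, the removed module was consumed in two ways: `lab.longhornVolumes`
generates a PV/PVC pair per volume, while `lab.longhorn.persistentVolume` and
`lab.longhorn.persistentVolumeClaim` declare them separately (with `volumeName`
defaulting to the attribute name). A sketch with hypothetical names and sizes:

    {
      # One attrset yields both the PV and the PVC:
      lab.longhornVolumes.prowlarr = {
        storage = "10Gi";
        namespace = "media";
      };

      # Or declare them separately:
      lab.longhorn.persistentVolume.radarr.storage = "10Gi";
      lab.longhorn.persistentVolumeClaim.radarr.storage = "10Gi";
    }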