Add recurring backup job for our data to lewis.dmz via NFS

Add documentation on our Longhorn usage
Migrate Hedgedoc uploads to Longhorn
Fix mounting of data disk on Lewis
Pim Kunis 2024-05-20 17:47:49 +02:00
parent 05a49f4e35
commit 790746a4ce
7 changed files with 117 additions and 25 deletions

docs/longhorn.md Normal file

@@ -0,0 +1,30 @@
# Longhorn notes
## Creation of new Longhorn volumes
While it seems handy to use a K8s StorageClass for Longhorn, we do *not* want to use that.
If you use a StorageClass, a PV and Longhorn volume will be automatically provisioned.
These will have the name `pvc-<UID of PVC>`, where the UID of the PVC is random.
This makes it hard to restore a backup to a Longhorn volume with the correct name.
Instead, we want to manually create the Longhorn volumes via the web UI.
Then, we can create the PV and PVC as usual using our K8s provisioning tool (e.g. Kubectl/Kubenix).
Follow these actions to create a Volume:
1. Using the Longhorn web UI, create a new Longhorn volume, keeping the following in mind:
- The size can be somewhat larger than what we expect to reasonably use. We use storage over-provisioning, so the total size of volumes can exceed the real disk size.
- The number of replicas should be 2.
2. Create the PV, PVC and workload as usual (see the sketch below).
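A minimal Kubenix sketch of step 2, mirroring the hedgedoc-uploads PV and PVC added in this commit; the volume name `myvolume` and the `50Mi` size are placeholders:

```nix
# Sketch only: "myvolume" stands in for the name chosen in the Longhorn web UI.
kubernetes.resources = {
  persistentVolumes.myvolume.spec = {
    accessModes = [ "ReadWriteOnce" ];
    capacity.storage = "50Mi";
    # Bind this PV to the matching PVC below.
    claimRef = {
      name = "myvolume";
      namespace = "default";
    };
    csi = {
      driver = "driver.longhorn.io";
      fsType = "ext4";
      # Must equal the name of the manually created Longhorn volume.
      volumeHandle = "myvolume";
    };
    persistentVolumeReclaimPolicy = "Delete";
    volumeMode = "Filesystem";
  };

  persistentVolumeClaims.myvolume.spec = {
    accessModes = [ "ReadWriteOnce" ];
    resources.requests.storage = "50Mi";
    # Empty storageClassName: no dynamic provisioning, bind to the static PV.
    storageClassName = "";
  };
};
```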
## Disaster recovery using Longhorn backups
Backing up Longhorn volumes is easy, but restoring them is trickier.
We consider here the case where all our machines are wiped and all we have left is Longhorn backups.
To restore a backup, perform the following actions:
1. Restore the latest snapshot in the relevant Longhorn backup, keeping the following in mind:
- The name should remain the same (i.e. the one chosen at Longhorn volume creation).
- The number of replicas should be 2.
- Disable recurring jobs.
2. Enable the "backup-nfs" recurring job for the Longhorn volume (see the note after this list).
3. Disable the "default" recurring job group for the Longhorn volume.
4. Create the PV, PVC and workload as usual.
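Steps 2 and 3 can also be encoded declaratively: the PV created in step 4 can carry a `recurringJobSelector` in its CSI `volumeAttributes`, as the hedgedoc-uploads PV in this commit does:

```nix
# From the hedgedoc-uploads PV: select only the backup-nfs recurring job
# for this volume; isGroup = false means it names a job, not a job group.
recurringJobSelector = lib.generators.toYAML { } [{
  name = "backup-nfs";
  isGroup = false;
}];
```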

View file

@@ -31,5 +31,12 @@
version = "v1";
kind = "ClusterIssuer";
};
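# Register Longhorn's RecurringJob CRD so it can be declared via Kubenix.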
recurringJob = {
attrName = "recurringJobs";
group = "longhorn.io";
version = "v1beta1";
kind = "RecurringJob";
};
};
}

View file

@@ -62,7 +62,7 @@
};
volumes = {
uploads.persistentVolumeClaim.claimName = "hedgedoc";
uploads.persistentVolumeClaim.claimName = "hedgedoc-uploads";
config.configMap.name = "hedgedoc-config";
};
};
@@ -70,6 +70,23 @@
};
};
# pods.testje.spec = {
# containers.testje = {
# image = "nginx";
# volumeMounts = [
# {
# name = "uploads";
# mountPath = "/hedgedoc/public/uploads";
# }
# ];
# };
# volumes = {
# uploads.persistentVolumeClaim.claimName = "hedgedoc-uploads";
# };
# };
services.hedgedoc.spec = {
selector.app = "hedgedoc";
@@ -78,11 +95,47 @@
targetPort = "web";
};
};
persistentVolumeClaims.hedgedoc-uploads.spec = {
accessModes = [ "ReadWriteOnce" ];
resources.requests.storage = "50Mi";
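# Empty storageClassName disables dynamic provisioning; this PVC binds to the static PV below.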
storageClassName = "";
};
persistentVolumes.hedgedoc-uploads.spec = {
accessModes = [ "ReadWriteOnce" ];
capacity.storage = "50Mi";
claimRef = {
name = "hedgedoc-uploads";
namespace = "default";
};
csi = {
driver = "driver.longhorn.io";
fsType = "ext4";
volumeAttributes = {
dataLocality = "disabled";
fromBackup = "";
fsType = "ext4";
numberOfReplicas = "2";
recurringJobSelector = lib.generators.toYAML { } [{
name = "backup-nfs";
isGroup = false;
}];
staleReplicaTimeout = "30";
unmapMarkSnapChainRemoved = "ignored";
};
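# volumeHandle must match the name of the manually created Longhorn volume.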
volumeHandle = "hedgedoc-uploads";
};
persistentVolumeReclaimPolicy = "Delete";
volumeMode = "Filesystem";
};
};
lab = {
nfsVolumes.hedgedoc.path = "hedgedoc/uploads";
ingresses.hedgedoc = {
host = "md.kun.is";

View file

@@ -1,4 +1,4 @@
{ myLib, ... }: {
{ lib, myLib, ... }: {
kubernetes.resources = {
deployments.inbucket = {
metadata.labels.app = "inbucket";
@@ -17,14 +17,7 @@
web.containerPort = 9000;
smtp.containerPort = 2500;
};
volumeMounts = [{
name = "storage";
mountPath = "/storage";
}];
};
volumes.storage.persistentVolumeClaim.claimName = "inbucket";
};
};
};
@@ -51,12 +44,6 @@
}];
};
};
persistentVolumeClaims.inbucket.spec = {
accessModes = [ "ReadWriteOnce" ];
storageClassName = "longhorn";
resources.requests.storage = "30Mi";
};
};
lab.ingresses.inbucket = {

View file

@@ -1,4 +1,4 @@
{ nixhelm, system, ... }: {
{ lib, nixhelm, system, ... }: {
config = {
kubernetes = {
helm.releases.longhorn = {
@@ -8,6 +8,8 @@
defaultSettings = {
defaultDataPath = "/mnt/longhorn";
storageMinimalAvailablePercentage = 0;
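# Let recurring backup jobs run even for volumes that are currently detached,
# and send backups to the NFS share on lewis.dmz.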
allowRecurringJobWhileVolumeDetached = true;
backupTarget = "nfs://lewis.dmz:/mnt/data/nfs/longhorn-backup";
};
persistence = {
@@ -46,6 +48,13 @@
}];
};
};
recurringJobs.backup-nfs.spec = {
cron = "0 1 * * *"; # One o'clock at night
task = "backup";
retain = 2; # We don't need many, as we also make Borg backups.
concurrency = 1;
};
};
};
};

View file

@@ -32,6 +32,7 @@ let
"/bazarr/config"
"/minecraft"
"/atticd"
"/longhorn-backup"
];
nfsExports = lib.strings.concatLines (

View file

@@ -35,13 +35,18 @@ in {
};
config = {
fileSystems = {
"/" = lib.mkIf machine.isRaspberryPi {
device = "/dev/disk/by-label/NIXOS_SD";
fsType = "ext4";
options = [ "noatime" ];
};
};
fileSystems = lib.attrsets.mergeAttrsList [
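# Mount the data partition only on machines that are neither Raspberry Pis
# nor Kubernetes nodes; the disks of Kubernetes nodes are managed by disko below.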
(lib.optionalAttrs ((! machine.isRaspberryPi) && (! cfg.kubernetesNode)) {
"${cfg.dataMountPoint}".device = cfg.dataPartition;
})
(lib.optionalAttrs machine.isRaspberryPi {
"/" = {
device = "/dev/disk/by-label/NIXOS_SD";
fsType = "ext4";
options = [ "noatime" ];
};
})
];
disko = lib.mkIf (! machine.isRaspberryPi) (if cfg.kubernetesNode then {
devices = {