Documentation
¶
Index ¶
- func ApplyManifestFromFile(ctx context.Context, clusterClient client.Client, filePath string) error
- func ApplyManifestFromReader(ctx context.Context, clusterClient client.Client, reader io.Reader) error
- func ApplyManifestFromURL(ctx context.Context, clusterClient client.Client, url string) error
- func CopySealedSecretsKeysFromManagement(ctx context.Context, mgmt, main client.Client) error
- func CreateArgoCDProject(ctx context.Context, argoCDProjectClient project.ProjectServiceClient, ...) error
- func CreateBackup(ctx context.Context, name string, clusterClient client.Client) error
- func CreateKubernetesClient(ctx context.Context, kubeconfigPath string) (client.Client, error)
- func CreateNamespace(ctx context.Context, namespaceName string, clusterClient client.Client) error
- func CreateUnstructuredClient(_ context.Context) (client.Client, error)
- func EnsureSealedSecretsHealthy(ctx context.Context, mgmt, main client.Client) error
- func GenerateSealedSecret(ctx context.Context, secretFilePath string) error
- func GetCapiClusterNamespace() string
- func GetClusterResource(ctx context.Context, clusterClient client.Client) (*clusterAPIV1Beta1.Cluster, error)
- func GetKubernetesResource(ctx context.Context, clusterClient client.Client, resource client.Object) error
- func GetLatestVeleroBackup(ctx context.Context, clusterClient client.Client) (*veleroV1.Backup, error)
- func GetMainClusterEndpoint(ctx context.Context) (*url.URL, error)
- func GetManagementClusterKubeconfigPath(_ context.Context) (string, error)
- func HelmInstall(ctx context.Context, args *HelmInstallArgs) error
- func InstallAndSetupArgoCD(ctx context.Context, clusterDir string, clusterClient client.Client) error
- func InstallAndSetupCrossplane(ctx context.Context) error
- func InstallSealedSecrets(ctx context.Context) error
- func IsClusterctlMoveExecuted(ctx context.Context) bool
- func IsNodeGroupCountZero(ctx context.Context) bool
- func NewArgoCDClient(ctx context.Context, clusterClient client.Client) (apiclient.Client, error)
- func RecreateArgoCDApplicationClient(ctx context.Context, clusterClient client.Client) error
- func ReinstallSealedSecrets(ctx context.Context) error
- func RemoveNoScheduleTaintsFromMasterNodes(ctx context.Context, clusterClient client.Client) error
- func ReplaceForceFromDir(ctx context.Context, clusterClient client.Client, dirPath string) error
- func RestoreVeleroBackup(ctx context.Context, clusterClient client.Client, ...) error
- func SaveProvisionedClusterKubeconfig(ctx context.Context, kubeClient client.Client) error
- func SealIfPlaintextChanged(ctx context.Context, destinationFilePath string, plaintextBytes []byte) error
- func SyncAllArgoCDApps(ctx context.Context, skipMonitoringSetup bool, orderedApps []AppSyncStep) error
- func SyncArgoCDApp(ctx context.Context, name string, ...) error
- func TriggerCRONJob(ctx context.Context, objectKey client.ObjectKey, clusterClient client.Client) error
- func UsingClusterAPI() (usingClusterAPI bool)
- func WaitForAllMachinesRunning(ctx context.Context, managementClusterClient, mainClusterClient client.Client) error
- func WaitForArgoCDAppHealthy(ctx context.Context, name string) error
- func WaitForCPNodesNetworkingReady(ctx context.Context, kubeClient client.Client) error
- func WaitForCertificatesReady(ctx context.Context, kubeClient client.Client, certs []types.NamespacedName) error
- func WaitForMainClusterToBeProvisioned(ctx context.Context, managementClusterClient client.Client) error
- func WaitForMainClusterToBeReady(ctx context.Context, kubeClient client.Client) error
- type AppSyncStep
- type ArgoCDAppClient
- type ArgoCDAppManager
- type HelmActionFactory
- type HelmInstallArgs
- type HelmInstallRunner
- type HelmListRunner
- type HelmUpgradeRunner
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ApplyManifestFromFile ¶ added in v0.23.0
ApplyManifestFromFile reads a (possibly multi-document) YAML file and applies each document to the cluster using server-side apply.
func ApplyManifestFromReader ¶ added in v0.23.0
func ApplyManifestFromReader(ctx context.Context, clusterClient client.Client, reader io.Reader) error
ApplyManifestFromReader reads a multi-document YAML stream and applies each document to the cluster using server-side apply (patch with Apply strategy and force ownership).
func ApplyManifestFromURL ¶ added in v0.23.0
ApplyManifestFromURL fetches YAML from the given HTTP(S) URL and applies each document to the cluster using server-side apply.
func CopySealedSecretsKeysFromManagement ¶ added in v0.23.0
CopySealedSecretsKeysFromManagement copies every active sealed-secrets key Secret from the management cluster's sealed-secrets namespace to the main cluster's same namespace. After the copy, the main cluster's sealed-secrets controller picks the new key(s) up via its Secret watch (typically within a second), adding them to its decryption keyring alongside whatever key it generated on first start.
Why we do this on every bootstrap: kubeaid-cli runs kubeseal against the *management* controller during the management-cluster phase, producing SealedSecret artefacts in kubeaid-config that are encrypted with the management controller's key. If the main controller doesn't also know that key, those SealedSecrets stay undecryptable forever (sealing is non-deterministic, so the per-file kubeaid-sha256 cache won't re-seal them on the next run unless the plaintext changed).
Idempotent: re-runs overwrite each copied Secret with the current management-side bytes, so a re-bootstrap on top of a partially-pivoted cluster converges cleanly. Safe to call when the management cluster has zero keys (returns nil, no-op).
Mirrors the DR-restore pattern in pkg/core/setup_cluster.go — same shape, different source (live management cluster instead of object-storage backup).
func CreateArgoCDProject ¶
func CreateArgoCDProject(ctx context.Context, argoCDProjectClient project.ProjectServiceClient, name string) error
CreateArgoCDProject creates an ArgoCD Project with the given name. Returns nil if the project already exists.
func CreateBackup ¶ added in v0.11.1
CreateBackup creates a Velero Backup with the given name.
func CreateKubernetesClient ¶
Tries to create a Kubernetes Go client using the Kubeconfig file present at the given path. Returns the Kubernetes Go client.
func CreateNamespace ¶
Creates the given namespace (if it doesn't already exist).
func CreateUnstructuredClient ¶ added in v0.23.0
CreateUnstructuredClient creates a Kubernetes client suitable for working with unstructured objects. It reads the kubeconfig path from the KUBECONFIG env var.
func EnsureSealedSecretsHealthy ¶ added in v0.23.0
EnsureSealedSecretsHealthy is the single source of truth for "is sealed-secrets actually functional on this cluster?" after we've run the install + copied keys.
Two independent checks, two independent recovery actions:
**Key parity** — count of Secrets labelled sealedsecrets.bitnami.com/sealed-secrets-key=active on main must equal the count on mgmt. If short, re-run the copy. Recovery for "the copy didn't actually land" (transient API blip, racing with namespace creation, etc.).
**Controller Deployment health** — Deployment must have AvailableReplicas == desired AND ReadyReplicas == desired AND UnavailableReplicas == 0. If unhealthy after a 5min poll, call ReinstallSealedSecrets (a `helm upgrade`, which bypasses the skip-if-deployed fast path; `helm install` with Replace=true cannot recover a release that is still marked deployed). Recovery for "Helm thinks the release is fine but the Deployment was deleted out-of-band" (operator manual recovery, ArgoCD pruning, etc.).
Retry budget = 1 reinstall. If the Deployment is still unhealthy afterward, return a rich diagnostic so the operator knows whether it's a taint, an image pull, a crashing container, etc. — not just "Sealed Secrets controller not Ready".
func GenerateSealedSecret ¶
GenerateSealedSecret takes the path to a Kubernetes Secret file. It replaces the contents of that file by generating the corresponding Sealed Secret.
Reads the plaintext from disk into a buffer, encrypts via the shared sealPlaintextToBytes helper, atomically writes the sealed YAML back in a single op. The buffer-and-write pattern means there's no transient half-written file on disk — either the original plaintext is there or the sealed output is, never both / neither.
func GetCapiClusterNamespace ¶
func GetCapiClusterNamespace() string
Returns the namespace (capi-cluster / capi-cluster-<customer-id>) where the 'cloud-credentials' Kubernetes Secret will exist. This Kubernetes Secret will be used by Cluster API to communicate with the underlying cloud provider.
func GetClusterResource ¶
func GetClusterResource(ctx context.Context, clusterClient client.Client, ) (*clusterAPIV1Beta1.Cluster, error)
Looks for and returns the Cluster resource in the given Kubernetes cluster.
func GetKubernetesResource ¶
func GetKubernetesResource(ctx context.Context, clusterClient client.Client, resource client.Object) error
Tries to fetch the given Kubernetes resource using the given Kubernetes cluster client.
func GetLatestVeleroBackup ¶ added in v0.10.0
func GetLatestVeleroBackup(ctx context.Context, clusterClient client.Client) (*veleroV1.Backup, error)
GetLatestVeleroBackup identifies and returns the latest / most recent Velero Backup.
func GetMainClusterEndpoint ¶ added in v0.10.0
func GetManagementClusterKubeconfigPath ¶
GetManagementClusterKubeconfigPath returns the management cluster kubeconfig file path on the host.
func HelmInstall ¶
func HelmInstall(ctx context.Context, args *HelmInstallArgs) error
Installs the Helm chart present at the given local path (if it is not already deployed). We clone the KubeAid repository locally and then use the absolute path to one of its Helm charts (like argo-cd / sealed-secrets) to install that chart.
func InstallAndSetupArgoCD ¶
func InstallAndSetupArgoCD(ctx context.Context, clusterDir string, clusterClient client.Client) error
Installs the ArgoCD Helm chart and creates the root ArgoCD App. Then creates an ArgoCD Application client. The function itself returns only an error; the client is not part of the return value.
func InstallAndSetupCrossplane ¶ added in v0.12.2
InstallAndSetupCrossplane syncs the crossplane, crossplane-providers-and-functions and crossplane-compositions ArgoCD Apps one by one.
The three apps must be synced in order: crossplane first, then providers/functions, then compositions. ArgoCD sync waves inside a single chart were attempted but did not produce the required ordering — hence the sequential calls below.
func InstallSealedSecrets ¶
InstallSealedSecrets performs a minimal installation of Sealed Secrets in the underlying Kubernetes cluster. Honours the standard skip-if-deployed fast path — re-runs against a healthy install are cheap no-ops.
func IsClusterctlMoveExecuted ¶
Returns whether the 'clusterctl move' command has already been executed or not.
func IsNodeGroupCountZero ¶ added in v0.12.0
Returns whether there are zero node-groups or not. If yes, then we need to remove taints from the control-plane nodes right after the main cluster is provisioned.
func NewArgoCDClient ¶
Port-forwards the ArgoCD server and creates an ArgoCD client. Returns the ArgoCD client.
func RecreateArgoCDApplicationClient ¶
Recreates the ArgoCD Application client by port-forwarding the ArgoCD server. If the clusterClient is not provided (is nil), then it picks up the KUBECONFIG environment variable and constructs the cluster client by itself.
func ReinstallSealedSecrets ¶ added in v0.23.0
ReinstallSealedSecrets is the recovery entry point used by EnsureSealedSecretsHealthy when the actual in-cluster state shows the controller's Deployment is missing or stuck. Uses `helm upgrade` — it works for releases in any non-pending state, reads the previous release manifest, re-renders the chart, and applies the diff against the live cluster — re-creating any drifted resources (e.g. an operator-manually-deleted Deployment).
`helm install --replace` only handles uninstalled/failed states (per pkg/action/install.go::availableName), so it can't recover a release stuck in "deployed" with missing resources. Upgrade has no such restriction.
func RemoveNoScheduleTaintsFromMasterNodes ¶ added in v0.12.0
RemoveNoScheduleTaintsFromMasterNodes removes the 'node-role.kubernetes.io/control-plane:NoSchedule' taint from master nodes.
func ReplaceForceFromDir ¶ added in v0.23.0
ReplaceForceFromDir reads all YAML files in the given directory and, for each document: deletes the existing resource (ignoring not-found) then creates it. This replicates `kubectl replace --force -f <dir>`.
func RestoreVeleroBackup ¶ added in v0.10.0
func RestoreVeleroBackup(ctx context.Context, clusterClient client.Client, latestVeleroBackup *veleroV1.Backup, ) error
RestoreVeleroBackup creates a Velero Restore object for the given Velero Backup.
func SaveProvisionedClusterKubeconfig ¶ added in v0.8.0
SaveProvisionedClusterKubeconfig saves kubeconfig of the provisioned cluster locally.
func SealIfPlaintextChanged ¶ added in v0.23.0
func SealIfPlaintextChanged(ctx context.Context, destinationFilePath string, plaintextBytes []byte, ) error
SealIfPlaintextChanged converts plaintextBytes to a SealedSecret YAML at destinationFilePath — but only when the cache header on the existing file doesn't already match the hash of (plaintextBytes ‖ controllerCert). On a cache hit, leaves the file untouched: no kubeseal call, no rewrite, no git diff.
The cache key folds in the sealed-secrets controller's public cert so it invalidates on cluster re-key. Without that, recreating the management cluster (which provisions a fresh controller key) leaves every sealed-secret file's plaintext-hash matching but the cached ciphertext encrypted with the dead key — the new controller then fails to decrypt with "no key could decrypt secret" and the bootstrap dies later trying to use the empty Secret.
On cache miss (header missing, header mismatched, or no file yet), runs kubeseal in memory using the just-loaded public key, prepends the new hash header to the sealed bytes, and writes atomically via renameio — the plaintext never lands on disk and there's no transient half-written file the operator could trip over mid-bootstrap.
The header is a YAML comment so the sealed-secrets-controller's reconciler doesn't see it; it's purely a kubeaid-cli-side cache key that happens to live inside the sealed-secret artifact file.
func SyncAllArgoCDApps ¶
func SyncAllArgoCDApps(ctx context.Context, skipMonitoringSetup bool, orderedApps []AppSyncStep, ) error
SyncAllArgoCDApps lists and syncs all the ArgoCD Apps.
orderedApps are synced first, in slice order, each immediately followed by its AfterSync hook (if any). Every other App is then synced in a generic loop. A step whose App isn't present in the cluster is skipped.
func SyncArgoCDApp ¶
func SyncArgoCDApp(ctx context.Context, name string, resources []*argoCDV1Aplha1.SyncOperationResource) error
func TriggerCRONJob ¶ added in v0.11.1
func TriggerCRONJob(ctx context.Context, objectKey client.ObjectKey, clusterClient client.Client) error
Triggers the given CronJob by creating a Job from its Job template.
func UsingClusterAPI ¶ added in v0.12.0
func UsingClusterAPI() (usingClusterAPI bool)
Returns whether we're using Cluster API or not.
func WaitForAllMachinesRunning ¶ added in v0.23.0
func WaitForAllMachinesRunning(ctx context.Context, managementClusterClient, mainClusterClient client.Client, ) error
WaitForAllMachinesRunning blocks until every Machine in the capi-cluster namespace has reached Phase=Running with status.nodeRef populated. This is `clusterctl move`'s headline pre-condition: it refuses to start the move while any Machine is still bringing up its Node. Earlier in the bootstrap, WaitForMainClusterToBeProvisioned has already cleared the *initial* provisioning — but SetupCluster runs long-lived ArgoCD syncs after that, any of which can flip the KubeadmControlPlane spec (chart upgrade between bootstrap attempts, values change, etc.) and trigger a control-plane rolling update. That rollout leaves us back at "one of N Machines is mid-provision" by the time pivotCluster fires, which is exactly when clusterctl move would error out. This wait makes the operation idempotent on rolling-update collisions.
While waiting it shows the same live Machine-status table as WaitForMainClusterToBeProvisioned. On success it swaps that table for a `kubectl get nodes`-style table built from mainClusterClient — the live table was the during-the-wait view; the Nodes table is the persistent pre-pivot audit trail the operator eyeballs before the move. Same screen-ownership and timeout semantics. Returns nil only when every Machine has a Node registered — empty Machine list counts as not-ready (a freshly scaled-to-zero cluster wouldn't be a sensible thing to clusterctl-move anyway, and treating it as ready would hide a misconfiguration).
func WaitForArgoCDAppHealthy ¶ added in v0.23.0
WaitForArgoCDAppHealthy blocks until the named ArgoCD App reports both Sync=Synced and Health=Healthy. Used by callers that need to do follow-on work against the underlying application (e.g. talk to Keycloak admin API once the keycloakx App is fully up).
func WaitForCPNodesNetworkingReady ¶ added in v0.23.0
WaitForCPNodesNetworkingReady blocks until every control-plane Node in the main cluster reports BOTH Ready=True AND NetworkUnavailable=False, ctx is cancelled, or waitForCPNodesNetworkingTimeout passes.
CAPI's Cluster.Phase=Provisioned + ReadyCondition=True (the predicate WaitForMainClusterToBeProvisioned waits on above) flips True the moment the static control-plane pods (apiserver/etcd/controller-manager/scheduler) respond on the cluster's apiserver endpoint. It has no signal about whether the CNI is installed and a Node can actually schedule pods. Historically that gap has masked cilium postKubeadm install failures (e.g. the `helm install cilium --atomic --wait` rolling back when hubble Deployments stay Pending behind the kubeadm control-plane:NoSchedule taint on a single-node bootstrap) — kubeaid-cli marched past WaitForMainClusterToBeProvisioned into SetupCluster, then SealedSecrets / ArgoCD App sync surfaced as the failing layer with workload pods stuck ContainerCreating indefinitely.
NetworkUnavailable=False is the standard kubelet/CNI predicate Kubernetes itself uses to gate workload scheduling on a Node (kubernetes/kubernetes#k8s.io/api/core/v1.NodeNetworkUnavailable), so this check is CNI-agnostic (cilium, calico, weave, anything) and aligned with what the scheduler would care about anyway. Ready=True is the broader kubelet "I can run pods" predicate; we require both because either alone has known false positives during cloud-provider init.
func WaitForCertificatesReady ¶ added in v0.23.0
func WaitForCertificatesReady( ctx context.Context, kubeClient client.Client, certs []types.NamespacedName, ) error
WaitForCertificatesReady blocks until every Certificate in certs reports Ready=True, ctx is cancelled, or waitForCertificatesReadyTimeout passes. An empty certs slice is a no-op.
On a VPN cluster the netbird / keycloak workloads are unusable until Traefik serves a real TLS cert for their FQDNs — netbird-management, for instance, crashloops on the OIDC discovery fetch while Traefik falls back to its self-signed default. Gating the bootstrap here turns that cryptic downstream x509 crashloop into a clear "cert <name> is not Ready: <reason>" failure that points the operator straight at cert-manager's Order / Challenge.
cert-manager retries failed issuance with backoff, so a transient failure resolves on its own — the loop keeps polling and only gives up at the timeout, printing the last-seen reasons and a cert-manager describe hint to stderr (see printCertificateFailureHint). Certificates are read via the unstructured client (GVK cert-manager.io/v1 Certificate); a Certificate that doesn't exist yet counts as not-ready, since cert-manager's ingress-shim creates it once the Ingress is synced.
func WaitForMainClusterToBeProvisioned ¶
func WaitForMainClusterToBeProvisioned(ctx context.Context, managementClusterClient client.Client) error
WaitForMainClusterToBeProvisioned blocks until the main cluster's CAPI Cluster resource reports Phase=Provisioned + ReadyCondition=True, ctx is cancelled, or capiWaitTotalTimeout passes. While waiting it renders a live lipgloss table showing the Cluster row and one row per Machine (with HCloud InstanceState / FailureReason where available), re-rendering every capiWaitPollInterval. The last-rendered tick stays in scrollback as the audit trail of what state the cluster was in when the wait succeeded — operators have already pinged us once asking "is it stuck or just slow", so the persisted table answers that for future runs.
Owns the screen for its duration: pauses the progress bar so the spinner's 100ms re-render goroutine can't \r-overwrite the table rows, resumes on exit. Caller should NOT wrap with bar.InProgress — the bar's substep stream is below the persisted table. After this returns nil, the caller emits its own "✓ Main cluster Machines provisioned" substep.
func WaitForMainClusterToBeReady ¶
WaitForMainClusterToBeReady waits for the main cluster to be ready to run application workloads. It polls until at least one initialized worker node exists or the context is cancelled.
Types ¶
type AppSyncStep ¶ added in v0.23.0
// AppSyncStep is one entry in SyncAllArgoCDApps's ordered list: an
// ArgoCD App to sync, plus an optional hook that runs immediately after
// that App has synced.
type AppSyncStep struct {
// Name is the ArgoCD App name. A step whose App isn't present in
// the cluster is skipped.
Name string
// AfterSync, if non-nil, runs once Name has synced — before the
// next step. The bootstrap gates here on, e.g., the App's
// cert-manager Certificate being Ready: a Synced ArgoCD App only
// means its manifests (Ingress included) were applied, not that
// the TLS cert was actually issued.
//
// NOTE(review): how SyncAllArgoCDApps reacts to a non-nil error
// returned by this hook is not visible here — confirm against the
// implementation.
AfterSync func(context.Context) error
}
AppSyncStep is one entry in SyncAllArgoCDApps's ordered list: an ArgoCD App to sync, plus an optional hook run immediately after it syncs (before the next step and before the remaining-apps loop).
The bootstrap uses it to bring up the Hetzner VPN dependency chain in a guaranteed sequence — ccm → traefik → cert-manager → keycloakx → netbird — with the LB-DNS and TLS-cert gates wired in as AfterSync hooks, instead of relying on the alphabetical order ArgoCD's List happens to return.
type ArgoCDAppClient ¶ added in v0.23.0
// ArgoCDAppClient declares the three ArgoCD Application operations this
// package depends on: List, Sync and Get. NewArgoCDAppManager accepts a
// value of this type.
//
// NOTE(review): the method shapes (context, request message, variadic
// grpc.CallOption) look like a subset of ArgoCD's generated gRPC
// application-service client, presumably narrowed so a fake can be
// substituted in tests — confirm.
type ArgoCDAppClient interface {
// List takes an ApplicationQuery and returns an ApplicationList.
List(ctx context.Context, q *application.ApplicationQuery, opts ...grpc.CallOption) (*argoCDV1Aplha1.ApplicationList, error)
// Sync takes an ApplicationSyncRequest and returns an Application.
Sync(ctx context.Context, r *application.ApplicationSyncRequest, opts ...grpc.CallOption) (*argoCDV1Aplha1.Application, error)
// Get takes an ApplicationQuery and returns a single Application.
Get(ctx context.Context, q *application.ApplicationQuery, opts ...grpc.CallOption) (*argoCDV1Aplha1.Application, error)
}
type ArgoCDAppManager ¶ added in v0.23.0
// ArgoCDAppManager is constructed by NewArgoCDAppManager from an
// ArgoCDAppClient and a reconnect callback. Its fields are not shown in
// this rendered documentation.
type ArgoCDAppManager struct {
// contains filtered or unexported fields
}
func NewArgoCDAppManager ¶ added in v0.23.0
func NewArgoCDAppManager(appClient ArgoCDAppClient, reconnect func(ctx context.Context)) *ArgoCDAppManager
type HelmActionFactory ¶ added in v0.23.0
// HelmActionFactory creates per-operation Helm runners (install,
// upgrade, list) and loads charts from disk.
type HelmActionFactory interface {
// NewInstall returns a runner configured for the given release name and namespace.
NewInstall(releaseName, namespace string) HelmInstallRunner
// NewUpgrade returns a runner that upgrades a release in the given
// namespace. Used by the recovery path (ReinstallSealedSecrets) —
// `helm upgrade` works for a release in any non-pending state,
// re-applies the chart's manifests, and re-creates any in-cluster
// resources that have drifted (e.g. a Deployment removed
// out-of-band). Install with Replace=true only handles
// uninstalled/failed states; upgrade has no such restriction.
//
// NOTE(review): the release name is not a parameter here; presumably
// it is supplied when the returned runner is run — confirm.
NewUpgrade(namespace string) HelmUpgradeRunner
// NewList returns a runner that lists releases matching the given filter.
NewList(filter string) HelmListRunner
// LoadChart loads a Helm chart from the given filesystem path.
LoadChart(path string) (*chart.Chart, error)
}
HelmActionFactory creates per-operation runners. Production wires this to *action.Configuration; tests provide a fake.
type HelmInstallArgs ¶
type HelmInstallRunner ¶ added in v0.23.0
// HelmInstallRunner runs a single Helm install operation.
type HelmInstallRunner interface {
// Run performs the install for chart chrt using the values in vals,
// returning the resulting release or an error.
Run(chrt *chart.Chart, vals map[string]any) (*release.Release, error)
}
HelmInstallRunner runs a single Helm install operation. Implementations must honour ReleaseName, Namespace, CreateNamespace, Timeout, and Wait.
type HelmListRunner ¶ added in v0.23.0
HelmListRunner lists Helm releases.