Documentation
¶
Index ¶
- Variables
- func CreateImage(ctx context.Context, s *Scenario) *config.Image
- func CreateSIGImageVersionFromDisk(ctx context.Context, s *Scenario, version string, diskResourceID string) *config.Image
- func CustomDataWithHack(s *Scenario, binaryURL string) (string, error)
- func CustomDataWithNBCCmdHack(s *Scenario, customData, binaryURL string) (string, error)
- func DialSSHOverBastion(ctx context.Context, bastion *Bastion, vmPrivateIP string, ...) (*ssh.Client, error)
- func GetFieldFromJsonObjectOnNode(ctx context.Context, s *Scenario, fileName string, jsonPath string) string
- func RestartNodeProblemDetector(ctx context.Context, s *Scenario)
- func RunCommand(ctx context.Context, s *Scenario, command string) (armcompute.RunCommandResult, error)
- func RunScenario(t *testing.T, s *Scenario)
- func ServiceCanRestartValidator(ctx context.Context, s *Scenario, serviceName string, ...)
- func ValidateACLFIPSEnabled(ctx context.Context, s *Scenario)
- func ValidateAKSLocalDNSHostsSetupService(ctx context.Context, s *Scenario)
- func ValidateAKSLogCollector(ctx context.Context, s *Scenario)
- func ValidateAppArmorBasic(ctx context.Context, s *Scenario)
- func ValidateAzureNetworkFiles(ctx context.Context, s *Scenario)
- func ValidateCiliumIsNotRunningWindows(ctx context.Context, s *Scenario)
- func ValidateCiliumIsRunningWindows(ctx context.Context, s *Scenario)
- func ValidateCollectWindowsLogsScript(ctx context.Context, s *Scenario)
- func ValidateCommonLinux(ctx context.Context, s *Scenario)
- func ValidateCommonWindows(ctx context.Context, s *Scenario)
- func ValidateContainerRuntimePlugins(ctx context.Context, s *Scenario)
- func ValidateContainerd2Properties(ctx context.Context, s *Scenario, versions []string)
- func ValidateDirectoryContent(ctx context.Context, s *Scenario, path string, files []string)
- func ValidateDiskQueueService(ctx context.Context, s *Scenario)
- func ValidateDllIsNotLoadedWindows(ctx context.Context, s *Scenario, dllName string)
- func ValidateDllLoadedWindows(ctx context.Context, s *Scenario, dllName string)
- func ValidateDotnetNotInstalledWindows(ctx context.Context, s *Scenario)
- func ValidateEnableNvidiaResource(ctx context.Context, s *Scenario)
- func ValidateFIPSProvider(ctx context.Context, s *Scenario)
- func ValidateFileDoesNotExist(ctx context.Context, s *Scenario, fileName string)
- func ValidateFileExcludesContent(ctx context.Context, s *Scenario, fileName string, contents string)
- func ValidateFileExcludesExactContent(ctx context.Context, s *Scenario, fileName string, contents string)
- func ValidateFileExists(ctx context.Context, s *Scenario, fileName string)
- func ValidateFileHasContent(ctx context.Context, s *Scenario, fileName string, contents string)
- func ValidateFileIsRegularFile(ctx context.Context, s *Scenario, fileName string)
- func ValidateGPUWorkloadSchedulable(ctx context.Context, s *Scenario, gpuCount int, resourceName string)
- func ValidateIMDSRestrictionRule(ctx context.Context, s *Scenario, table string)
- func ValidateIPTablesCompatibleWithCiliumEBPF(ctx context.Context, s *Scenario)
- func ValidateInspektorGadget(ctx context.Context, s *Scenario)
- func ValidateInstalledPackageVersion(ctx context.Context, s *Scenario, component, version string)
- func ValidateJournalctlOutput(ctx context.Context, s *Scenario, serviceName string, expectedContent string)
- func ValidateJsonFileDoesNotHaveField(ctx context.Context, s *Scenario, fileName string, jsonPath string, ...)
- func ValidateJsonFileHasField(ctx context.Context, s *Scenario, fileName string, jsonPath string, ...)
- func ValidateKernelLogs(ctx context.Context, s *Scenario)
- func ValidateKubeletHasFlags(ctx context.Context, s *Scenario, filePath string)
- func ValidateKubeletHasNotStopped(ctx context.Context, s *Scenario)
- func ValidateKubeletNodeIP(ctx context.Context, s *Scenario)
- func ValidateKubeletServingCertificateRotation(ctx context.Context, s *Scenario)
- func ValidateLeakedSecrets(ctx context.Context, s *Scenario)
- func ValidateLocalDNSExporterMetrics(ctx context.Context, s *Scenario)
- func ValidateLocalDNSHostsFile(ctx context.Context, s *Scenario, fqdns []string)
- func ValidateLocalDNSHostsPluginBypass(ctx context.Context, s *Scenario)
- func ValidateLocalDNSHostsPluginColdStart(ctx context.Context, s *Scenario)
- func ValidateLocalDNSHostsPluginIPv6(ctx context.Context, s *Scenario)
- func ValidateLocalDNSResolution(ctx context.Context, s *Scenario, server string)
- func ValidateLocalDNSService(ctx context.Context, s *Scenario, state string)
- func ValidateMIGInstancesCreated(ctx context.Context, s *Scenario, migProfile string)
- func ValidateMIGModeEnabled(ctx context.Context, s *Scenario)
- func ValidateMultipleKubeProxyVersionsExist(ctx context.Context, s *Scenario)
- func ValidateNPDFilesystemCorruption(ctx context.Context, s *Scenario)
- func ValidateNPDGPUCountAfterFailure(ctx context.Context, s *Scenario)
- func ValidateNPDGPUCountCondition(ctx context.Context, s *Scenario)
- func ValidateNPDGPUCountPlugin(ctx context.Context, s *Scenario)
- func ValidateNPDHealthyNvidiaGridLicenseStatus(ctx context.Context, s *Scenario)
- func ValidateNPDIBLinkFlappingAfterFailure(ctx context.Context, s *Scenario)
- func ValidateNPDIBLinkFlappingCondition(ctx context.Context, s *Scenario)
- func ValidateNPDUnhealthyNvidiaDCGMServices(ctx context.Context, s *Scenario)
- func ValidateNPDUnhealthyNvidiaDCGMServicesAfterFailure(ctx context.Context, s *Scenario)
- func ValidateNPDUnhealthyNvidiaDCGMServicesCondition(ctx context.Context, s *Scenario)
- func ValidateNPDUnhealthyNvidiaDevicePlugin(ctx context.Context, s *Scenario)
- func ValidateNPDUnhealthyNvidiaDevicePluginAfterFailure(ctx context.Context, s *Scenario)
- func ValidateNPDUnhealthyNvidiaDevicePluginCondition(ctx context.Context, s *Scenario)
- func ValidateNPDUnhealthyNvidiaGridLicenseStatusAfterFailure(ctx context.Context, s *Scenario)
- func ValidateNetworkInterfaceConfig(ctx context.Context, s *Scenario, nicConfig map[string]string)
- func ValidateNoFailedSystemdUnits(ctx context.Context, s *Scenario)
- func ValidateNodeAdvertisesGPUResources(ctx context.Context, s *Scenario, gpuCountExpected int64, resourceName string)
- func ValidateNodeCanRunAPod(ctx context.Context, s *Scenario)
- func ValidateNodeExporter(ctx context.Context, s *Scenario)
- func ValidateNodeHasLabel(ctx context.Context, s *Scenario, labelKey, expectedValue string)
- func ValidateNodeProblemDetector(ctx context.Context, s *Scenario)
- func ValidateNonEmptyDirectory(ctx context.Context, s *Scenario, dirName string)
- func ValidateNvidiaDCGMExporterIsScrapable(ctx context.Context, s *Scenario)
- func ValidateNvidiaDCGMExporterScrapeCommonMetric(ctx context.Context, s *Scenario, metric string)
- func ValidateNvidiaDCGMExporterSystemDServiceRunning(ctx context.Context, s *Scenario)
- func ValidateNvidiaDevicePluginServiceRunning(ctx context.Context, s *Scenario)
- func ValidateNvidiaGRIDLicenseValid(ctx context.Context, s *Scenario)
- func ValidateNvidiaModProbeInstalled(ctx context.Context, s *Scenario)
- func ValidateNvidiaPersistencedRunning(ctx context.Context, s *Scenario)
- func ValidateNvidiaSMIInstalled(ctx context.Context, s *Scenario)
- func ValidateNvidiaSMINotInstalled(ctx context.Context, s *Scenario)
- func ValidatePodRunning(ctx context.Context, s *Scenario, pod *corev1.Pod)
- func ValidatePodRunningWithRetry(ctx context.Context, s *Scenario, pod *corev1.Pod, maxRetries int)
- func ValidatePubkeySSHDisabled(ctx context.Context, s *Scenario)
- func ValidateRuncVersion(ctx context.Context, s *Scenario, versions []string)
- func ValidateRxBufferDefault(ctx context.Context, s *Scenario)
- func ValidateSSHServiceDisabled(ctx context.Context, s *Scenario)
- func ValidateSSHServiceEnabled(ctx context.Context, s *Scenario)
- func ValidateScriptlessCSECmd(ctx context.Context, s *Scenario)
- func ValidateScriptlessNBCCSECmd(ctx context.Context, s *Scenario)
- func ValidateServicesDoNotRestartKubelet(ctx context.Context, s *Scenario)
- func ValidateStaleCachedKubeBinariesRemoved(ctx context.Context, s *Scenario)
- func ValidateSysctlConfig(ctx context.Context, s *Scenario, customSysctls map[string]string)
- func ValidateSystemdUnitIsNotFailed(ctx context.Context, s *Scenario, serviceName string)
- func ValidateSystemdUnitIsNotRunning(ctx context.Context, s *Scenario, serviceName string)
- func ValidateSystemdUnitIsRunning(ctx context.Context, s *Scenario, serviceName string)
- func ValidateSystemdWatchdogForKubernetes132Plus(ctx context.Context, s *Scenario)
- func ValidateTLSBootstrapping(ctx context.Context, s *Scenario)
- func ValidateTaints(ctx context.Context, s *Scenario, expectedTaints string)
- func ValidateUlimitSettings(ctx context.Context, s *Scenario, ulimits map[string]string)
- func ValidateVulnerableKernelModulesDisabled(ctx context.Context, s *Scenario)
- func ValidateWaagentLog(ctx context.Context, s *Scenario)
- func ValidateWindowsCiliumIsNotRunning(ctx context.Context, s *Scenario)
- func ValidateWindowsCiliumIsRunning(ctx context.Context, s *Scenario)
- func ValidateWindowsDisplayVersion(ctx context.Context, s *Scenario, displayVersion string)
- func ValidateWindowsProcessContainsArgumentStrings(ctx context.Context, s *Scenario, processName string, substrings []string)
- func ValidateWindowsProcessDoesNotContainArgumentStrings(ctx context.Context, s *Scenario, processName string, substrings []string)
- func ValidateWindowsProcessHasCliArguments(ctx context.Context, s *Scenario, processName string, arguments []string)
- func ValidateWindowsProductName(ctx context.Context, s *Scenario, productName string)
- func ValidateWindowsServiceIsNotRunning(ctx context.Context, s *Scenario, serviceName string)
- func ValidateWindowsServiceIsRunning(ctx context.Context, s *Scenario, serviceName string)
- func ValidateWindowsSystemServiceRestartConfiguration(ctx context.Context, s *Scenario, serviceName string)
- func ValidateWindowsSystemServicesRestartConfiguration(ctx context.Context, s *Scenario)
- func ValidateWindowsVersionFromWindowsSettings(ctx context.Context, s *Scenario, windowsVersion string)
- type Bastion
- type CSEProvisionTiming
- type CSETaskTiming
- type CSETimingReport
- type CSETimingThresholds
- type Cluster
- type ClusterParams
- type ClusterRequest
- type Config
- type CreateGalleryImageRequest
- type CreateGalleryRequest
- type CustomDataWriteFile
- type GetLatestExtensionVersionRequest
- type GetVHDRequest
- type Kubeclient
- func (k *Kubeclient) CreateDaemonset(ctx context.Context, ds *appsv1.DaemonSet) error
- func (k *Kubeclient) EnsureDebugDaemonsets(ctx context.Context, isNetworkIsolated bool, privateACRName string) error
- func (k *Kubeclient) GetPodNetworkDebugPodForNode(ctx context.Context, kubeNodeName string) (*corev1.Pod, error)
- func (k *Kubeclient) GetProxyURL(ctx context.Context) (string, error)
- func (k *Kubeclient) WaitUntilNodeReady(ctx context.Context, t testing.TB, vmssName string) string
- func (k *Kubeclient) WaitUntilPodRunning(ctx context.Context, namespace string, labelSelector string, ...) (*corev1.Pod, error)
- func (k *Kubeclient) WaitUntilPodRunningWithRetry(ctx context.Context, namespace string, labelSelector string, ...) (*corev1.Pod, error)
- type Scenario
- func (s *Scenario) GetClientPrivateKey() string
- func (s *Scenario) GetContainerRegistryFQDN() string
- func (s *Scenario) GetDefaultFQDNsForValidation() []string
- func (s *Scenario) GetK8sVersion() string
- func (s *Scenario) GetServicePrincipalSecret() string
- func (s *Scenario) GetTLSBootstrapToken() string
- func (s *Scenario) HasServicePrincipalData() bool
- func (s *Scenario) IsHostsPluginEnabled() bool
- func (s *Scenario) IsLinux() bool
- func (s *Scenario) IsWindows() bool
- func (s *Scenario) KubeletConfigFileEnabled() bool
- func (s *Scenario) PrepareAKSNodeConfig()
- func (s *Scenario) PrepareVMSSModel(ctx context.Context, t testing.TB, vmss *armcompute.VirtualMachineScaleSet)
- func (s *Scenario) SecureTLSBootstrappingEnabled() bool
- type ScenarioRuntime
- type ScenarioVM
- type Tags
- type VMSizeSKURequest
- type VNet
Constants ¶
This section is empty.
Variables ¶
var AllowedSSHPrefixes = []string{ssh.KeyAlgoED25519, ssh.KeyAlgoRSA, ssh.KeyAlgoRSASHA256, ssh.KeyAlgoRSASHA512}
var CachedCompileAndUploadAKSNodeController = cachedFunc(compileAndUploadAKSNodeController)
var CachedCreateGallery = cachedFunc(createGallery)
var CachedCreateGalleryImage = cachedFunc(createGalleryImage)
var CachedCreateVMManagedIdentity = cachedFunc(config.Azure.CreateVMManagedIdentity)
var CachedEnsureResourceGroup = cachedFunc(ensureResourceGroup)
var CachedGetLatestVMExtensionImageVersion = cachedFunc( func(ctx context.Context, req GetLatestExtensionVersionRequest) (string, error) { return config.Azure.GetLatestVMExtensionImageVersion(ctx, req.Location, req.ExtType, req.Publisher) }, )
CachedGetLatestVMExtensionImageVersion caches the result of querying the Azure API for the latest VM extension image version.
var CachedIsVMSizeGen2Only = cachedFunc(func(ctx context.Context, req VMSizeSKURequest) (bool, error) { return config.Azure.IsVMSizeGen2Only(ctx, req.Location, req.VMSize) })
CachedIsVMSizeGen2Only caches the result of querying the Azure Resource SKUs API to determine if a VM size only supports the Gen2 hypervisor.
var CachedPrepareVHD = cachedFunc(prepareVHD)
var CachedVMSizeSupportsNVMe = cachedFunc(func(ctx context.Context, req VMSizeSKURequest) (bool, error) { return config.Azure.VMSizeSupportsNVMe(ctx, req.Location, req.VMSize) })
CachedVMSizeSupportsNVMe caches the result of querying the Azure Resource SKUs API to determine if a VM size supports the NVMe disk controller type.
var ClusterAzureBootstrapProfileCache = cachedFunc(clusterAzureBootstrapProfileCache)
var ClusterAzureNetwork = cachedFunc(clusterAzureNetwork)
var ClusterAzureNetworkIsolated = cachedFunc(clusterAzureNetworkIsolated)
var ClusterAzureOverlayNetwork = cachedFunc(clusterAzureOverlayNetwork)
var ClusterAzureOverlayNetworkDualStack = cachedFunc(clusterAzureOverlayNetworkDualStack)
var ClusterCiliumNetwork = cachedFunc(clusterCiliumNetwork)
var ClusterKubenet = cachedFunc(clusterKubenet)
var ClusterLatestKubernetesVersion = cachedFunc(clusterLatestKubernetesVersion)
Functions ¶
func CreateSIGImageVersionFromDisk ¶
func CreateSIGImageVersionFromDisk(ctx context.Context, s *Scenario, version string, diskResourceID string) *config.Image
CreateSIGImageVersionFromDisk creates a new SIG image version directly from a VM disk
func CustomDataWithHack ¶
CustomDataWithHack is similar to nodeconfigutils.CustomData, but it uses a hack to run the new aks-node-controller binary. The original aks-node-controller isn't run because it fails the systemd check validating that aks-node-controller-config.json exists (check aks-node-controller.service for details).
Uses a cloud-boothook to write the config file and create a systemd service unit early in boot (during cloud-init init). The systemd service waits for network-online.target before downloading the binary and running provisioning, avoiding the race condition where runcmd or boothook scripts execute before networking is available. Flatcar cannot use boothooks (coreos-cloudinit doesn't support MIME multipart), so it uses cloud-config with a coreos.units block to define and start the service instead.
func CustomDataWithNBCCmdHack ¶
CustomDataWithNBCCmdHack is similar to baker.boothooktemplate, but it uses a hack to run the new aks-node-controller binary. The original aks-node-controller isn't run because it fails the systemd check validating that aks-node-controller-config.json exists (check aks-node-controller.service for details). Flatcar uses cloud-config with a coreos.units block to define and start the service instead.
func DialSSHOverBastion ¶
func RunCommand ¶
func RunCommand(ctx context.Context, s *Scenario, command string) (armcompute.RunCommandResult, error)
RunCommand executes a command on the VMSS VM with instance ID "0" and returns the raw JSON response from Azure. Unlike the default approach, it doesn't use SSH and uses Azure tooling instead. This approach is generally slower, but it works even if SSH is not available.
func RunScenario ¶
func ValidateACLFIPSEnabled ¶
ValidateACLFIPSEnabled asserts ACL-specific FIPS markers are present on the node: the /etc/system-fips marker file written by vhdbuilder/scripts/linux/acl/tool_installs_acl.sh. Kernel FIPS mode (/proc/sys/crypto/fips_enabled == 1) is universal and is asserted by ValidateFIPSProvider; callers should compose the two validators when both are needed.
func ValidateAKSLocalDNSHostsSetupService ¶
ValidateAKSLocalDNSHostsSetupService checks that aks-localdns-hosts-setup.service ran successfully and the aks-localdns-hosts-setup.timer is active to ensure periodic refresh of /etc/localdns/hosts.
func ValidateAKSLogCollector ¶
func ValidateAppArmorBasic ¶
ValidateAppArmorBasic validates that AppArmor is running without requiring aa-status
func ValidateAzureNetworkFiles ¶
ValidateAzureNetworkFiles checks that udev rules files exist.
func ValidateCollectWindowsLogsScript ¶
ValidateCollectWindowsLogsScript runs c:\k\debug\collect-windows-logs.ps1 on the node and verifies that a zip archive was produced by the script.
func ValidateCommonLinux ¶
func ValidateCommonWindows ¶
func ValidateFIPSProvider ¶
ValidateFIPSProvider verifies that FIPS is properly configured on the node:
- Kernel FIPS mode is enabled (/proc/sys/crypto/fips_enabled == 1).
- OpenSSL (3.x) has an active FIPS or SymCrypt provider loaded. The check is skipped on hosts shipping OpenSSL 1.1.x (e.g. Ubuntu 20.04 FIPS), which use the legacy FIPS module rather than the providers interface.
- /opt/cni/bin/portmap runs without panicking (regression guard for ICM 51000001009688 where the OpenSSL FIPS provider was not loaded on AzureLinux V3 FIPS nodes).
func ValidateFileExcludesContent ¶
func ValidateFileExcludesContent(ctx context.Context, s *Scenario, fileName string, contents string)
ValidateFileExcludesContent fails the test if the specified file contains the specified contents. The contents don't need to be surrounded by non-word characters. E.g.: searching "bcd" in "abcdef" is a match, thus the validation fails.
func ValidateFileExcludesExactContent ¶
func ValidateFileExcludesExactContent(ctx context.Context, s *Scenario, fileName string, contents string)
ValidateFileExcludesExactContent fails the test if the specified file contains the specified contents. The contents need to be surrounded by non-word characters. E.g.: searching "bcd" in "abcdef" is not a match, thus the validation passes.
func ValidateFileExists ¶
func ValidateFileHasContent ¶
ValidateFileHasContent passes the test if the specified file contains the specified contents. The contents don't need to be surrounded by non-word characters. E.g.: searching "bcd" in "abcdef" is a match, thus the validation passes.
func ValidateIPTablesCompatibleWithCiliumEBPF ¶
ValidateIPTablesCompatibleWithCiliumEBPF validates that all iptables rules in each table match the provided patterns which are accounted for when eBPF host routing is enabled.
func ValidateInspektorGadget ¶
func ValidateJournalctlOutput ¶
func ValidateJournalctlOutput(ctx context.Context, s *Scenario, serviceName string, expectedContent string)
ValidateJournalctlOutput checks if specific content exists in the systemd service logs
func ValidateKernelLogs ¶
ValidateKernelLogs checks kernel logs for critical errors across multiple categories:
- Kernel panics/crashes (panic, oops, call trace, BUG, etc.)
- CPU lockups/stalls (soft/hard lockup, RCU stall, hung task, watchdog)
- Memory issues (OOM killer, page allocation failure, memory corruption)
- I/O and filesystem errors (I/O error, filesystem errors, nvme/ata/scsi errors)
func ValidateKubeletHasFlags ¶
ValidateKubeletHasFlags checks kubelet is started with the right flags and configs.
func ValidateKubeletNodeIP ¶
func ValidateLeakedSecrets ¶
func ValidateLocalDNSExporterMetrics ¶
ValidateLocalDNSExporterMetrics checks if the localdns metrics exporter is working and exports the expected VnetDNS and KubeDNS forward IP metrics.
The validation script is too large (~18KB) to send as a single command over bastion SSH tunnels which have an 8KB WebSocket buffer limit. To work around this, we encode the script in base64, upload it in small chunks via multiple SSH commands, then decode and execute it on the VM.
func ValidateLocalDNSHostsFile ¶
ValidateLocalDNSHostsFile checks that /etc/localdns/hosts contains at least one IPv4 entry for each critical FQDN. This validation approach avoids flakiness with CDN/frontdoor-backed FQDNs (like mcr.microsoft.com) whose A records can rotate between queries. We verify presence, not exact IP matching. The hosts file is populated asynchronously by the aks-localdns-hosts-setup timer/service, so we poll with a timeout.
func ValidateLocalDNSHostsPluginBypass ¶
ValidateLocalDNSHostsPluginBypass verifies that localdns serves FQDNs from /etc/localdns/hosts via the CoreDNS hosts plugin. It checks:
- The node has the kubernetes.azure.com/localdns-hosts-plugin=enabled annotation
- The Corefile has the hosts plugin configured in both VnetDNS and KubeDNS listeners
- The IPs returned by dig match the entries in /etc/localdns/hosts for the same FQDN
We intentionally do NOT assert on DNS flags (AA, RA) because CoreDNS can set these regardless of which plugin served the response.
func ValidateLocalDNSHostsPluginColdStart ¶
ValidateLocalDNSHostsPluginColdStart verifies that localdns works correctly when restarted with an empty hosts file — the exact scenario that occurs when localdns starts before aks-localdns-hosts-setup finishes resolving FQDNs.
Test flow:
- Truncate hosts file, restart localdns — CoreDNS starts fresh with empty hosts file and empty cache
- Verify critical and non-critical FQDNs resolve via fallthrough (upstream DNS)
- Populate hosts file with a canary entry (simulates aks-localdns-hosts-setup completing)
- Wait for CoreDNS reload (5s), verify canary resolves (hosts plugin picks up new file)
- Restore original hosts file and restart localdns to leave node in clean state
func ValidateLocalDNSHostsPluginIPv6 ¶
ValidateLocalDNSHostsPluginIPv6 checks that IPv6 entries in /etc/localdns/hosts are properly served by CoreDNS's hosts plugin. If the hosts file has no IPv6 entries (some FQDNs don't have AAAA records), the test is skipped gracefully.
Test flow:
- Find the first FQDN with an IPv6 entry in the hosts file
- Query localdns for AAAA records for that FQDN
- Verify the returned IPv6 addresses match the hosts file entries
func ValidateLocalDNSResolution ¶
ValidateLocalDNSResolution checks if the DNS resolution for an external domain is successful from localdns clusterlistenerIP. It uses the 'dig' command to check the DNS resolution and expects a successful response.
func ValidateLocalDNSService ¶
ValidateLocalDNSService checks if the localdns service is in the expected state (enabled or disabled).
func ValidateMIGModeEnabled ¶
func ValidateNetworkInterfaceConfig ¶
ValidateNetworkInterfaceConfig validates network interface configuration settings using ethtool. It identifies network interfaces with slot names matching the enP* pattern (same logic as the udev rule), then verifies that each interface has the expected configuration settings (e.g., rx buffer size). The nicConfig map specifies the ethtool settings to validate (key: setting name, value: expected value).
func ValidateNodeCanRunAPod ¶
func ValidateNodeExporter ¶
func ValidateNodeHasLabel ¶
ValidateNodeHasLabel checks if the node has the expected label with the expected value
func ValidatePodRunning ¶
func ValidatePubkeySSHDisabled ¶
ValidatePubkeySSHDisabled validates that SSH with private key authentication is disabled by checking sshd_config
func ValidateRuncVersion ¶
func ValidateRxBufferDefault ¶
ValidateRxBufferDefault validates rx buffer config using default values based on VM's CPU count
func ValidateSSHServiceDisabled ¶
ValidateSSHServiceDisabled validates that the SSH daemon service is disabled and stopped on the node
func ValidateScriptlessCSECmd ¶
ValidateScriptlessCSECmd checks if the node has scriptless cmd correctly enabled
func ValidateScriptlessNBCCSECmd ¶
ValidateScriptlessNBCCSECmd checks if the node has scriptless NBCCSECmd correctly enabled
func ValidateStaleCachedKubeBinariesRemoved ¶
ValidateStaleCachedKubeBinariesRemoved validates that stale versioned kube binaries (e.g. kubelet-1.29.0, kubectl-1.29.0) have been removed from /opt/bin/ after the correct version is installed.
func ValidateSysctlConfig ¶
func ValidateTaints ¶
ValidateTaints checks if the node has the expected taints that are set in the kubelet config with --register-with-taints flag
func ValidateUlimitSettings ¶
func ValidateVulnerableKernelModulesDisabled ¶
ValidateVulnerableKernelModulesDisabled verifies that kernel modules with known LPE vulnerabilities are blocked via modprobe config, not loaded, and cannot be loaded. Covers: CVE-2026-31431 (algif_aead), DirtyFrag (esp4, esp6, rxrpc). To add a new CVE mitigation, append the module name to the list below.
func ValidateWaagentLog ¶
ValidateWaagentLog checks /var/log/waagent.log for expected agent behavior:
- AutoUpdate is disabled as expected
- The correct version is running as ExtHandler
- No errors from ExtHandler
Skipped on Flatcar and OSGuard VHDs which manage WALinuxAgent independently.
Types ¶
type Bastion ¶
type Bastion struct {
// contains filtered or unexported fields
}
func NewBastion ¶
func NewBastion(credential *azidentity.AzureCLICredential, subscriptionID, resourceGroupName, dnsName string) *Bastion
type CSEProvisionTiming ¶
type CSEProvisionTiming struct {
ExitCode string `json:"ExitCode"`
ExecDuration string `json:"ExecDuration"`
KernelStartTime string `json:"KernelStartTime"`
CloudInitLocalStart string `json:"CloudInitLocalStartTime"`
CloudInitStart string `json:"CloudInitStartTime"`
CloudFinalStart string `json:"CloudFinalStartTime"`
CSEStartTime string `json:"CSEStartTime"`
GuestAgentStartTime string `json:"GuestAgentStartTime"`
SystemdSummary string `json:"SystemdSummary"`
BootDatapoints json.RawMessage `json:"BootDatapoints"`
}
CSEProvisionTiming represents the overall provisioning timing from provision.json.
type CSETaskTiming ¶
type CSETaskTiming struct {
TaskName string
StartTime time.Time
EndTime time.Time
Duration time.Duration
Message string
}
CSETaskTiming represents the timing of a single CSE task.
type CSETimingReport ¶
type CSETimingReport struct {
Tasks []CSETaskTiming
Provision *CSEProvisionTiming
// contains filtered or unexported fields
}
CSETimingReport holds all parsed timing data from a VM.
func ExtractCSETimings ¶
func ExtractCSETimings(ctx context.Context, s *Scenario) (*CSETimingReport, error)
ExtractCSETimings SSHes into the scenario VM and extracts all CSE task timings. Returns an error if no tasks could be parsed, since an empty report would make regression detection ineffective.
func ValidateCSETimings ¶
func ValidateCSETimings(ctx context.Context, s *Scenario, thresholds CSETimingThresholds) *CSETimingReport
ValidateCSETimings extracts CSE task timings from the VM, logs them, and validates against thresholds. Each threshold check runs as a t.Run() sub-test so that ADO Pipeline Analytics (via gotestsum → JUnit XML → PublishTestResults) can track individual CSE task pass/fail and duration trends over time.
func (*CSETimingReport) GetTask ¶
func (r *CSETimingReport) GetTask(name string) *CSETaskTiming
GetTask returns the timing for a specific task, or nil if not found.
func (*CSETimingReport) LogReport ¶
func (r *CSETimingReport) LogReport(_ context.Context, t interface{ Logf(string, ...any) })
LogReport logs all task timings to the test logger.
func (*CSETimingReport) TotalCSEDuration ¶
func (r *CSETimingReport) TotalCSEDuration() time.Duration
TotalCSEDuration returns the duration of the cse_start task if present.
type CSETimingThresholds ¶
type CSETimingThresholds struct {
// TaskThresholds maps task name suffixes to maximum duration.
// Task names are matched by suffix to allow flexible matching
// (e.g., "installDebPackageFromFile" matches "AKS.CSE.installkubelet.installDebPackageFromFile").
TaskThresholds map[string]time.Duration
// TotalCSEThreshold is the maximum acceptable total CSE duration.
TotalCSEThreshold time.Duration
// DefaultTaskThreshold is the threshold applied to any task that exceeds it
// but has no specific entry in TaskThresholds. This ensures that ALL slow tasks
// appear as sub-tests in ADO Pipeline Analytics, even newly added ones.
// Tasks below this threshold are silently skipped.
// Set to 0 to disable dynamic tracking.
DefaultTaskThreshold time.Duration
}
CSETimingThresholds defines maximum acceptable durations for CSE tasks.
type Cluster ¶
type Cluster struct {
Model *armcontainerservice.ManagedCluster
Kube *Kubeclient
KubeletIdentity *armcontainerservice.UserAssignedIdentity
SubnetID string
ClusterParams *ClusterParams
Bastion *Bastion
ProxyURL string
}
func (*Cluster) IsAzureCNI ¶
IsAzureCNI returns true if the cluster is configured with Azure CNI.
func (*Cluster) MaxPodsPerNode ¶
MaxPodsPerNode returns the maximum number of pods per node of the cluster's agentpool.
type ClusterParams ¶
type ClusterRequest ¶
ClusterRequest represents the parameters needed to create a cluster
type Config ¶
type Config struct {
// Cluster creates, updates or re-uses an AKS cluster for the scenario
Cluster func(ctx context.Context, request ClusterRequest) (*Cluster, error)
// VHD is the node image used by the scenario.
VHD *config.Image
// BootstrapConfigMutator is a function which mutates the base NodeBootstrappingConfig according to the scenario's requirements
BootstrapConfigMutator func(*Cluster, *datamodel.NodeBootstrappingConfiguration)
// AKSNodeConfigMutator, if defined, causes aks-node-controller to be used to provision nodes
AKSNodeConfigMutator func(*Cluster, *aksnodeconfigv1.Configuration)
// VMConfigMutator is a function which mutates the base VMSS model according to the scenario's requirements
VMConfigMutator func(*armcompute.VirtualMachineScaleSet)
// CustomDataWriteFiles injects additional cloud-init write_files entries into rendered customData.
// This is for e2e-only validation scenarios.
CustomDataWriteFiles []CustomDataWriteFile
// Validator is a function where the scenario can perform any extra validation checks
Validator func(ctx context.Context, s *Scenario)
// SkipDefaultValidation is a flag to indicate whether the common validation (like spawning a pod) should be skipped.
// It shouldn't be used for majority of scenarios, currently only used for preparing VHD in a two-stage scenario
SkipDefaultValidation bool
// SkipSSHConnectivityValidation is a flag to indicate whether the ssh connectivity validation should be skipped.
// It shouldn't be used for majority of scenarios, currently only used for scenarios where the node is not expected to be reachable via ssh
SkipSSHConnectivityValidation bool
// WaitForSSHAfterReboot, if set to a non-zero duration, makes SSH connectivity validation retry with exponential backoff
// for up to this duration when encountering reboot-related errors. This is useful for scenarios where the node
// reboots during provisioning (e.g., MIG-enabled GPU nodes). Default (zero value) means no retry.
WaitForSSHAfterReboot time.Duration
// VHDCaching, if set, causes a VHD to be created first for the test scenario and then a VM to be created from that VHD.
// The main purpose is to validate VHD Caching logic and ensure a reboot step between basePrep and nodePrep doesn't break anything.
VHDCaching bool
// ExpectedError, when set, indicates that VMSS creation is expected to fail with an error containing this substring.
// The assertion is performed inside the scenario's subtest.
ExpectedError string
// UseNVMe indicates whether to use NVMe-based disk placement/controller. This is required for certain VM sizes (e.g., v6 and v7 series) which only support NVMe disk controllers.
UseNVMe bool
// SkipScriptlessNBC when true prevents the automatic scriptless_nbc sub-test from being generated.
// Use this for scenarios that depend on CSE script execution (e.g., CSE timing validation)
// which is not available in scriptless mode.
SkipScriptlessNBC bool
// EagerCSETimingExtraction when true causes CSE timing events to be extracted
// immediately after SSH is established, before other validators run.
// This prevents the Guest Agent from sweeping events before they can be read.
// Only set this on CSE performance test scenarios.
EagerCSETimingExtraction bool
}
Config represents the configuration of an AgentBaker E2E scenario.
type CreateGalleryImageRequest ¶
// CreateGalleryImageRequest groups the inputs used when creating a gallery image.
// NOTE(review): the consumer of this struct is not visible here; the field notes
// below are inferred from names and types and should be confirmed against the caller.
type CreateGalleryImageRequest struct {
// ResourceGroup is presumably the resource group that contains the gallery — confirm.
ResourceGroup string
// GalleryName is presumably the name of the gallery the image is created in — confirm.
GalleryName string
// Location is presumably the Azure location of the image — confirm.
Location string
// Arch is the image architecture as a string; accepted values are not visible here.
Arch string
// Windows presumably selects a Windows image when true — confirm.
Windows bool
// HyperVGeneration is a pointer and may therefore be nil; how nil is handled
// is not visible here.
HyperVGeneration *armcompute.HyperVGeneration
}
type CreateGalleryRequest ¶
type CustomDataWriteFile ¶
CustomDataWriteFile defines an e2e-only cloud-init write_files entry.
type GetLatestExtensionVersionRequest ¶
GetLatestExtensionVersionRequest is the cache key for VM extension version lookups.
type GetVHDRequest ¶
type Kubeclient ¶
// Kubeclient bundles the Kubernetes client handles and connection configuration
// used by its methods (CreateDaemonset, EnsureDebugDaemonsets,
// GetPodNetworkDebugPodForNode, GetProxyURL, WaitUntilNodeReady, WaitUntilPodRunning).
type Kubeclient struct {
// Dynamic is a client.Client — presumably the controller-runtime client; confirm the import.
Dynamic client.Client
// Typed is a kubernetes.Interface — presumably the client-go typed clientset; confirm the import.
Typed kubernetes.Interface
// RESTConfig is the REST configuration; presumably the one the clients were built from — confirm.
RESTConfig *rest.Config
// KubeConfig holds raw bytes; presumably the serialized kubeconfig for the cluster — confirm.
KubeConfig []byte
}
func (*Kubeclient) CreateDaemonset ¶
func (*Kubeclient) EnsureDebugDaemonsets ¶
func (k *Kubeclient) EnsureDebugDaemonsets(ctx context.Context, isNetworkIsolated bool, privateACRName string) error
This is a bit ugly, but we don't want to execute this piece concurrently with other tests.
func (*Kubeclient) GetPodNetworkDebugPodForNode ¶
func (k *Kubeclient) GetPodNetworkDebugPodForNode(ctx context.Context, kubeNodeName string) (*corev1.Pod, error)
GetPodNetworkDebugPodForNode returns a pod that's a member of the 'debugnonhost' daemonset running in the cluster. The returned pod is the one running on the node that was created specifically for the test case performing the validation checks.
func (*Kubeclient) GetProxyURL ¶
func (k *Kubeclient) GetProxyURL(ctx context.Context) (string, error)
GetProxyURL returns the proxy URL after verifying the proxy pod is ready on at least one system pool node.
func (*Kubeclient) WaitUntilNodeReady ¶
func (*Kubeclient) WaitUntilPodRunning ¶
type Scenario ¶
// Scenario represents an AgentBaker E2E scenario.
type Scenario struct {
// Description is a short description of what the scenario does and tests for
Description string
// Tags are used for filtering scenarios to run based on the tags provided
Tags Tags
// Config contains the configuration of the scenario.
// It is embedded, so its fields and methods are promoted onto Scenario.
Config
// Location is the Azure location where the scenario will run. This can be
// used to override the default location.
Location string
// K8sSystemPoolSKU is the VM size to use for the system nodepool. If empty,
// a default size will be used.
K8sSystemPoolSKU string
// Runtime contains the runtime state of the scenario. It's populated in the beginning of the test run
Runtime *ScenarioRuntime
// T is the testing handle associated with the scenario.
// NOTE(review): presumably assigned when the scenario is run — confirm where it is set.
T testing.TB
}
Scenario represents an AgentBaker E2E scenario.
func (*Scenario) GetClientPrivateKey ¶
func (*Scenario) GetContainerRegistryFQDN ¶
GetContainerRegistryFQDN returns the container registry FQDN for the cloud environment determined by the cluster's location. Uses Runtime.Cluster.Model.Location so it works for both legacy (NBC) and scriptless (AKSNodeConfig) bootstrap paths.
func (*Scenario) GetDefaultFQDNsForValidation ¶
GetDefaultFQDNsForValidation returns the public cloud FQDNs to validate in hosts file checks. AgentBaker e2e only runs in public cloud, so sovereign cloud branches are unnecessary.
func (*Scenario) GetK8sVersion ¶
func (*Scenario) GetServicePrincipalSecret ¶
func (*Scenario) GetTLSBootstrapToken ¶
func (*Scenario) HasServicePrincipalData ¶
func (*Scenario) IsHostsPluginEnabled ¶
IsHostsPluginEnabled returns true if the hosts plugin is explicitly enabled via either NBC (traditional) or AKSNodeConfig (scriptless) paths.
func (*Scenario) KubeletConfigFileEnabled ¶
func (*Scenario) PrepareAKSNodeConfig ¶
func (s *Scenario) PrepareAKSNodeConfig()
func (*Scenario) PrepareVMSSModel ¶
func (s *Scenario) PrepareVMSSModel(ctx context.Context, t testing.TB, vmss *armcompute.VirtualMachineScaleSet)
PrepareVMSSModel mutates the input VirtualMachineScaleSet based on the scenario's VMConfigMutator, if configured. This method will also use the scenario's configured VHD selector to modify the input VMSS to reference the correct VHD resource.
func (*Scenario) SecureTLSBootstrappingEnabled ¶
type ScenarioRuntime ¶
// ScenarioRuntime holds the runtime state of a scenario; it is referenced from
// Scenario.Runtime.
type ScenarioRuntime struct {
// NBC is the node bootstrapping configuration (the legacy bootstrap path).
NBC *datamodel.NodeBootstrappingConfiguration
// AKSNodeConfig is the aks-node-controller configuration (the scriptless bootstrap path).
AKSNodeConfig *aksnodeconfigv1.Configuration
// Cluster is the cluster the scenario runs against.
Cluster *Cluster
// VM describes the VM under test; presumably nil until the VMSS has been created — confirm.
VM *ScenarioVM
// VMSSName is the name of the scenario's VMSS.
VMSSName string
// EnableScriptlessNBCCSECmd: NOTE(review) exact effect is not visible here;
// presumably toggles the scriptless NBC CSE command path — confirm.
EnableScriptlessNBCCSECmd bool
CSETimingReport *CSETimingReport // eagerly extracted before GA can sweep events
}
type ScenarioVM ¶
// ScenarioVM describes the VM created for a scenario; it is the result type of
// ConfigureAndCreateVMSS and CreateVMSSWithRetry.
type ScenarioVM struct {
// KubeName is presumably the Kubernetes node name of the VM — confirm.
KubeName string
// VMSS is the scale set model the VM belongs to.
VMSS *armcompute.VirtualMachineScaleSet
// VM is the scale set VM instance model.
VM *armcompute.VirtualMachineScaleSetVM
// PrivateIP is the VM's private IP address as a string.
PrivateIP string
// SSHClient is an SSH client for the VM; presumably nil when SSH connectivity
// has not been established — confirm.
SSHClient *ssh.Client
}
func ConfigureAndCreateVMSS ¶
func ConfigureAndCreateVMSS(ctx context.Context, s *Scenario) (*ScenarioVM, error)
func CreateVMSS ¶
func CreateVMSSWithRetry ¶
func CreateVMSSWithRetry(ctx context.Context, s *Scenario) (*ScenarioVM, error)
type Tags ¶
// Tags describes a scenario's attributes and is used to filter which scenarios
// run (see MatchesFilters and MatchesAnyFilter, which take comma-separated
// "key=value" filters).
// NOTE(review): how filter keys map onto these field names is not visible
// here — confirm before relying on a particular key spelling or case.
type Tags struct {
// Name, ImageName, OS and Arch are string-valued tags.
Name string
ImageName string
OS string
Arch string
// The remaining tags are boolean flags; each one's precise meaning is defined
// by the scenarios that set it.
NetworkIsolated bool
NonAnonymousACR bool
GPU bool
WASM bool
BootstrapTokenFallback bool
KubeletCustomConfig bool
Scriptless bool
VHDCaching bool
MockAzureChinaCloud bool
VMSeriesCoverageTest bool
}
func (Tags) MatchesAnyFilter ¶
MatchesAnyFilter checks if the Tags struct matches at least one of the given filters. Filters are comma-separated "key=value" pairs (e.g., "gpu=true,os=x64"). Returns true if any filter matches, false if none match. Errors on invalid input.
func (Tags) MatchesFilters ¶
MatchesFilters checks if the Tags struct matches all given filters. Filters are comma-separated "key=value" pairs (e.g., "gpu=true,os=x64"). Returns true if all filters match, false otherwise. Errors on invalid input.
type VMSizeSKURequest ¶
VMSizeSKURequest is the cache key for Resource SKU lookups by VM size and location.