instance

package
v1.7.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 12, 2026 License: Apache-2.0, Apache-2.0 Imports: 57 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// VMCreateStartMetric tracks when VM creation starts.
	//
	// It is a counter vector labeled, in this order, by metrics.ImageLabel, metrics.SizeLabel,
	// metrics.ZoneLabel, metrics.CapacityTypeLabel and metrics.NodePoolLabel. The order matters to
	// callers that supply label values positionally.
	//
	// STABILITY: ALPHA - This metric may change or be removed without notice.
	VMCreateStartMetric = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metrics.Namespace,
			Subsystem: instanceSubsystem,
			Name:      "vm_create_start_total",
			Help:      "Total number of VM creation operations started.",
		},
		[]string{metrics.ImageLabel, metrics.SizeLabel, metrics.ZoneLabel, metrics.CapacityTypeLabel, metrics.NodePoolLabel},
	)

	// VMCreateFailureMetric tracks VM creation failures, regardless of phase.
	//
	// It carries the same five labels as VMCreateStartMetric, followed by metrics.PhaseLabel and
	// metrics.ErrorCodeLabel.
	//
	// STABILITY: ALPHA - This metric may change or be removed without notice.
	VMCreateFailureMetric = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metrics.Namespace,
			Subsystem: instanceSubsystem,
			Name:      "vm_create_failure_total",
			Help:      "Total number of VM creation failures.",
		},
		[]string{metrics.ImageLabel, metrics.SizeLabel, metrics.ZoneLabel, metrics.CapacityTypeLabel, metrics.NodePoolLabel, metrics.PhaseLabel, metrics.ErrorCodeLabel},
	)
)

We don't need to add disk specifications since they are statically defined and can be traced with the provided labels.

View Source
var (
	// KarpCapacityTypeToVMPriority maps a Karpenter capacity type (spot or on-demand) to the
	// Azure VM priority used for it (Spot or Regular, respectively).
	KarpCapacityTypeToVMPriority = map[string]armcompute.VirtualMachinePriorityTypes{
		karpv1.CapacityTypeSpot:     armcompute.VirtualMachinePriorityTypesSpot,
		karpv1.CapacityTypeOnDemand: armcompute.VirtualMachinePriorityTypesRegular,
	}
	// VMPriorityToKarpCapacityType is the inverse of KarpCapacityTypeToVMPriority: it maps an
	// Azure VM priority back to the Karpenter capacity type.
	VMPriorityToKarpCapacityType = map[armcompute.VirtualMachinePriorityTypes]string{
		armcompute.VirtualMachinePriorityTypesSpot:    karpv1.CapacityTypeSpot,
		armcompute.VirtualMachinePriorityTypesRegular: karpv1.CapacityTypeOnDemand,
	}
	// VMPriorityToScaleSetPriority maps an Azure VM priority to the matching v1beta1 scale set
	// priority value.
	//
	// Note that there is no ScaleSetPriorityToKarpCapacityType because the karpenter.sh/capacity-type
	// label is the "official" label that we actually key priority off of. Selection still works though
	// because when we list instance types on-demand offerings always have v1beta1.ScaleSetPriorityRegular
	// and spot instances always have v1beta1.ScaleSetPrioritySpot, so the correct karpenter.sh/capacity-type
	// label is still selected even if the user is using kubernetes.azure.com/scalesetpriority only on the NodePool.
	VMPriorityToScaleSetPriority = map[armcompute.VirtualMachinePriorityTypes]string{
		armcompute.VirtualMachinePriorityTypesSpot:    v1beta1.ScaleSetPrioritySpot,
		armcompute.VirtualMachinePriorityTypesRegular: v1beta1.ScaleSetPriorityRegular,
	}
)
View Source
var (
	// NodePoolTagKey is karpv1.NodePoolLabelKey with every "/" replaced by "_".
	// NOTE(review): presumably needed because the label key is reused as an Azure resource tag
	// name, where "/" is not accepted — confirm.
	NodePoolTagKey = strings.ReplaceAll(karpv1.NodePoolLabelKey, "/", "_")
)

Functions

func AKSMachineTimestampFromTag added in v1.7.0

func AKSMachineTimestampFromTag(timestampStr string) (time.Time, error)

AKSMachineTimestampFromTag parses an AKS machine creation timestamp tag value back to time.Time

func AKSMachineTimestampToMeta added in v1.7.0

func AKSMachineTimestampToMeta(t time.Time) metav1.Time

AKSMachineTimestampToMeta converts a time.Time to metav1.Time for AKS machine creation timestamps

func AKSMachineTimestampToTag added in v1.7.0

func AKSMachineTimestampToTag(t time.Time) string

AKSMachineTimestampToTag converts a time.Time to the string format used in AKS machine creation timestamp tags

func BuildJSONFromAKSMachine added in v1.7.0

func BuildJSONFromAKSMachine(aksMachine *armcontainerservice.Machine) string

BuildJSONFromAKSMachine returns a JSON string representation of an AKS Machine for logging/debugging purposes. Returns an error string if marshaling fails.

func BuildNodeClaimFromAKSMachine added in v1.7.0

func BuildNodeClaimFromAKSMachine(ctx context.Context, aksMachine *armcontainerservice.Machine, possibleInstanceTypes []*corecloudprovider.InstanceType, aksMachineLocation string) (*karpv1.NodeClaim, error)

Expect AKS machine struct to be fully populated as if it comes from GET. Not assuming that NodeClaim exists.

func BuildNodeClaimFromAKSMachineTemplate added in v1.7.0

func BuildNodeClaimFromAKSMachineTemplate(
	ctx context.Context,
	aksMachineTemplate *armcontainerservice.Machine,
	instanceType *corecloudprovider.InstanceType,
	capacityType string,
	zone *string,
	aksMachineResourceID string,
	vmResourceID string,
	isDeleting bool,
	aksMachineNodeImageVersion string,
) (*karpv1.NodeClaim, error)

Note that the template is not guaranteed to have status fields, thus they are made explicit here. Other Karpenter-level fields are also included as they may be easily retrieved during templating phase. Not assuming that NodeClaim exists.

func ConfigureAKSMachineTags added in v1.7.0

func ConfigureAKSMachineTags(opts *options.Options, nodeClass *v1beta1.AKSNodeClass, nodeClaim *karpv1.NodeClaim, creationTimestamp time.Time) map[string]*string

ConfigureAKSMachineTags returns the tags to be applied to AKS machine instances and their affiliated resources. This includes all standard tags plus the AKS machine distinguishing tag.

func ConvertToVirtualMachineIdentity

func ConvertToVirtualMachineIdentity(nodeIdentities []string) *armcompute.VirtualMachineIdentity

func CreateVirtualMachine

func CreateVirtualMachine(ctx context.Context, client VirtualMachinesAPI, rg, vmName string, vm armcompute.VirtualMachine) (*armcompute.VirtualMachine, error)

func ErrorCodeForMetrics added in v1.6.6

func ErrorCodeForMetrics(err error) string

ErrorCodeForMetrics extracts a stable Azure error code for metric labeling when possible.

func FindNodePoolFromAKSMachine added in v1.7.0

func FindNodePoolFromAKSMachine(ctx context.Context, aksMachine *armcontainerservice.Machine, kubeClient client.Client) (*karpv1.NodePool, error)

May return apimachinery.NotFoundError if NodePool is not found.

func GenerateResourceName

func GenerateResourceName(nodeClaimName string) string

E.g., aks-default-2jf98

func GetAKSLabelZoneFromAKSMachine added in v1.7.0

func GetAKSLabelZoneFromAKSMachine(aksMachine *armcontainerservice.Machine, location string) (string, error)

GetAKSLabelZoneFromAKSMachine returns the zone for the given AKS machine, or an empty string if there is no zone specified. This function is analogous to utils.GetAKSLabelZoneFromVM but for AKS machines.

func GetAKSMachineNameFromNodeClaim added in v1.7.0

func GetAKSMachineNameFromNodeClaim(nodeClaim *karpv1.NodeClaim) (string, bool)

GetAKSMachineNameFromNodeClaim extracts the AKS machine name from the NodeClaim annotations. Returns false if the annotation is not present or the value is empty, which can indicate that the NodeClaim is not associated with an AKS machine.

func GetAKSMachineNameFromNodeClaimName added in v1.7.0

func GetAKSMachineNameFromNodeClaimName(nodeClaimName string) (string, error)

ASSUMPTION: NodeClaim name is in the format of <NodePool name>-<hash suffix>. If the total length exceeds the AKS machine name limit, the exceeded part will be replaced with another deterministic hash. E.g., "thisisalongnodepoolname-a1b2c" --> "thisisalongnoz9y8x7-a1b2c"

func GetAKSMachineNameFromVMName added in v1.7.0

func GetAKSMachineNameFromVMName(aksMachinesPoolName, vmName string) (string, error)

vmName = aks-<machinesPoolName>-<aksMachineName>-########-vm. This is distinguishable from VM instance name as its suffix will always be 5 alphanumerics rather than "vm".

func GetCapacityTypeFromVM added in v1.6.6

func GetCapacityTypeFromVM(vm *armcompute.VirtualMachine) string

func GetManagedExtensionNames added in v1.6.2

func GetManagedExtensionNames(provisionMode string, env *auth.Environment) []string

GetManagedExtensionNames gets the names of the VM extensions managed by Karpenter. This is a set of 1 or 2 extensions (depending on provisionMode): aksIdentifyingExtension and (sometimes) cse.

func GetNICListQueryBuilder added in v0.6.2

func GetNICListQueryBuilder(rg string) *kql.Builder

GetNICListQueryBuilder returns a KQL query builder for listing NICs with nodepool tags

func GetScaleSetPriorityLabelFromVM added in v1.7.0

func GetScaleSetPriorityLabelFromVM(vm *armcompute.VirtualMachine) string

func GetVMListQueryBuilder added in v0.6.2

func GetVMListQueryBuilder(rg string) *kql.Builder

GetVMListQueryBuilder returns a KQL query builder for listing VMs with nodepool tags

func IsAKSMachineOrMachinesPoolNotFound added in v1.6.8

func IsAKSMachineOrMachinesPoolNotFound(err error) bool

func NewAKSMachineTimestamp added in v1.7.0

func NewAKSMachineTimestamp() time.Time

NewAKSMachineTimestamp returns the current time truncated to centisecond precision for AKS machine creation timestamps

func NewQueryRequest

func NewQueryRequest(subscriptionID *string, query string) *arg.QueryRequest

func UpdateVirtualMachine

func UpdateVirtualMachine(ctx context.Context, client VirtualMachinesAPI, rg, vmName string, updates armcompute.VirtualMachineUpdate) error

func ZeroAKSMachineTimestamp added in v1.7.0

func ZeroAKSMachineTimestamp() time.Time

Types

type AKSAgentPoolsAPI added in v1.6.8

// AKSAgentPoolsAPI declares the agent pool operations: fetching an agent pool (Get) and
// starting a deletion of machines from an agent pool (BeginDeleteMachines).
// NOTE(review): the signatures appear to mirror the Azure SDK armcontainerservice agent pools
// client so that the real client satisfies this interface — confirm.
type AKSAgentPoolsAPI interface {
	// Get returns the agent pool identified by resource group, resource name and agent pool name.
	Get(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, options *armcontainerservice.AgentPoolsClientGetOptions) (armcontainerservice.AgentPoolsClientGetResponse, error)
	// BeginDeleteMachines starts deleting the machines described by aksMachines from the given
	// agent pool and returns a poller for the delete-machines response.
	BeginDeleteMachines(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachines armcontainerservice.AgentPoolDeleteMachinesParameter, options *armcontainerservice.AgentPoolsClientBeginDeleteMachinesOptions) (*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse], error)
}

func NewNoAKSAgentPoolsClient added in v1.6.8

func NewNoAKSAgentPoolsClient() AKSAgentPoolsAPI

NewNoAKSAgentPoolsClient creates a new dry AKS agent pools client, attempting to create real client internally

type AKSMachinePromise added in v1.7.0

// AKSMachinePromise is the promise type returned by AKSMachineProvider.BeginCreate. It has
// Cleanup, GetInstanceName and Wait methods.
type AKSMachinePromise struct {
	// AKSMachineTemplate is the local template struct for the machine; as a template, its
	// read-only fields may not be populated.
	// NOTE(review): the remaining exported fields presumably hold the same-named arguments of
	// NewAKSMachinePromise — confirm against the constructor.
	AKSMachineTemplate *armcontainerservice.Machine
	AKSMachineName     string
	InstanceType       *corecloudprovider.InstanceType // Despite the reference nature, this is guaranteed to exist
	CapacityType       string
	Zone               string

	// Identifiers made explicit on the promise alongside the template.
	AKSMachineID               string
	AKSMachineNodeImageVersion string
	VMResourceID               string
	// contains filtered or unexported fields
}

Notes on terminology: An "instance" is a remote object, created by the API based on the template. A "template" is a local struct, populated from Karpenter-provided parameters with the logic further below. A "template" shares the struct with an "instance" representation, but read-only fields may not be populated. Ideally, the types should have been separated to avoid making cross-module assumptions about the existence of certain fields.

TODO: Consider extracting the template-related fields (AKSMachineTemplate, AKSMachineName, InstanceType, CapacityType, Zone, AKSMachineID, AKSMachineNodeImageVersion, VMResourceID) into a dedicated struct (e.g., AKSMachineDetails or AKSMachineTemplateInfo). This would clarify the relationship between the promise fields and functions like BuildNodeClaimFromAKSMachineTemplate, as well as reduce the number of loose arguments passed around. More discussion: https://github.com/Azure/karpenter-provider-azure/pull/1197#discussion_r2482957255

func NewAKSMachinePromise added in v1.7.0

func NewAKSMachinePromise(
	providerRef AKSMachineProvider,
	aksMachineTemplate *armcontainerservice.Machine,
	waitFunc func() error,
	aksMachineName string,
	instanceType *corecloudprovider.InstanceType,
	capacityType string,
	zone string,
	aksMachineID string,
	aksMachineNodeImageVersion string,
	vmResourceID string,
) *AKSMachinePromise

func (*AKSMachinePromise) Cleanup added in v1.7.0

func (p *AKSMachinePromise) Cleanup(ctx context.Context) error

func (*AKSMachinePromise) GetInstanceName added in v1.7.0

func (p *AKSMachinePromise) GetInstanceName() string

func (*AKSMachinePromise) Wait added in v1.7.0

func (p *AKSMachinePromise) Wait() error

type AKSMachineProvider added in v1.7.0

// AKSMachineProvider defines the create, update, get, list and delete operations for AKS machine
// instances, plus an accessor for the location of the AKS machines pool.
type AKSMachineProvider interface {
	// BeginCreate starts the creation of an AKS machine instance.
	// Returns a promise that must be waited on to complete the creation.
	BeginCreate(ctx context.Context, nodeClass *v1beta1.AKSNodeClass, nodeClaim *karpv1.NodeClaim, instanceTypes []*corecloudprovider.InstanceType) (*AKSMachinePromise, error)
	// Update updates the AKS machine instance with the specified name. Uses ETag for optimistic concurrency control.
	// Returns NodeClaimNotFoundError if not found.
	Update(ctx context.Context, aksMachineName string, aksMachine armcontainerservice.Machine, etag *string) error
	// Get retrieves the AKS machine instance with the specified AKS machine name. Returns NodeClaimNotFoundError if not found.
	Get(ctx context.Context, aksMachineName string) (*armcontainerservice.Machine, error)
	// List lists all AKS machine instances in the cluster.
	List(ctx context.Context) ([]*armcontainerservice.Machine, error)
	// Delete deletes the AKS machine instance with the specified name. Returns NodeClaimNotFoundError if not found.
	Delete(ctx context.Context, aksMachineName string) error
	// GetMachinesPoolLocation returns the location of the AKS machines pool. The only reason this needs to be exported is that armcontainerservice.Machine does not have the location field.
	GetMachinesPoolLocation() string
}

type AKSMachinesAPI added in v1.6.8

// AKSMachinesAPI declares the AKS machine operations: create-or-update, get and paged list.
// Every method addresses machines by resource group, resource name and agent pool name.
// NOTE(review): the signatures appear to mirror the Azure SDK armcontainerservice machines
// client so that the real client satisfies this interface — confirm.
type AKSMachinesAPI interface {
	// BeginCreateOrUpdate starts creating or updating the named AKS machine from parameters and
	// returns a poller for the create-or-update response.
	BeginCreateOrUpdate(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachineName string, parameters armcontainerservice.Machine, options *armcontainerservice.MachinesClientBeginCreateOrUpdateOptions) (*runtime.Poller[armcontainerservice.MachinesClientCreateOrUpdateResponse], error)
	// Get returns the named AKS machine.
	Get(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachineName string, options *armcontainerservice.MachinesClientGetOptions) (armcontainerservice.MachinesClientGetResponse, error)
	// NewListPager returns a pager over the list responses for the given agent pool. It takes no
	// context and returns no error itself.
	NewListPager(resourceGroupName string, resourceName string, agentPoolName string, options *armcontainerservice.MachinesClientListOptions) *runtime.Pager[armcontainerservice.MachinesClientListResponse]
}

func NewNoAKSMachinesClient added in v1.6.8

func NewNoAKSMachinesClient() AKSMachinesAPI

type AZClient

// AZClient bundles the Azure API clients. Only some clients are exported fields; the
// rest are unexported (for example, the subnets client is reached through the SubnetsClient method).
type AZClient struct {
	// Image-family clients: node image versions, community gallery image versions, and node bootstrapping.
	NodeImageVersionsClient imagefamilytypes.NodeImageVersionsAPI
	ImageVersionsClient     imagefamilytypes.CommunityGalleryImageVersionsAPI
	NodeBootstrappingClient imagefamilytypes.NodeBootstrappingAPI
	// SKUClient still uses track 1 because skewer does not support the track 2 path. We need to refactor this once skewer supports track 2.
	SKUClient                   skewer.ResourceClient
	LoadBalancersClient         loadbalancer.LoadBalancersAPI
	NetworkSecurityGroupsClient networksecuritygroup.API
	SubscriptionsClient         zone.SubscriptionsAPI
	// contains filtered or unexported fields
}

TODO: Move this to another package that more correctly reflects its usage across multiple providers

func NewAZClient

func NewAZClient(ctx context.Context, cfg *auth.Config, env *auth.Environment, cred azcore.TokenCredential) (*AZClient, error)

func NewAZClientFromAPI

func NewAZClientFromAPI(
	virtualMachinesClient VirtualMachinesAPI,
	azureResourceGraphClient AzureResourceGraphAPI,
	aksMachinesClient AKSMachinesAPI,
	agentPoolsClient AKSAgentPoolsAPI,
	virtualMachinesExtensionClient VirtualMachineExtensionsAPI,
	interfacesClient NetworkInterfacesAPI,
	subnetsClient SubnetsAPI,
	loadBalancersClient loadbalancer.LoadBalancersAPI,
	networkSecurityGroupsClient networksecuritygroup.API,
	imageVersionsClient imagefamilytypes.CommunityGalleryImageVersionsAPI,
	nodeImageVersionsClient imagefamilytypes.NodeImageVersionsAPI,
	nodeBootstrappingClient imagefamilytypes.NodeBootstrappingAPI,
	skuClient skewer.ResourceClient,
	subscriptionsClient zone.SubscriptionsAPI,
) *AZClient

func (*AZClient) SubnetsClient added in v1.6.2

func (c *AZClient) SubnetsClient() SubnetsAPI

type DefaultAKSMachineProvider added in v1.7.0

type DefaultAKSMachineProvider struct {
	// contains filtered or unexported fields
}

func NewAKSMachineProvider added in v1.7.0

func NewAKSMachineProvider(
	azClient *AZClient,
	instanceTypeProvider instancetype.Provider,
	imageResolver imagefamily.Resolver,
	offeringsCache *cache.UnavailableOfferings,
	subscriptionID string,
	clusterResourceGroup string,
	clusterName string,
	aksMachinesPoolName string,
	aksMachinesPoolLocation string,
) *DefaultAKSMachineProvider

func (*DefaultAKSMachineProvider) BeginCreate added in v1.7.0

func (p *DefaultAKSMachineProvider) BeginCreate(
	ctx context.Context,
	nodeClass *v1beta1.AKSNodeClass,
	nodeClaim *karpv1.NodeClaim,
	instanceTypes []*corecloudprovider.InstanceType,
) (*AKSMachinePromise, error)

BeginCreate creates an instance given the constraints. Note that the returned instance may not be finished provisioning yet. Errors that occur on the "sync side" of the VM create, such as BadRequest due to invalid user input, and similar, will have the error returned here. Errors that occur on the "async side" of the VM create (after the request is accepted) will be returned from AKSMachinePromise.Wait().

func (*DefaultAKSMachineProvider) Delete added in v1.7.0

func (p *DefaultAKSMachineProvider) Delete(ctx context.Context, aksMachineName string) error

func (*DefaultAKSMachineProvider) Get added in v1.7.0

ASSUMPTION: the AKS machine will be in the current p.aksMachinesPoolName. Otherwise need rework to pass the pool name in.

func (*DefaultAKSMachineProvider) GetMachinesPoolLocation added in v1.7.0

func (p *DefaultAKSMachineProvider) GetMachinesPoolLocation() string

func (*DefaultAKSMachineProvider) List added in v1.7.0

func (*DefaultAKSMachineProvider) Update added in v1.7.0

func (p *DefaultAKSMachineProvider) Update(ctx context.Context, aksMachineName string, aksMachine armcontainerservice.Machine, etag *string) error

type DefaultVMProvider added in v1.6.6

type DefaultVMProvider struct {
	// contains filtered or unexported fields
}

func NewDefaultVMProvider added in v1.6.6

func NewDefaultVMProvider(
	azClient *AZClient,
	instanceTypeProvider instancetype.Provider,
	launchTemplateProvider *launchtemplate.Provider,
	loadBalancerProvider *loadbalancer.Provider,
	networkSecurityGroupProvider *networksecuritygroup.Provider,
	offeringsCache *cache.UnavailableOfferings,
	location string,
	resourceGroup string,
	subscriptionID string,
	provisionMode string,
	diskEncryptionSetID string,
	env *auth.Environment,
) *DefaultVMProvider

func (*DefaultVMProvider) BeginCreate added in v1.6.6

func (p *DefaultVMProvider) BeginCreate(
	ctx context.Context,
	nodeClass *v1beta1.AKSNodeClass,
	nodeClaim *karpv1.NodeClaim,
	instanceTypes []*corecloudprovider.InstanceType,
) (*VirtualMachinePromise, error)

BeginCreate creates an instance given the constraints. instanceTypes should be sorted by priority for spot capacity type. Note that the returned instance may not be finished provisioning yet. Errors that occur on the "sync side" of the VM create, such as quota/capacity, BadRequest due to invalid user input, and similar, will have the error returned here. Errors that occur on the "async side" of the VM create (after the request is accepted) will be returned from VirtualMachinePromise.Wait().

func (*DefaultVMProvider) Delete added in v1.6.6

func (p *DefaultVMProvider) Delete(ctx context.Context, resourceName string) error

func (*DefaultVMProvider) DeleteNic added in v1.6.6

func (p *DefaultVMProvider) DeleteNic(ctx context.Context, nicName string) error

func (*DefaultVMProvider) Get added in v1.6.6

func (*DefaultVMProvider) GetNic added in v1.6.6

func (p *DefaultVMProvider) GetNic(ctx context.Context, rg, nicName string) (*armnetwork.Interface, error)

func (*DefaultVMProvider) List added in v1.6.6

func (*DefaultVMProvider) ListNics added in v1.6.6

func (p *DefaultVMProvider) ListNics(ctx context.Context) ([]*armnetwork.Interface, error)

ListNics returns all network interfaces in the resource group that have the nodepool tag

func (*DefaultVMProvider) Update added in v1.6.6

Update updates the VM with the given updates. If Tags are specified, the tags are also updated on the associated network interface and VM extensions. Note that this means that this method can fail if the extensions have not been created yet. It is expected that the caller handles this and retries the update to propagate the tags to the extensions once they're created.

type NetworkInterfacesAPI

// NetworkInterfacesAPI declares the network interface (NIC) operations: create-or-update,
// delete, get and tag update. Every method addresses a NIC by resource group and interface name.
// NOTE(review): the signatures appear to mirror the Azure SDK armnetwork interfaces client so
// that the real client satisfies this interface — confirm.
type NetworkInterfacesAPI interface {
	// BeginCreateOrUpdate starts creating or updating the NIC from parameters and returns a
	// poller for the create-or-update response.
	BeginCreateOrUpdate(ctx context.Context, resourceGroupName string, networkInterfaceName string, parameters armnetwork.Interface, options *armnetwork.InterfacesClientBeginCreateOrUpdateOptions) (*runtime.Poller[armnetwork.InterfacesClientCreateOrUpdateResponse], error)
	// BeginDelete starts deleting the NIC and returns a poller for the delete response.
	BeginDelete(ctx context.Context, resourceGroupName string, networkInterfaceName string, options *armnetwork.InterfacesClientBeginDeleteOptions) (*runtime.Poller[armnetwork.InterfacesClientDeleteResponse], error)
	// Get returns the named NIC.
	Get(ctx context.Context, resourceGroupName string, networkInterfaceName string, options *armnetwork.InterfacesClientGetOptions) (armnetwork.InterfacesClientGetResponse, error)
	// UpdateTags applies the given tags object to the named NIC.
	UpdateTags(ctx context.Context, resourceGroupName string, networkInterfaceName string, tags armnetwork.TagsObject, options *armnetwork.InterfacesClientUpdateTagsOptions) (armnetwork.InterfacesClientUpdateTagsResponse, error)
}

type Promise added in v1.6.6

// Promise is the common interface over an instance whose creation has been started. In this
// package, *AKSMachinePromise and *VirtualMachinePromise both declare these three methods.
type Promise interface {
	// Cleanup removes the instance from the cloud provider.
	Cleanup(ctx context.Context) error
	// Wait blocks until the instance is ready.
	Wait() error
	// GetInstanceName returns the name of the instance. Recommended to be used for logging only due to generic nature.
	GetInstanceName() string
}

Intended for lifecycle handling on the higher abstractions.

type Resource

// Resource is the element type of the slice returned by GetResourceData: a map from string keys
// to arbitrary values. It is a type alias, so it is interchangeable with map[string]any
// (and the equivalent map[string]interface{}) without conversion.
// NOTE(review): presumably each map is one decoded Azure Resource Graph result row — confirm.
type Resource = map[string]any

func GetResourceData

func GetResourceData(ctx context.Context, client AzureResourceGraphAPI, req arg.QueryRequest) ([]Resource, error)

Queries Azure Resource Graph using Resources() and returns a list of all pages of data.

type SubnetsAPI added in v1.6.2

// SubnetsAPI declares the single subnet operation: Get.
// NOTE(review): the signature appears to mirror the Azure SDK armnetwork subnets client so that
// the real client satisfies this interface — confirm.
type SubnetsAPI interface {
	// Get returns the subnet identified by resource group, virtual network name and subnet name.
	Get(ctx context.Context, resourceGroupName string, virtualNetworkName string, subnetName string, options *armnetwork.SubnetsClientGetOptions) (armnetwork.SubnetsClientGetResponse, error)
}

type VirtualMachinePromise added in v1.4.0

// VirtualMachinePromise is the promise type returned by DefaultVMProvider.BeginCreate. It has
// Cleanup, GetInstanceName and Wait methods.
type VirtualMachinePromise struct {
	// VM is the virtual machine this promise refers to; WaitFunc is a caller-supplied blocking
	// function returning an error.
	// NOTE(review): WaitFunc is presumably what Wait invokes to block until the async side of
	// the create finishes — confirm against the Wait implementation.
	VM       *armcompute.VirtualMachine
	WaitFunc func() error
	// contains filtered or unexported fields
}

func (*VirtualMachinePromise) Cleanup added in v1.6.6

func (p *VirtualMachinePromise) Cleanup(ctx context.Context) error

func (*VirtualMachinePromise) GetInstanceName added in v1.6.6

func (p *VirtualMachinePromise) GetInstanceName() string

func (*VirtualMachinePromise) Wait added in v1.4.0

func (p *VirtualMachinePromise) Wait() error

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL