Documentation
¶
Index ¶
- Variables
- func AiStudioDeploymentLink(tenantId string, subscriptionId string, resourceGroup string, ...) string
- func AiStudioWorkspaceLink(tenantId string, subscriptionId string, resourceGroup string, ...) string
- func IsFinetuneUsageName(usageName string) bool
- func ModelHasDefaultVersion(model AiModel) bool
- func ParseConfig[T comparable](config any) (*T, error)
- func ResolveCapacity(sku AiModelSku, preferred *int32) int32
- func ResolveCapacityWithQuota(sku AiModelSku, preferred *int32, remaining float64) (int32, bool)
- type AiModel
- type AiModelDeployment
- type AiModelService
- func (s *AiModelService) FilterModelsByQuotaAcrossLocations(ctx context.Context, subscriptionId string, models []AiModel, ...) ([]AiModel, error)
- func (s *AiModelService) ListFilteredModels(ctx context.Context, subscriptionId string, options *FilterOptions) ([]AiModel, error)
- func (s *AiModelService) ListLocations(ctx context.Context, subscriptionId string) ([]string, error)
- func (s *AiModelService) ListLocationsWithQuota(ctx context.Context, subscriptionId string, allowedLocations []string, ...) ([]string, error)
- func (s *AiModelService) ListModelLocationsWithQuota(ctx context.Context, subscriptionId string, modelName string, ...) ([]ModelLocationQuota, error)
- func (s *AiModelService) ListModelSkus(ctx context.Context, subscriptionId string, modelName string, location string, ...) ([]AiModelSku, error)
- func (s *AiModelService) ListModelVersions(ctx context.Context, subscriptionId string, modelName string, location string) ([]AiModelVersion, string, error)
- func (s *AiModelService) ListModels(ctx context.Context, subscriptionId string, locations []string) ([]AiModel, error)
- func (s *AiModelService) ListUsages(ctx context.Context, subscriptionId string, location string) ([]AiModelUsage, error)
- func (s *AiModelService) ResolveModelDeployments(ctx context.Context, subscriptionId string, modelName string, ...) ([]AiModelDeployment, error)
- func (s *AiModelService) ResolveModelDeploymentsWithQuota(ctx context.Context, subscriptionId string, modelName string, ...) ([]AiModelDeployment, error)
- type AiModelSku
- type AiModelUsage
- type AiModelVersion
- type ComponentConfig
- type DeploymentConfig
- type DeploymentOptions
- type EndpointDeploymentConfig
- type FilterOptions
- type Flow
- type ModelLocationQuota
- type PythonBridge
- type QuotaCheckOptions
- type QuotaRequirement
- type Scope
- type ScriptPath
Constants ¶
This section is empty.
Variables ¶
var ( // ErrQuotaLocationRequired indicates quota checks were requested without exactly one location. ErrQuotaLocationRequired = errors.New("quota checking requires exactly one location") // ErrModelNotFound indicates the requested model was not found in the effective model catalog. ErrModelNotFound = errors.New("model not found") // ErrNoDeploymentMatch indicates no deployment candidate matched provided filters/constraints. ErrNoDeploymentMatch = errors.New("no deployment match") )
Functions ¶
func AiStudioDeploymentLink ¶
func AiStudioDeploymentLink( tenantId string, subscriptionId string, resourceGroup string, workspaceName string, endpointName string, deploymentName string, ) string
AiStudioDeploymentLink returns a link to the Azure AI Studio deployment page.
func AiStudioWorkspaceLink ¶
func AiStudioWorkspaceLink(tenantId string, subscriptionId string, resourceGroup string, workspaceName string) string
AiStudioWorkspaceLink returns a link to the Azure AI Studio workspace page
func IsFinetuneUsageName ¶
IsFinetuneUsageName reports whether the given usage name represents a fine-tune SKU. Fine-tune usage names end with "-finetune" (case-insensitive).
func ModelHasDefaultVersion ¶
ModelHasDefaultVersion returns true if any version of the model is marked as default.
func ParseConfig ¶
func ParseConfig[T comparable](config any) (*T, error)
ParseConfig parses a config from a generic interface.
func ResolveCapacity ¶
func ResolveCapacity(sku AiModelSku, preferred *int32) int32
ResolveCapacity resolves the deployment capacity for a SKU. If preferred is set and valid within the SKU's min/max/step constraints, it is used. Otherwise, it falls back to the SKU's default capacity.
func ResolveCapacityWithQuota ¶ added in v1.23.14
func ResolveCapacityWithQuota(sku AiModelSku, preferred *int32, remaining float64) (int32, bool)
ResolveCapacityWithQuota resolves the deployment capacity for a SKU while considering remaining quota. If preferred is set, it must fit within the remaining quota or resolution fails. When preferred is unset and the default capacity exceeds remaining quota, it falls back to the highest valid capacity within the SKU constraints that still fits in the remaining quota.
Types ¶
type AiModel ¶
type AiModel struct {
// Name is the model name, e.g. "gpt-4o".
Name string
// Format is the model format, e.g. "OpenAI".
Format string
// LifecycleStatus is the model lifecycle status, e.g. "preview", "stable".
LifecycleStatus string
// Capabilities lists the model's capabilities, e.g. ["chat", "embeddings"].
Capabilities []string
// Versions lists the available versions of this model.
Versions []AiModelVersion
// Locations lists the Azure locations where this model is available.
Locations []string
}
AiModel represents an AI model available in the Azure Cognitive Services catalog. It is SDK-agnostic and decoupled from armcognitiveservices types.
func FilterModels ¶
func FilterModels(models []AiModel, options *FilterOptions) []AiModel
FilterModels applies FilterOptions to a list of models.
func FilterModelsByQuota ¶
func FilterModelsByQuota( models []AiModel, usages []AiModelUsage, minRemaining float64, ) []AiModel
FilterModelsByQuota cross-references models' SKU usage names against usage data to filter out models without sufficient remaining capacity.
type AiModelDeployment ¶
type AiModelDeployment struct {
// ModelName is the model name, e.g. "gpt-4o".
ModelName string
// Format is the model format, e.g. "OpenAI".
Format string
// Version is the model version, e.g. "2024-05-13".
Version string
// Location is the Azure location for this deployment.
Location string
// Sku is the selected SKU for this deployment.
Sku AiModelSku
// Capacity is the resolved deployment capacity in units.
// Resolved from: DeploymentOptions.Capacity → Sku.DefaultCapacity → 0 (caller must handle).
Capacity int32
// RemainingQuota is the subscription quota remaining at this location for this SKU.
// Only populated when a quota check is performed. nil means no quota check was done.
RemainingQuota *float64
}
AiModelDeployment is a fully resolved deployment configuration.
Capacity vs Quota:
- Capacity is deployment-level: how many units this specific deployment will consume.
- RemainingQuota is subscription-level: how much total capacity remains at this location for this SKU across all deployments (limit - current_value from usage API).
Constraint: Capacity must be <= RemainingQuota for the deployment to succeed.
type AiModelService ¶
type AiModelService struct {
// contains filtered or unexported fields
}
AiModelService provides operations for querying AI model availability, resolving deployments, and checking quota/usage from Azure Cognitive Services.
func NewAiModelService ¶
func NewAiModelService( azureClient *azapi.AzureClient, subManager *account.SubscriptionsManager, ) *AiModelService
NewAiModelService creates a new AiModelService.
func (*AiModelService) FilterModelsByQuotaAcrossLocations ¶
func (s *AiModelService) FilterModelsByQuotaAcrossLocations( ctx context.Context, subscriptionId string, models []AiModel, locations []string, minRemaining float64, ) ([]AiModel, error)
FilterModelsByQuotaAcrossLocations filters models to those having sufficient quota in at least one location. When locations is empty, model-declared locations are used.
func (*AiModelService) ListFilteredModels ¶
func (s *AiModelService) ListFilteredModels( ctx context.Context, subscriptionId string, options *FilterOptions, ) ([]AiModel, error)
ListFilteredModels fetches and filters AI models based on the provided criteria.
func (*AiModelService) ListLocations ¶
func (s *AiModelService) ListLocations( ctx context.Context, subscriptionId string, ) ([]string, error)
ListLocations returns AI Services-supported location names that can be used for model queries.
func (*AiModelService) ListLocationsWithQuota ¶
func (s *AiModelService) ListLocationsWithQuota( ctx context.Context, subscriptionId string, allowedLocations []string, requirements []QuotaRequirement, ) ([]string, error)
ListLocationsWithQuota returns locations with sufficient quota for all given requirements. When allowedLocations are provided, they are intersected with AI Services-supported locations to avoid querying locations where AI Services are not available.
func (*AiModelService) ListModelLocationsWithQuota ¶
func (s *AiModelService) ListModelLocationsWithQuota( ctx context.Context, subscriptionId string, modelName string, allowedLocations []string, minRemaining float64, ) ([]ModelLocationQuota, error)
ListModelLocationsWithQuota returns model locations that have sufficient remaining quota. MaxRemainingQuota is the max remaining quota across the model's SKU usage names in each location where usage data exists.
func (*AiModelService) ListModelSkus ¶
func (s *AiModelService) ListModelSkus( ctx context.Context, subscriptionId string, modelName string, location string, version string, ) ([]AiModelSku, error)
ListModelSkus returns available SKUs for a model+version at a location.
func (*AiModelService) ListModelVersions ¶
func (s *AiModelService) ListModelVersions( ctx context.Context, subscriptionId string, modelName string, location string, ) ([]AiModelVersion, string, error)
ListModelVersions returns available versions for a specific model at a location.
func (*AiModelService) ListModels ¶
func (s *AiModelService) ListModels( ctx context.Context, subscriptionId string, locations []string, ) ([]AiModel, error)
ListModels fetches AI models from the Azure Cognitive Services catalog. If locations is empty, fetches across all subscription locations in parallel.
func (*AiModelService) ListUsages ¶
func (s *AiModelService) ListUsages( ctx context.Context, subscriptionId string, location string, ) ([]AiModelUsage, error)
ListUsages returns quota/usage data for a location.
func (*AiModelService) ResolveModelDeployments ¶
func (s *AiModelService) ResolveModelDeployments( ctx context.Context, subscriptionId string, modelName string, options *DeploymentOptions, ) ([]AiModelDeployment, error)
ResolveModelDeployments returns all valid deployment configurations for the given model. Returns multiple candidates when multiple version/SKU/location combos are valid. Capacity resolution: options.Capacity → SKU default → 0 (caller must handle).
func (*AiModelService) ResolveModelDeploymentsWithQuota ¶
func (s *AiModelService) ResolveModelDeploymentsWithQuota( ctx context.Context, subscriptionId string, modelName string, options *DeploymentOptions, quotaOpts *QuotaCheckOptions, ) ([]AiModelDeployment, error)
ResolveModelDeploymentsWithQuota resolves deployments and filters by quota. Skips SKUs where resolved capacity exceeds remaining quota. Populates RemainingQuota on results.
type AiModelSku ¶
type AiModelSku struct {
// Name is the SKU name, e.g. "GlobalStandard", "Standard".
Name string
// UsageName is the quota usage name used to join with usage/quota data,
// e.g. "OpenAI.Standard.gpt-4o".
UsageName string
// DefaultCapacity is the suggested deployment capacity (0 if unavailable).
DefaultCapacity int32
// MinCapacity is the minimum allowed deployment capacity.
MinCapacity int32
// MaxCapacity is the maximum allowed deployment capacity.
MaxCapacity int32
// CapacityStep is the capacity increment granularity.
CapacityStep int32
}
AiModelSku represents a deployment SKU with its capacity constraints.
type AiModelUsage ¶
type AiModelUsage struct {
// Name is the quota usage name, e.g. "OpenAI.Standard.gpt-4o".
Name string
// CurrentValue is the amount of quota currently consumed.
CurrentValue float64
// Limit is the total quota limit for this usage name.
Limit float64
}
AiModelUsage represents a subscription-level quota/usage entry for a specific model SKU at a location.
type AiModelVersion ¶
type AiModelVersion struct {
// Version is the version string, e.g. "2024-05-13".
Version string
// IsDefault indicates whether this is the default version.
IsDefault bool
// Skus lists the available SKUs for this version.
Skus []AiModelSku
}
AiModelVersion represents a specific version of an AI model.
type ComponentConfig ¶
type ComponentConfig struct {
Name osutil.ExpandableString `yaml:"name,omitempty"`
Path string `yaml:"path,omitempty"`
Overrides map[string]osutil.ExpandableString `yaml:"overrides,omitempty"`
}
ComponentConfig is a base configuration structure used by multiple AI components
type DeploymentConfig ¶
type DeploymentConfig struct {
ComponentConfig `yaml:",inline"`
// A map of environment variables to set for the deployment
Environment osutil.ExpandableMap `yaml:"environment,omitempty"`
}
type DeploymentOptions ¶
type DeploymentOptions struct {
// Locations lists preferred locations. If empty, location is left unset on results.
Locations []string
// Versions lists preferred versions. If empty, all versions are included.
Versions []string
// Skus lists preferred SKU names, e.g. ["GlobalStandard", "Standard"]. If empty, all SKUs are included.
Skus []string
// Capacity is the preferred deployment capacity. If set and valid
// (within min/max, aligned to step), used directly. If nil, uses SKU default.
Capacity *int32
// IncludeFinetuneSkus controls whether fine-tune SKUs (usage names ending with
// "-finetune") are included. Defaults to false (excluded).
IncludeFinetuneSkus bool
}
DeploymentOptions specifies preferences for resolving a model deployment. All fields are optional filters. When empty, no filtering is applied for that dimension.
type EndpointDeploymentConfig ¶
type EndpointDeploymentConfig struct {
Workspace osutil.ExpandableString `yaml:"workspace,omitempty"`
Environment *ComponentConfig `yaml:"environment,omitempty"`
Model *ComponentConfig `yaml:"model,omitempty"`
Flow *ComponentConfig `yaml:"flow,omitempty"`
Deployment *DeploymentConfig `yaml:"deployment,omitempty"`
}
EndpointDeploymentConfig is a configuration structure for an ML online endpoint deployment
type FilterOptions ¶
type FilterOptions struct {
// Locations filters to models available at these locations.
Locations []string
// Capabilities filters by model capabilities, e.g. ["chat", "embeddings"].
Capabilities []string
// Formats filters by model format, e.g. ["OpenAI"].
Formats []string
// Statuses filters by lifecycle status, e.g. ["preview", "stable"].
Statuses []string
// ExcludeModelNames excludes models by name (for multi-model selection flows).
ExcludeModelNames []string
}
FilterOptions specifies criteria for filtering AI models.
type Flow ¶
type Flow struct {
Name string `json:"name"`
Description string `json:"description"`
Type string `json:"type"`
Path string `json:"path"`
DisplayName string `json:"display_name"`
Tags map[string]string `json:"tags"`
}
Flow is a configuration that defines a Prompt flow component.
type ModelLocationQuota ¶
type ModelLocationQuota struct {
// Location is the Azure location name.
Location string
// MaxRemainingQuota is the maximum remaining quota across model SKUs with usage entries.
MaxRemainingQuota float64
}
ModelLocationQuota represents model quota availability in a specific location.
type PythonBridge ¶
type PythonBridge interface {
Initialize(ctx context.Context) error
RequiredExternalTools(ctx context.Context) []tools.ExternalTool
Run(ctx context.Context, scriptName ScriptPath, args ...string) (*exec.RunResult, error)
}
PythonBridge is an interface for executing Python components from the embedded AI resources project.
func NewPythonBridge ¶
func NewPythonBridge( azdCtx *azdcontext.AzdContext, pythonCli *python.Cli, ) PythonBridge
NewPythonBridge creates a new PythonBridge instance
type QuotaCheckOptions ¶
type QuotaCheckOptions struct {
// MinRemainingCapacity is the minimum remaining quota required per SKU.
// Models/deployments where no SKU meets this threshold are excluded.
// 0 means "any remaining > 0" (i.e. not fully exhausted).
MinRemainingCapacity float64
}
QuotaCheckOptions enables quota-aware model/deployment selection. When provided, the service fetches usage data alongside the model catalog and cross-references via AiModelSku.UsageName == AiModelUsage.Name.
type QuotaRequirement ¶
type QuotaRequirement struct {
// UsageName is the quota usage name to check, e.g. "OpenAI.Standard.gpt-4o".
UsageName string
// MinCapacity is the minimum remaining capacity needed. If 0, defaults to 1.
MinCapacity float64
}
QuotaRequirement specifies a single quota check: the usage name to check and the minimum remaining capacity needed.
type Scope ¶
type Scope struct {
// contains filtered or unexported fields
}
Scope is a context-based structure that defines the Azure scope of an AI component.
func (*Scope) ResourceGroup ¶
ResourceGroup returns the resource group from the scope
func (*Scope) SubscriptionId ¶
SubscriptionId returns the subscription ID from the scope
type ScriptPath ¶
type ScriptPath string
ScriptPath is a type to represent the path of a Python script
const ( // PromptFlowClient is the path to the PromptFlow Client Python script PromptFlowClient ScriptPath = "pf_client.py" // MLClient is the path to the ML Client Python script MLClient ScriptPath = "ml_client.py" )