Documentation
¶
Overview ¶
These APIs allow you to manage Serving Endpoints, Serving Endpoints Data Plane, etc.
Index ¶
- type Ai21LabsConfig
- type AiGatewayConfig
- type AiGatewayGuardrailParameters
- type AiGatewayGuardrailPiiBehavior
- type AiGatewayGuardrailPiiBehaviorBehavior
- type AiGatewayGuardrails
- type AiGatewayInferenceTableConfig
- type AiGatewayRateLimit
- type AiGatewayRateLimitKey
- type AiGatewayRateLimitRenewalPeriod
- type AiGatewayUsageTrackingConfig
- type AmazonBedrockConfig
- type AmazonBedrockConfigBedrockProvider
- type AnthropicConfig
- type ApiKeyAuth
- type AutoCaptureConfigInput
- type AutoCaptureConfigOutput
- type AutoCaptureState
- type BearerTokenAuth
- type BuildLogsRequest
- type BuildLogsResponse
- type ChatMessage
- type ChatMessageRole
- type CohereConfig
- type CreatePtEndpointRequest
- type CreateServingEndpoint
- type CustomProviderConfig
- type DataPlaneInfo
- type DataPlaneService
- type DatabricksModelServingConfig
- type DataframeSplitInput
- type DeleteResponse
- type DeleteServingEndpointRequest
- type EmbeddingsV1ResponseEmbeddingElement
- type EmbeddingsV1ResponseEmbeddingElementObject
- func (f *EmbeddingsV1ResponseEmbeddingElementObject) Set(v string) error
- func (f *EmbeddingsV1ResponseEmbeddingElementObject) String() string
- func (f *EmbeddingsV1ResponseEmbeddingElementObject) Type() string
- func (f *EmbeddingsV1ResponseEmbeddingElementObject) Values() []EmbeddingsV1ResponseEmbeddingElementObject
- type EndpointCoreConfigInput
- type EndpointCoreConfigOutput
- type EndpointCoreConfigSummary
- type EndpointPendingConfig
- type EndpointState
- type EndpointStateConfigUpdate
- type EndpointStateReady
- type EndpointTag
- type EndpointTags
- type ExportMetricsRequest
- type ExportMetricsResponse
- type ExternalFunctionRequest
- type ExternalFunctionRequestHttpMethod
- type ExternalModel
- type ExternalModelProvider
- type ExternalModelUsageElement
- type FallbackConfig
- type FoundationModel
- type GetOpenApiRequest
- type GetOpenApiResponse
- type GetServingEndpointPermissionLevelsRequest
- type GetServingEndpointPermissionLevelsResponse
- type GetServingEndpointPermissionsRequest
- type GetServingEndpointRequest
- type GoogleCloudVertexAiConfig
- type HttpRequestResponse
- type ListEndpointsResponse
- type LogsRequest
- type ModelDataPlaneInfo
- type OpenAiConfig
- type PaLmConfig
- type PatchServingEndpointTags
- type PayloadTable
- type PtEndpointCoreConfig
- type PtServedModel
- type PutAiGatewayRequest
- type PutAiGatewayResponse
- type PutRequest
- type PutResponse
- type QueryEndpointInput
- type QueryEndpointResponse
- type QueryEndpointResponseObject
- type RateLimit
- type RateLimitKey
- type RateLimitRenewalPeriod
- type Route
- type ServedEntityInput
- type ServedEntityOutput
- type ServedEntitySpec
- type ServedModelInput
- type ServedModelInputWorkloadType
- type ServedModelOutput
- type ServedModelSpec
- type ServedModelState
- type ServedModelStateDeployment
- type ServerLogsResponse
- type ServingEndpoint
- type ServingEndpointAccessControlRequest
- type ServingEndpointAccessControlResponse
- type ServingEndpointDetailed
- type ServingEndpointDetailedPermissionLevel
- type ServingEndpointPermission
- type ServingEndpointPermissionLevel
- type ServingEndpointPermissions
- type ServingEndpointPermissionsDescription
- type ServingEndpointPermissionsRequest
- type ServingEndpointsAPI
- func (a *ServingEndpointsAPI) BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error)
- func (a *ServingEndpointsAPI) BuildLogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*BuildLogsResponse, error)
- func (a *ServingEndpointsAPI) Create(ctx context.Context, createServingEndpoint CreateServingEndpoint) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
- func (a *ServingEndpointsAPI) CreateAndWait(ctx context.Context, createServingEndpoint CreateServingEndpoint, ...) (*ServingEndpointDetailed, error) (deprecated)
- func (a *ServingEndpointsAPI) CreateProvisionedThroughputEndpoint(ctx context.Context, createPtEndpointRequest CreatePtEndpointRequest) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
- func (a *ServingEndpointsAPI) CreateProvisionedThroughputEndpointAndWait(ctx context.Context, createPtEndpointRequest CreatePtEndpointRequest, ...) (*ServingEndpointDetailed, error) (deprecated)
- func (a *ServingEndpointsAPI) Delete(ctx context.Context, request DeleteServingEndpointRequest) error
- func (a *ServingEndpointsAPI) DeleteByName(ctx context.Context, name string) error
- func (a *ServingEndpointsAPI) ExportMetrics(ctx context.Context, request ExportMetricsRequest) (*ExportMetricsResponse, error)
- func (a *ServingEndpointsAPI) ExportMetricsByName(ctx context.Context, name string) (*ExportMetricsResponse, error)
- func (a *ServingEndpointsAPI) Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error)
- func (a *ServingEndpointsAPI) GetByName(ctx context.Context, name string) (*ServingEndpointDetailed, error)
- func (a *ServingEndpointsAPI) GetOpenApi(ctx context.Context, request GetOpenApiRequest) (*GetOpenApiResponse, error)
- func (a *ServingEndpointsAPI) GetOpenApiByName(ctx context.Context, name string) (*GetOpenApiResponse, error)
- func (a *ServingEndpointsAPI) GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error)
- func (a *ServingEndpointsAPI) GetPermissionLevelsByServingEndpointId(ctx context.Context, servingEndpointId string) (*GetServingEndpointPermissionLevelsResponse, error)
- func (a *ServingEndpointsAPI) GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) GetPermissionsByServingEndpointId(ctx context.Context, servingEndpointId string) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) HttpRequest(ctx context.Context, request ExternalFunctionRequest) (*HttpRequestResponse, error)
- func (a *ServingEndpointsAPI) List(ctx context.Context) listing.Iterator[ServingEndpoint]
- func (a *ServingEndpointsAPI) ListAll(ctx context.Context) ([]ServingEndpoint, error)
- func (a *ServingEndpointsAPI) Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error)
- func (a *ServingEndpointsAPI) LogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*ServerLogsResponse, error)
- func (a *ServingEndpointsAPI) Patch(ctx context.Context, request PatchServingEndpointTags) (*EndpointTags, error)
- func (a *ServingEndpointsAPI) Put(ctx context.Context, request PutRequest) (*PutResponse, error)
- func (a *ServingEndpointsAPI) PutAiGateway(ctx context.Context, request PutAiGatewayRequest) (*PutAiGatewayResponse, error)
- func (a *ServingEndpointsAPI) Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
- func (a *ServingEndpointsAPI) SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) UpdateConfig(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
- func (a *ServingEndpointsAPI) UpdateConfigAndWait(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput, ...) (*ServingEndpointDetailed, error) (deprecated)
- func (a *ServingEndpointsAPI) UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) UpdateProvisionedThroughputEndpointConfig(ctx context.Context, ...) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
- func (a *ServingEndpointsAPI) UpdateProvisionedThroughputEndpointConfigAndWait(ctx context.Context, ...) (*ServingEndpointDetailed, error) (deprecated)
- func (a *ServingEndpointsAPI) WaitGetServingEndpointNotUpdating(ctx context.Context, name string, timeout time.Duration, ...) (*ServingEndpointDetailed, error)
- type ServingEndpointsDataPlaneAPI
- type ServingEndpointsDataPlaneInterface
- type ServingEndpointsDataPlaneService (deprecated)
- type ServingEndpointsInterface
- type ServingEndpointsService (deprecated)
- type ServingModelWorkloadType
- type TrafficConfig
- type UpdateProvisionedThroughputEndpointConfigRequest
- type V1ResponseChoiceElement
- type WaitGetServingEndpointNotUpdating
- func (w *WaitGetServingEndpointNotUpdating[R]) Get() (*ServingEndpointDetailed, error)
- func (w *WaitGetServingEndpointNotUpdating[R]) GetWithTimeout(timeout time.Duration) (*ServingEndpointDetailed, error)
- func (w *WaitGetServingEndpointNotUpdating[R]) OnProgress(callback func(*ServingEndpointDetailed)) *WaitGetServingEndpointNotUpdating[R]
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Ai21LabsConfig ¶ added in v0.27.0
type Ai21LabsConfig struct {
// The Databricks secret key reference for an AI21 Labs API key. If you
// prefer to paste your API key directly, see `ai21labs_api_key_plaintext`.
// You must provide an API key using one of the following fields:
// `ai21labs_api_key` or `ai21labs_api_key_plaintext`.
// Wire name: 'ai21labs_api_key'
Ai21labsApiKey string `json:"ai21labs_api_key,omitempty"`
// An AI21 Labs API key provided as a plaintext string. If you prefer to
// reference your key using Databricks Secrets, see `ai21labs_api_key`. You
// must provide an API key using one of the following fields:
// `ai21labs_api_key` or `ai21labs_api_key_plaintext`.
// Wire name: 'ai21labs_api_key_plaintext'
Ai21labsApiKeyPlaintext string `json:"ai21labs_api_key_plaintext,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (Ai21LabsConfig) MarshalJSON ¶ added in v0.44.0
func (st Ai21LabsConfig) MarshalJSON() ([]byte, error)
func (*Ai21LabsConfig) UnmarshalJSON ¶ added in v0.44.0
func (st *Ai21LabsConfig) UnmarshalJSON(b []byte) error
type AiGatewayConfig ¶ added in v0.47.0
type AiGatewayConfig struct {
// Configuration for traffic fallback which auto fallbacks to other served
// entities if the request to a served entity fails with certain error
// codes, to increase availability.
// Wire name: 'fallback_config'
FallbackConfig *FallbackConfig `json:"fallback_config,omitempty"`
// Configuration for AI Guardrails to prevent unwanted data and unsafe data
// in requests and responses.
// Wire name: 'guardrails'
Guardrails *AiGatewayGuardrails `json:"guardrails,omitempty"`
// Configuration for payload logging using inference tables. Use these
// tables to monitor and audit data being sent to and received from model
// APIs and to improve model quality.
// Wire name: 'inference_table_config'
InferenceTableConfig *AiGatewayInferenceTableConfig `json:"inference_table_config,omitempty"`
// Configuration for rate limits which can be set to limit endpoint traffic.
// Wire name: 'rate_limits'
RateLimits []AiGatewayRateLimit `json:"rate_limits,omitempty"`
// Configuration to enable usage tracking using system tables. These tables
// allow you to monitor operational usage on endpoints and their associated
// costs.
// Wire name: 'usage_tracking_config'
UsageTrackingConfig *AiGatewayUsageTrackingConfig `json:"usage_tracking_config,omitempty"`
}
func (AiGatewayConfig) MarshalJSON ¶ added in v0.73.0
func (st AiGatewayConfig) MarshalJSON() ([]byte, error)
func (*AiGatewayConfig) UnmarshalJSON ¶ added in v0.73.0
func (st *AiGatewayConfig) UnmarshalJSON(b []byte) error
type AiGatewayGuardrailParameters ¶ added in v0.47.0
type AiGatewayGuardrailParameters struct {
// List of invalid keywords. AI guardrail uses keyword or string matching to
// decide if the keyword exists in the request or response content.
// Wire name: 'invalid_keywords'
InvalidKeywords []string `json:"invalid_keywords,omitempty"`
// Configuration for guardrail PII filter.
// Wire name: 'pii'
Pii *AiGatewayGuardrailPiiBehavior `json:"pii,omitempty"`
// Indicates whether the safety filter is enabled.
// Wire name: 'safety'
Safety bool `json:"safety,omitempty"`
// The list of allowed topics. Given a chat request, this guardrail flags
// the request if its topic is not in the allowed topics.
// Wire name: 'valid_topics'
ValidTopics []string `json:"valid_topics,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (AiGatewayGuardrailParameters) MarshalJSON ¶ added in v0.47.0
func (st AiGatewayGuardrailParameters) MarshalJSON() ([]byte, error)
func (*AiGatewayGuardrailParameters) UnmarshalJSON ¶ added in v0.47.0
func (st *AiGatewayGuardrailParameters) UnmarshalJSON(b []byte) error
type AiGatewayGuardrailPiiBehavior ¶ added in v0.47.0
type AiGatewayGuardrailPiiBehavior struct {
// Configuration for input guardrail filters.
// Wire name: 'behavior'
Behavior AiGatewayGuardrailPiiBehaviorBehavior `json:"behavior,omitempty"`
}
func (AiGatewayGuardrailPiiBehavior) MarshalJSON ¶ added in v0.73.0
func (st AiGatewayGuardrailPiiBehavior) MarshalJSON() ([]byte, error)
func (*AiGatewayGuardrailPiiBehavior) UnmarshalJSON ¶ added in v0.73.0
func (st *AiGatewayGuardrailPiiBehavior) UnmarshalJSON(b []byte) error
type AiGatewayGuardrailPiiBehaviorBehavior ¶ added in v0.47.0
type AiGatewayGuardrailPiiBehaviorBehavior string
const AiGatewayGuardrailPiiBehaviorBehaviorBlock AiGatewayGuardrailPiiBehaviorBehavior = `BLOCK`
const AiGatewayGuardrailPiiBehaviorBehaviorNone AiGatewayGuardrailPiiBehaviorBehavior = `NONE`
func (*AiGatewayGuardrailPiiBehaviorBehavior) Set ¶ added in v0.47.0
func (f *AiGatewayGuardrailPiiBehaviorBehavior) Set(v string) error
Set raw string value and validate it against allowed values
func (*AiGatewayGuardrailPiiBehaviorBehavior) String ¶ added in v0.47.0
func (f *AiGatewayGuardrailPiiBehaviorBehavior) String() string
String representation for fmt.Print
func (*AiGatewayGuardrailPiiBehaviorBehavior) Type ¶ added in v0.47.0
func (f *AiGatewayGuardrailPiiBehaviorBehavior) Type() string
Type always returns AiGatewayGuardrailPiiBehaviorBehavior to satisfy [pflag.Value] interface
func (*AiGatewayGuardrailPiiBehaviorBehavior) Values ¶ added in v0.72.0
func (f *AiGatewayGuardrailPiiBehaviorBehavior) Values() []AiGatewayGuardrailPiiBehaviorBehavior
Values returns all possible values for AiGatewayGuardrailPiiBehaviorBehavior.
There is no guarantee on the order of the values in the slice.
type AiGatewayGuardrails ¶ added in v0.47.0
type AiGatewayGuardrails struct {
// Configuration for input guardrail filters.
// Wire name: 'input'
Input *AiGatewayGuardrailParameters `json:"input,omitempty"`
// Configuration for output guardrail filters.
// Wire name: 'output'
Output *AiGatewayGuardrailParameters `json:"output,omitempty"`
}
func (AiGatewayGuardrails) MarshalJSON ¶ added in v0.73.0
func (st AiGatewayGuardrails) MarshalJSON() ([]byte, error)
func (*AiGatewayGuardrails) UnmarshalJSON ¶ added in v0.73.0
func (st *AiGatewayGuardrails) UnmarshalJSON(b []byte) error
type AiGatewayInferenceTableConfig ¶ added in v0.47.0
type AiGatewayInferenceTableConfig struct {
// The name of the catalog in Unity Catalog. Required when enabling
// inference tables. NOTE: On update, you have to disable inference table
// first in order to change the catalog name.
// Wire name: 'catalog_name'
CatalogName string `json:"catalog_name,omitempty"`
// Indicates whether the inference table is enabled.
// Wire name: 'enabled'
Enabled bool `json:"enabled,omitempty"`
// The name of the schema in Unity Catalog. Required when enabling inference
// tables. NOTE: On update, you have to disable inference table first in
// order to change the schema name.
// Wire name: 'schema_name'
SchemaName string `json:"schema_name,omitempty"`
// The prefix of the table in Unity Catalog. NOTE: On update, you have to
// disable inference table first in order to change the prefix name.
// Wire name: 'table_name_prefix'
TableNamePrefix string `json:"table_name_prefix,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (AiGatewayInferenceTableConfig) MarshalJSON ¶ added in v0.47.0
func (st AiGatewayInferenceTableConfig) MarshalJSON() ([]byte, error)
func (*AiGatewayInferenceTableConfig) UnmarshalJSON ¶ added in v0.47.0
func (st *AiGatewayInferenceTableConfig) UnmarshalJSON(b []byte) error
type AiGatewayRateLimit ¶ added in v0.47.0
type AiGatewayRateLimit struct {
// Used to specify how many calls are allowed for a key within the
// renewal_period.
// Wire name: 'calls'
Calls int64 `json:"calls"`
// Key field for a rate limit. Currently, only 'user' and 'endpoint' are
// supported, with 'endpoint' being the default if not specified.
// Wire name: 'key'
Key AiGatewayRateLimitKey `json:"key,omitempty"`
// Renewal period field for a rate limit. Currently, only 'minute' is
// supported.
// Wire name: 'renewal_period'
RenewalPeriod AiGatewayRateLimitRenewalPeriod `json:"renewal_period"`
}
func (AiGatewayRateLimit) MarshalJSON ¶ added in v0.73.0
func (st AiGatewayRateLimit) MarshalJSON() ([]byte, error)
func (*AiGatewayRateLimit) UnmarshalJSON ¶ added in v0.73.0
func (st *AiGatewayRateLimit) UnmarshalJSON(b []byte) error
type AiGatewayRateLimitKey ¶ added in v0.47.0
type AiGatewayRateLimitKey string
const AiGatewayRateLimitKeyEndpoint AiGatewayRateLimitKey = `endpoint`
const AiGatewayRateLimitKeyUser AiGatewayRateLimitKey = `user`
func (*AiGatewayRateLimitKey) Set ¶ added in v0.47.0
func (f *AiGatewayRateLimitKey) Set(v string) error
Set raw string value and validate it against allowed values
func (*AiGatewayRateLimitKey) String ¶ added in v0.47.0
func (f *AiGatewayRateLimitKey) String() string
String representation for fmt.Print
func (*AiGatewayRateLimitKey) Type ¶ added in v0.47.0
func (f *AiGatewayRateLimitKey) Type() string
Type always returns AiGatewayRateLimitKey to satisfy [pflag.Value] interface
func (*AiGatewayRateLimitKey) Values ¶ added in v0.72.0
func (f *AiGatewayRateLimitKey) Values() []AiGatewayRateLimitKey
Values returns all possible values for AiGatewayRateLimitKey.
There is no guarantee on the order of the values in the slice.
type AiGatewayRateLimitRenewalPeriod ¶ added in v0.47.0
type AiGatewayRateLimitRenewalPeriod string
const AiGatewayRateLimitRenewalPeriodMinute AiGatewayRateLimitRenewalPeriod = `minute`
func (*AiGatewayRateLimitRenewalPeriod) Set ¶ added in v0.47.0
func (f *AiGatewayRateLimitRenewalPeriod) Set(v string) error
Set raw string value and validate it against allowed values
func (*AiGatewayRateLimitRenewalPeriod) String ¶ added in v0.47.0
func (f *AiGatewayRateLimitRenewalPeriod) String() string
String representation for fmt.Print
func (*AiGatewayRateLimitRenewalPeriod) Type ¶ added in v0.47.0
func (f *AiGatewayRateLimitRenewalPeriod) Type() string
Type always returns AiGatewayRateLimitRenewalPeriod to satisfy [pflag.Value] interface
func (*AiGatewayRateLimitRenewalPeriod) Values ¶ added in v0.72.0
func (f *AiGatewayRateLimitRenewalPeriod) Values() []AiGatewayRateLimitRenewalPeriod
Values returns all possible values for AiGatewayRateLimitRenewalPeriod.
There is no guarantee on the order of the values in the slice.
type AiGatewayUsageTrackingConfig ¶ added in v0.47.0
type AiGatewayUsageTrackingConfig struct {
// Whether to enable usage tracking.
// Wire name: 'enabled'
Enabled bool `json:"enabled,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (AiGatewayUsageTrackingConfig) MarshalJSON ¶ added in v0.47.0
func (st AiGatewayUsageTrackingConfig) MarshalJSON() ([]byte, error)
func (*AiGatewayUsageTrackingConfig) UnmarshalJSON ¶ added in v0.47.0
func (st *AiGatewayUsageTrackingConfig) UnmarshalJSON(b []byte) error
type AmazonBedrockConfig ¶ added in v0.37.0
type AmazonBedrockConfig struct {
// The Databricks secret key reference for an AWS access key ID with
// permissions to interact with Bedrock services. If you prefer to paste
// your API key directly, see `aws_access_key_id_plaintext`. You must
// provide an API key using one of the following fields: `aws_access_key_id`
// or `aws_access_key_id_plaintext`.
// Wire name: 'aws_access_key_id'
AwsAccessKeyId string `json:"aws_access_key_id,omitempty"`
// An AWS access key ID with permissions to interact with Bedrock services
// provided as a plaintext string. If you prefer to reference your key using
// Databricks Secrets, see `aws_access_key_id`. You must provide an API key
// using one of the following fields: `aws_access_key_id` or
// `aws_access_key_id_plaintext`.
// Wire name: 'aws_access_key_id_plaintext'
AwsAccessKeyIdPlaintext string `json:"aws_access_key_id_plaintext,omitempty"`
// The AWS region to use. Bedrock has to be enabled there.
// Wire name: 'aws_region'
AwsRegion string `json:"aws_region"`
// The Databricks secret key reference for an AWS secret access key paired
// with the access key ID, with permissions to interact with Bedrock
// services. If you prefer to paste your API key directly, see
// `aws_secret_access_key_plaintext`. You must provide an API key using one
// of the following fields: `aws_secret_access_key` or
// `aws_secret_access_key_plaintext`.
// Wire name: 'aws_secret_access_key'
AwsSecretAccessKey string `json:"aws_secret_access_key,omitempty"`
// An AWS secret access key paired with the access key ID, with permissions
// to interact with Bedrock services provided as a plaintext string. If you
// prefer to reference your key using Databricks Secrets, see
// `aws_secret_access_key`. You must provide an API key using one of the
// following fields: `aws_secret_access_key` or
// `aws_secret_access_key_plaintext`.
// Wire name: 'aws_secret_access_key_plaintext'
AwsSecretAccessKeyPlaintext string `json:"aws_secret_access_key_plaintext,omitempty"`
// The underlying provider in Amazon Bedrock. Supported values (case
// insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
// Wire name: 'bedrock_provider'
BedrockProvider AmazonBedrockConfigBedrockProvider `json:"bedrock_provider"`
// ARN of the instance profile that the external model will use to access
// AWS resources. You must authenticate using an instance profile or access
// keys. If you prefer to authenticate using access keys, see
// `aws_access_key_id`, `aws_access_key_id_plaintext`,
// `aws_secret_access_key` and `aws_secret_access_key_plaintext`.
// Wire name: 'instance_profile_arn'
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (AmazonBedrockConfig) MarshalJSON ¶ added in v0.44.0
func (st AmazonBedrockConfig) MarshalJSON() ([]byte, error)
func (*AmazonBedrockConfig) UnmarshalJSON ¶ added in v0.44.0
func (st *AmazonBedrockConfig) UnmarshalJSON(b []byte) error
type AmazonBedrockConfigBedrockProvider ¶ added in v0.37.0
type AmazonBedrockConfigBedrockProvider string
const AmazonBedrockConfigBedrockProviderAi21labs AmazonBedrockConfigBedrockProvider = `ai21labs`
const AmazonBedrockConfigBedrockProviderAmazon AmazonBedrockConfigBedrockProvider = `amazon`
const AmazonBedrockConfigBedrockProviderAnthropic AmazonBedrockConfigBedrockProvider = `anthropic`
const AmazonBedrockConfigBedrockProviderCohere AmazonBedrockConfigBedrockProvider = `cohere`
func (*AmazonBedrockConfigBedrockProvider) Set ¶ added in v0.37.0
func (f *AmazonBedrockConfigBedrockProvider) Set(v string) error
Set raw string value and validate it against allowed values
func (*AmazonBedrockConfigBedrockProvider) String ¶ added in v0.37.0
func (f *AmazonBedrockConfigBedrockProvider) String() string
String representation for fmt.Print
func (*AmazonBedrockConfigBedrockProvider) Type ¶ added in v0.37.0
func (f *AmazonBedrockConfigBedrockProvider) Type() string
Type always returns AmazonBedrockConfigBedrockProvider to satisfy [pflag.Value] interface
func (*AmazonBedrockConfigBedrockProvider) Values ¶ added in v0.72.0
func (f *AmazonBedrockConfigBedrockProvider) Values() []AmazonBedrockConfigBedrockProvider
Values returns all possible values for AmazonBedrockConfigBedrockProvider.
There is no guarantee on the order of the values in the slice.
type AnthropicConfig ¶ added in v0.27.0
type AnthropicConfig struct {
// The Databricks secret key reference for an Anthropic API key. If you
// prefer to paste your API key directly, see `anthropic_api_key_plaintext`.
// You must provide an API key using one of the following fields:
// `anthropic_api_key` or `anthropic_api_key_plaintext`.
// Wire name: 'anthropic_api_key'
AnthropicApiKey string `json:"anthropic_api_key,omitempty"`
// The Anthropic API key provided as a plaintext string. If you prefer to
// reference your key using Databricks Secrets, see `anthropic_api_key`. You
// must provide an API key using one of the following fields:
// `anthropic_api_key` or `anthropic_api_key_plaintext`.
// Wire name: 'anthropic_api_key_plaintext'
AnthropicApiKeyPlaintext string `json:"anthropic_api_key_plaintext,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (AnthropicConfig) MarshalJSON ¶ added in v0.44.0
func (st AnthropicConfig) MarshalJSON() ([]byte, error)
func (*AnthropicConfig) UnmarshalJSON ¶ added in v0.44.0
func (st *AnthropicConfig) UnmarshalJSON(b []byte) error
type ApiKeyAuth ¶ added in v0.61.0
type ApiKeyAuth struct {
// The name of the API key parameter used for authentication.
// Wire name: 'key'
Key string `json:"key"`
// The Databricks secret key reference for an API Key. If you prefer to
// paste your token directly, see `value_plaintext`.
// Wire name: 'value'
Value string `json:"value,omitempty"`
// The API Key provided as a plaintext string. If you prefer to reference
// your token using Databricks Secrets, see `value`.
// Wire name: 'value_plaintext'
ValuePlaintext string `json:"value_plaintext,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (ApiKeyAuth) MarshalJSON ¶ added in v0.61.0
func (st ApiKeyAuth) MarshalJSON() ([]byte, error)
func (*ApiKeyAuth) UnmarshalJSON ¶ added in v0.61.0
func (st *ApiKeyAuth) UnmarshalJSON(b []byte) error
type AutoCaptureConfigInput ¶ added in v0.27.0
type AutoCaptureConfigInput struct {
// The name of the catalog in Unity Catalog. NOTE: On update, you cannot
// change the catalog name if the inference table is already enabled.
// Wire name: 'catalog_name'
CatalogName string `json:"catalog_name,omitempty"`
// Indicates whether the inference table is enabled.
// Wire name: 'enabled'
Enabled bool `json:"enabled,omitempty"`
// The name of the schema in Unity Catalog. NOTE: On update, you cannot
// change the schema name if the inference table is already enabled.
// Wire name: 'schema_name'
SchemaName string `json:"schema_name,omitempty"`
// The prefix of the table in Unity Catalog. NOTE: On update, you cannot
// change the prefix name if the inference table is already enabled.
// Wire name: 'table_name_prefix'
TableNamePrefix string `json:"table_name_prefix,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (AutoCaptureConfigInput) MarshalJSON ¶ added in v0.27.0
func (st AutoCaptureConfigInput) MarshalJSON() ([]byte, error)
func (*AutoCaptureConfigInput) UnmarshalJSON ¶ added in v0.27.0
func (st *AutoCaptureConfigInput) UnmarshalJSON(b []byte) error
type AutoCaptureConfigOutput ¶ added in v0.27.0
type AutoCaptureConfigOutput struct {
// The name of the catalog in Unity Catalog. NOTE: On update, you cannot
// change the catalog name if the inference table is already enabled.
// Wire name: 'catalog_name'
CatalogName string `json:"catalog_name,omitempty"`
// Indicates whether the inference table is enabled.
// Wire name: 'enabled'
Enabled bool `json:"enabled,omitempty"`
// The name of the schema in Unity Catalog. NOTE: On update, you cannot
// change the schema name if the inference table is already enabled.
// Wire name: 'schema_name'
SchemaName string `json:"schema_name,omitempty"`
// Wire name: 'state'
State *AutoCaptureState `json:"state,omitempty"`
// The prefix of the table in Unity Catalog. NOTE: On update, you cannot
// change the prefix name if the inference table is already enabled.
// Wire name: 'table_name_prefix'
TableNamePrefix string `json:"table_name_prefix,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (AutoCaptureConfigOutput) MarshalJSON ¶ added in v0.27.0
func (st AutoCaptureConfigOutput) MarshalJSON() ([]byte, error)
func (*AutoCaptureConfigOutput) UnmarshalJSON ¶ added in v0.27.0
func (st *AutoCaptureConfigOutput) UnmarshalJSON(b []byte) error
type AutoCaptureState ¶ added in v0.27.0
type AutoCaptureState struct {
// Wire name: 'payload_table'
PayloadTable *PayloadTable `json:"payload_table,omitempty"`
}
func (AutoCaptureState) MarshalJSON ¶ added in v0.73.0
func (st AutoCaptureState) MarshalJSON() ([]byte, error)
func (*AutoCaptureState) UnmarshalJSON ¶ added in v0.73.0
func (st *AutoCaptureState) UnmarshalJSON(b []byte) error
type BearerTokenAuth ¶ added in v0.61.0
type BearerTokenAuth struct {
// The Databricks secret key reference for a token. If you prefer to paste
// your token directly, see `token_plaintext`.
// Wire name: 'token'
Token string `json:"token,omitempty"`
// The token provided as a plaintext string. If you prefer to reference your
// token using Databricks Secrets, see `token`.
// Wire name: 'token_plaintext'
TokenPlaintext string `json:"token_plaintext,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (BearerTokenAuth) MarshalJSON ¶ added in v0.61.0
func (st BearerTokenAuth) MarshalJSON() ([]byte, error)
func (*BearerTokenAuth) UnmarshalJSON ¶ added in v0.61.0
func (st *BearerTokenAuth) UnmarshalJSON(b []byte) error
type BuildLogsRequest ¶
type BuildLogsRequest struct {
// The name of the serving endpoint that the served model belongs to. This
// field is required.
Name string `json:"-" tf:"-"`
// The name of the served model that build logs will be retrieved for. This
// field is required.
ServedModelName string `json:"-" tf:"-"`
}
Get build logs for a served model
func (BuildLogsRequest) MarshalJSON ¶ added in v0.73.0
func (st BuildLogsRequest) MarshalJSON() ([]byte, error)
func (*BuildLogsRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *BuildLogsRequest) UnmarshalJSON(b []byte) error
type BuildLogsResponse ¶
type BuildLogsResponse struct {
// The logs associated with building the served entity's environment.
// Wire name: 'logs'
Logs string `json:"logs"`
}
func (BuildLogsResponse) MarshalJSON ¶ added in v0.73.0
func (st BuildLogsResponse) MarshalJSON() ([]byte, error)
func (*BuildLogsResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *BuildLogsResponse) UnmarshalJSON(b []byte) error
type ChatMessage ¶ added in v0.27.0
type ChatMessage struct {
// The content of the message.
// Wire name: 'content'
Content string `json:"content,omitempty"`
// The role of the message. One of [system, user, assistant].
// Wire name: 'role'
Role ChatMessageRole `json:"role,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (ChatMessage) MarshalJSON ¶ added in v0.27.0
func (st ChatMessage) MarshalJSON() ([]byte, error)
func (*ChatMessage) UnmarshalJSON ¶ added in v0.27.0
func (st *ChatMessage) UnmarshalJSON(b []byte) error
type ChatMessageRole ¶ added in v0.27.0
type ChatMessageRole string
The role of the message. One of [system, user, assistant].
const ChatMessageRoleAssistant ChatMessageRole = `assistant`
const ChatMessageRoleSystem ChatMessageRole = `system`
const ChatMessageRoleUser ChatMessageRole = `user`
func (*ChatMessageRole) Set ¶ added in v0.27.0
func (f *ChatMessageRole) Set(v string) error
Set raw string value and validate it against allowed values
func (*ChatMessageRole) String ¶ added in v0.27.0
func (f *ChatMessageRole) String() string
String representation for fmt.Print
func (*ChatMessageRole) Type ¶ added in v0.27.0
func (f *ChatMessageRole) Type() string
Type always returns ChatMessageRole to satisfy [pflag.Value] interface
func (*ChatMessageRole) Values ¶ added in v0.72.0
func (f *ChatMessageRole) Values() []ChatMessageRole
Values returns all possible values for ChatMessageRole.
There is no guarantee on the order of the values in the slice.
type CohereConfig ¶ added in v0.27.0
type CohereConfig struct {
// This is an optional field to provide a customized base URL for the Cohere
// API. If left unspecified, the standard Cohere base URL is used.
// Wire name: 'cohere_api_base'
CohereApiBase string `json:"cohere_api_base,omitempty"`
// The Databricks secret key reference for a Cohere API key. If you prefer
// to paste your API key directly, see `cohere_api_key_plaintext`. You must
// provide an API key using one of the following fields: `cohere_api_key` or
// `cohere_api_key_plaintext`.
// Wire name: 'cohere_api_key'
CohereApiKey string `json:"cohere_api_key,omitempty"`
// The Cohere API key provided as a plaintext string. If you prefer to
// reference your key using Databricks Secrets, see `cohere_api_key`. You
// must provide an API key using one of the following fields:
// `cohere_api_key` or `cohere_api_key_plaintext`.
// Wire name: 'cohere_api_key_plaintext'
CohereApiKeyPlaintext string `json:"cohere_api_key_plaintext,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (CohereConfig) MarshalJSON ¶ added in v0.44.0
func (st CohereConfig) MarshalJSON() ([]byte, error)
func (*CohereConfig) UnmarshalJSON ¶ added in v0.44.0
func (st *CohereConfig) UnmarshalJSON(b []byte) error
type CreatePtEndpointRequest ¶ added in v0.69.0
type CreatePtEndpointRequest struct {
// The AI Gateway configuration for the serving endpoint.
// Wire name: 'ai_gateway'
AiGateway *AiGatewayConfig `json:"ai_gateway,omitempty"`
// The budget policy associated with the endpoint.
// Wire name: 'budget_policy_id'
BudgetPolicyId string `json:"budget_policy_id,omitempty"`
// The core config of the serving endpoint.
// Wire name: 'config'
Config PtEndpointCoreConfig `json:"config"`
// The name of the serving endpoint. This field is required and must be
// unique across a Databricks workspace. An endpoint name can consist of
// alphanumeric characters, dashes, and underscores.
// Wire name: 'name'
Name string `json:"name"`
// Tags to be attached to the serving endpoint and automatically propagated
// to billing logs.
// Wire name: 'tags'
Tags []EndpointTag `json:"tags,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (CreatePtEndpointRequest) MarshalJSON ¶ added in v0.69.0
func (st CreatePtEndpointRequest) MarshalJSON() ([]byte, error)
func (*CreatePtEndpointRequest) UnmarshalJSON ¶ added in v0.69.0
func (st *CreatePtEndpointRequest) UnmarshalJSON(b []byte) error
type CreateServingEndpoint ¶
type CreateServingEndpoint struct {
// The AI Gateway configuration for the serving endpoint. NOTE: External
// model, provisioned throughput, and pay-per-token endpoints are fully
// supported; agent endpoints currently only support inference tables.
// Wire name: 'ai_gateway'
AiGateway *AiGatewayConfig `json:"ai_gateway,omitempty"`
// The budget policy to be applied to the serving endpoint.
// Wire name: 'budget_policy_id'
BudgetPolicyId string `json:"budget_policy_id,omitempty"`
// The core config of the serving endpoint.
// Wire name: 'config'
Config *EndpointCoreConfigInput `json:"config,omitempty"`
// The name of the serving endpoint. This field is required and must be
// unique across a Databricks workspace. An endpoint name can consist of
// alphanumeric characters, dashes, and underscores.
// Wire name: 'name'
Name string `json:"name"`
// Rate limits to be applied to the serving endpoint. NOTE: this field is
// deprecated, please use AI Gateway to manage rate limits.
// Wire name: 'rate_limits'
RateLimits []RateLimit `json:"rate_limits,omitempty"`
// Enable route optimization for the serving endpoint.
// Wire name: 'route_optimized'
RouteOptimized bool `json:"route_optimized,omitempty"`
// Tags to be attached to the serving endpoint and automatically propagated
// to billing logs.
// Wire name: 'tags'
Tags []EndpointTag `json:"tags,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (CreateServingEndpoint) MarshalJSON ¶ added in v0.41.0
func (st CreateServingEndpoint) MarshalJSON() ([]byte, error)
func (*CreateServingEndpoint) UnmarshalJSON ¶ added in v0.41.0
func (st *CreateServingEndpoint) UnmarshalJSON(b []byte) error
type CustomProviderConfig ¶ added in v0.61.0
type CustomProviderConfig struct {
// This is a field to provide API key authentication for the custom provider
// API. You can only specify one authentication method.
// Wire name: 'api_key_auth'
ApiKeyAuth *ApiKeyAuth `json:"api_key_auth,omitempty"`
// This is a field to provide bearer token authentication for the custom
// provider API. You can only specify one authentication method.
// Wire name: 'bearer_token_auth'
BearerTokenAuth *BearerTokenAuth `json:"bearer_token_auth,omitempty"`
// This is a field to provide the URL of the custom provider API.
// Wire name: 'custom_provider_url'
CustomProviderUrl string `json:"custom_provider_url"`
}
Configs needed to create a custom provider model route.
func (CustomProviderConfig) MarshalJSON ¶ added in v0.73.0
func (st CustomProviderConfig) MarshalJSON() ([]byte, error)
func (*CustomProviderConfig) UnmarshalJSON ¶ added in v0.73.0
func (st *CustomProviderConfig) UnmarshalJSON(b []byte) error
type DataPlaneInfo ¶ added in v0.55.0
type DataPlaneInfo struct {
// Authorization details as a string.
// Wire name: 'authorization_details'
AuthorizationDetails string `json:"authorization_details,omitempty"`
// The URL of the endpoint for this operation in the dataplane.
// Wire name: 'endpoint_url'
EndpointUrl string `json:"endpoint_url,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
Details necessary to query this object's API through the DataPlane APIs.
func (DataPlaneInfo) MarshalJSON ¶ added in v0.55.0
func (st DataPlaneInfo) MarshalJSON() ([]byte, error)
func (*DataPlaneInfo) UnmarshalJSON ¶ added in v0.55.0
func (st *DataPlaneInfo) UnmarshalJSON(b []byte) error
type DataPlaneService ¶ added in v0.55.0
type DataPlaneService interface {
GetDataPlaneDetails(method string, params []string, refresh func(*DataPlaneInfo) (*goauth.Token, error), infoGetter func() (*DataPlaneInfo, error)) (string, *goauth.Token, error)
}
DataPlaneService is an interface for services that access DataPlane.
func NewDataPlaneService ¶ added in v0.55.0
func NewDataPlaneService() DataPlaneService
type DatabricksModelServingConfig ¶ added in v0.27.0
type DatabricksModelServingConfig struct {
// The Databricks secret key reference for a Databricks API token that
// corresponds to a user or service principal with Can Query access to the
// model serving endpoint pointed to by this external model. If you prefer
// to paste your API key directly, see `databricks_api_token_plaintext`. You
// must provide an API key using one of the following fields:
// `databricks_api_token` or `databricks_api_token_plaintext`.
// Wire name: 'databricks_api_token'
DatabricksApiToken string `json:"databricks_api_token,omitempty"`
// The Databricks API token that corresponds to a user or service principal
// with Can Query access to the model serving endpoint pointed to by this
// external model provided as a plaintext string. If you prefer to reference
// your key using Databricks Secrets, see `databricks_api_token`. You must
// provide an API key using one of the following fields:
// `databricks_api_token` or `databricks_api_token_plaintext`.
// Wire name: 'databricks_api_token_plaintext'
DatabricksApiTokenPlaintext string `json:"databricks_api_token_plaintext,omitempty"`
// The URL of the Databricks workspace containing the model serving endpoint
// pointed to by this external model.
// Wire name: 'databricks_workspace_url'
DatabricksWorkspaceUrl string `json:"databricks_workspace_url"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (DatabricksModelServingConfig) MarshalJSON ¶ added in v0.44.0
func (st DatabricksModelServingConfig) MarshalJSON() ([]byte, error)
func (*DatabricksModelServingConfig) UnmarshalJSON ¶ added in v0.44.0
func (st *DatabricksModelServingConfig) UnmarshalJSON(b []byte) error
type DataframeSplitInput ¶ added in v0.21.0
type DataframeSplitInput struct {
// Wire name: 'columns'
Columns []any `json:"columns,omitempty"`
// Wire name: 'data'
Data []any `json:"data,omitempty"`
// Wire name: 'index'
Index []int `json:"index,omitempty"`
}
func (DataframeSplitInput) MarshalJSON ¶ added in v0.73.0
func (st DataframeSplitInput) MarshalJSON() ([]byte, error)
func (*DataframeSplitInput) UnmarshalJSON ¶ added in v0.73.0
func (st *DataframeSplitInput) UnmarshalJSON(b []byte) error
type DeleteResponse ¶ added in v0.34.0
type DeleteResponse struct {
}
func (DeleteResponse) MarshalJSON ¶ added in v0.73.0
func (st DeleteResponse) MarshalJSON() ([]byte, error)
func (*DeleteResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *DeleteResponse) UnmarshalJSON(b []byte) error
type DeleteServingEndpointRequest ¶
type DeleteServingEndpointRequest struct {
Name string `json:"-" tf:"-"`
}
Delete a serving endpoint
func (DeleteServingEndpointRequest) MarshalJSON ¶ added in v0.73.0
func (st DeleteServingEndpointRequest) MarshalJSON() ([]byte, error)
func (*DeleteServingEndpointRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *DeleteServingEndpointRequest) UnmarshalJSON(b []byte) error
type EmbeddingsV1ResponseEmbeddingElement ¶ added in v0.27.0
type EmbeddingsV1ResponseEmbeddingElement struct {
// Wire name: 'embedding'
Embedding []float64 `json:"embedding,omitempty"`
// The index of the embedding in the response.
// Wire name: 'index'
Index int `json:"index,omitempty"`
// This will always be 'embedding'.
// Wire name: 'object'
Object EmbeddingsV1ResponseEmbeddingElementObject `json:"object,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (EmbeddingsV1ResponseEmbeddingElement) MarshalJSON ¶ added in v0.27.0
func (st EmbeddingsV1ResponseEmbeddingElement) MarshalJSON() ([]byte, error)
func (*EmbeddingsV1ResponseEmbeddingElement) UnmarshalJSON ¶ added in v0.27.0
func (st *EmbeddingsV1ResponseEmbeddingElement) UnmarshalJSON(b []byte) error
type EmbeddingsV1ResponseEmbeddingElementObject ¶ added in v0.27.0
type EmbeddingsV1ResponseEmbeddingElementObject string
This will always be 'embedding'.
const EmbeddingsV1ResponseEmbeddingElementObjectEmbedding EmbeddingsV1ResponseEmbeddingElementObject = `embedding`
func (*EmbeddingsV1ResponseEmbeddingElementObject) Set ¶ added in v0.27.0
func (f *EmbeddingsV1ResponseEmbeddingElementObject) Set(v string) error
Set raw string value and validate it against allowed values
func (*EmbeddingsV1ResponseEmbeddingElementObject) String ¶ added in v0.27.0
func (f *EmbeddingsV1ResponseEmbeddingElementObject) String() string
String representation for fmt.Print
func (*EmbeddingsV1ResponseEmbeddingElementObject) Type ¶ added in v0.27.0
func (f *EmbeddingsV1ResponseEmbeddingElementObject) Type() string
Type always returns EmbeddingsV1ResponseEmbeddingElementObject to satisfy [pflag.Value] interface
func (*EmbeddingsV1ResponseEmbeddingElementObject) Values ¶ added in v0.72.0
func (f *EmbeddingsV1ResponseEmbeddingElementObject) Values() []EmbeddingsV1ResponseEmbeddingElementObject
Values returns all possible values for EmbeddingsV1ResponseEmbeddingElementObject.
There is no guarantee on the order of the values in the slice.
type EndpointCoreConfigInput ¶
type EndpointCoreConfigInput struct {
// Configuration for Inference Tables which automatically logs requests and
// responses to Unity Catalog. Note: this field is deprecated for creating
// new provisioned throughput endpoints, or updating existing provisioned
// throughput endpoints that never have inference table configured; in these
// cases please use AI Gateway to manage inference tables.
// Wire name: 'auto_capture_config'
AutoCaptureConfig *AutoCaptureConfigInput `json:"auto_capture_config,omitempty"`
// The name of the serving endpoint to update. This field is required.
Name string `json:"-" tf:"-"`
// The list of served entities under the serving endpoint config.
// Wire name: 'served_entities'
ServedEntities []ServedEntityInput `json:"served_entities,omitempty"`
// (Deprecated, use served_entities instead) The list of served models under
// the serving endpoint config.
// Wire name: 'served_models'
ServedModels []ServedModelInput `json:"served_models,omitempty"`
// The traffic configuration associated with the serving endpoint config.
// Wire name: 'traffic_config'
TrafficConfig *TrafficConfig `json:"traffic_config,omitempty"`
}
func (EndpointCoreConfigInput) MarshalJSON ¶ added in v0.73.0
func (st EndpointCoreConfigInput) MarshalJSON() ([]byte, error)
func (*EndpointCoreConfigInput) UnmarshalJSON ¶ added in v0.73.0
func (st *EndpointCoreConfigInput) UnmarshalJSON(b []byte) error
type EndpointCoreConfigOutput ¶
type EndpointCoreConfigOutput struct {
// Configuration for Inference Tables which automatically logs requests and
// responses to Unity Catalog. Note: this field is deprecated for creating
// new provisioned throughput endpoints, or updating existing provisioned
// throughput endpoints that never have inference table configured; in these
// cases please use AI Gateway to manage inference tables.
// Wire name: 'auto_capture_config'
AutoCaptureConfig *AutoCaptureConfigOutput `json:"auto_capture_config,omitempty"`
// The config version that the serving endpoint is currently serving.
// Wire name: 'config_version'
ConfigVersion int64 `json:"config_version,omitempty"`
// The list of served entities under the serving endpoint config.
// Wire name: 'served_entities'
ServedEntities []ServedEntityOutput `json:"served_entities,omitempty"`
// (Deprecated, use served_entities instead) The list of served models under
// the serving endpoint config.
// Wire name: 'served_models'
ServedModels []ServedModelOutput `json:"served_models,omitempty"`
// The traffic configuration associated with the serving endpoint config.
// Wire name: 'traffic_config'
TrafficConfig *TrafficConfig `json:"traffic_config,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (EndpointCoreConfigOutput) MarshalJSON ¶ added in v0.23.0
func (st EndpointCoreConfigOutput) MarshalJSON() ([]byte, error)
func (*EndpointCoreConfigOutput) UnmarshalJSON ¶ added in v0.23.0
func (st *EndpointCoreConfigOutput) UnmarshalJSON(b []byte) error
type EndpointCoreConfigSummary ¶
type EndpointCoreConfigSummary struct {
// The list of served entities under the serving endpoint config.
// Wire name: 'served_entities'
ServedEntities []ServedEntitySpec `json:"served_entities,omitempty"`
// (Deprecated, use served_entities instead) The list of served models under
// the serving endpoint config.
// Wire name: 'served_models'
ServedModels []ServedModelSpec `json:"served_models,omitempty"`
}
func (EndpointCoreConfigSummary) MarshalJSON ¶ added in v0.73.0
func (st EndpointCoreConfigSummary) MarshalJSON() ([]byte, error)
func (*EndpointCoreConfigSummary) UnmarshalJSON ¶ added in v0.73.0
func (st *EndpointCoreConfigSummary) UnmarshalJSON(b []byte) error
type EndpointPendingConfig ¶
type EndpointPendingConfig struct {
// Configuration for Inference Tables which automatically logs requests and
// responses to Unity Catalog. Note: this field is deprecated for creating
// new provisioned throughput endpoints, or updating existing provisioned
// throughput endpoints that never have inference table configured; in these
// cases please use AI Gateway to manage inference tables.
// Wire name: 'auto_capture_config'
AutoCaptureConfig *AutoCaptureConfigOutput `json:"auto_capture_config,omitempty"`
// The config version that the serving endpoint is currently serving.
// Wire name: 'config_version'
ConfigVersion int `json:"config_version,omitempty"`
// The list of served entities belonging to the last issued update to the
// serving endpoint.
// Wire name: 'served_entities'
ServedEntities []ServedEntityOutput `json:"served_entities,omitempty"`
// (Deprecated, use served_entities instead) The list of served models
// belonging to the last issued update to the serving endpoint.
// Wire name: 'served_models'
ServedModels []ServedModelOutput `json:"served_models,omitempty"`
// The timestamp when the update to the pending config started.
// Wire name: 'start_time'
StartTime int64 `json:"start_time,omitempty"`
// The traffic config defining how invocations to the serving endpoint
// should be routed.
// Wire name: 'traffic_config'
TrafficConfig *TrafficConfig `json:"traffic_config,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (EndpointPendingConfig) MarshalJSON ¶ added in v0.23.0
func (st EndpointPendingConfig) MarshalJSON() ([]byte, error)
func (*EndpointPendingConfig) UnmarshalJSON ¶ added in v0.23.0
func (st *EndpointPendingConfig) UnmarshalJSON(b []byte) error
type EndpointState ¶
type EndpointState struct {
// The state of an endpoint's config update. This informs the user if the
// pending_config is in progress, if the update failed, or if there is no
// update in progress. Note that if the endpoint's config_update state value
// is IN_PROGRESS, another update cannot be made until the update completes
// or fails.
// Wire name: 'config_update'
ConfigUpdate EndpointStateConfigUpdate `json:"config_update,omitempty"`
// The state of an endpoint, indicating whether or not the endpoint is
// queryable. An endpoint is READY if all of the served entities in its
// active configuration are ready. If any of the actively served entities
// are in a non-ready state, the endpoint state will be NOT_READY.
// Wire name: 'ready'
Ready EndpointStateReady `json:"ready,omitempty"`
}
func (EndpointState) MarshalJSON ¶ added in v0.73.0
func (st EndpointState) MarshalJSON() ([]byte, error)
func (*EndpointState) UnmarshalJSON ¶ added in v0.73.0
func (st *EndpointState) UnmarshalJSON(b []byte) error
type EndpointStateConfigUpdate ¶
type EndpointStateConfigUpdate string
const EndpointStateConfigUpdateInProgress EndpointStateConfigUpdate = `IN_PROGRESS`
const EndpointStateConfigUpdateNotUpdating EndpointStateConfigUpdate = `NOT_UPDATING`
const EndpointStateConfigUpdateUpdateCanceled EndpointStateConfigUpdate = `UPDATE_CANCELED`
const EndpointStateConfigUpdateUpdateFailed EndpointStateConfigUpdate = `UPDATE_FAILED`
func (*EndpointStateConfigUpdate) Set ¶
func (f *EndpointStateConfigUpdate) Set(v string) error
Set raw string value and validate it against allowed values
func (*EndpointStateConfigUpdate) String ¶
func (f *EndpointStateConfigUpdate) String() string
String representation for fmt.Print
func (*EndpointStateConfigUpdate) Type ¶
func (f *EndpointStateConfigUpdate) Type() string
Type always returns EndpointStateConfigUpdate to satisfy [pflag.Value] interface
func (*EndpointStateConfigUpdate) Values ¶ added in v0.72.0
func (f *EndpointStateConfigUpdate) Values() []EndpointStateConfigUpdate
Values returns all possible values for EndpointStateConfigUpdate.
There is no guarantee on the order of the values in the slice.
type EndpointStateReady ¶
type EndpointStateReady string
const EndpointStateReadyNotReady EndpointStateReady = `NOT_READY`
const EndpointStateReadyReady EndpointStateReady = `READY`
func (*EndpointStateReady) Set ¶
func (f *EndpointStateReady) Set(v string) error
Set raw string value and validate it against allowed values
func (*EndpointStateReady) String ¶
func (f *EndpointStateReady) String() string
String representation for fmt.Print
func (*EndpointStateReady) Type ¶
func (f *EndpointStateReady) Type() string
Type always returns EndpointStateReady to satisfy [pflag.Value] interface
func (*EndpointStateReady) Values ¶ added in v0.72.0
func (f *EndpointStateReady) Values() []EndpointStateReady
Values returns all possible values for EndpointStateReady.
There is no guarantee on the order of the values in the slice.
type EndpointTag ¶ added in v0.20.0
type EndpointTag struct {
// Key field for a serving endpoint tag.
// Wire name: 'key'
Key string `json:"key"`
// Optional value field for a serving endpoint tag.
// Wire name: 'value'
Value string `json:"value,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (EndpointTag) MarshalJSON ¶ added in v0.23.0
func (st EndpointTag) MarshalJSON() ([]byte, error)
func (*EndpointTag) UnmarshalJSON ¶ added in v0.23.0
func (st *EndpointTag) UnmarshalJSON(b []byte) error
type EndpointTags ¶ added in v0.56.0
type EndpointTags struct {
// Wire name: 'tags'
Tags []EndpointTag `json:"tags,omitempty"`
}
func (EndpointTags) MarshalJSON ¶ added in v0.73.0
func (st EndpointTags) MarshalJSON() ([]byte, error)
func (*EndpointTags) UnmarshalJSON ¶ added in v0.73.0
func (st *EndpointTags) UnmarshalJSON(b []byte) error
type ExportMetricsRequest ¶
type ExportMetricsRequest struct {
// The name of the serving endpoint to retrieve metrics for. This field is
// required.
Name string `json:"-" tf:"-"`
}
Get metrics of a serving endpoint
func (ExportMetricsRequest) MarshalJSON ¶ added in v0.73.0
func (st ExportMetricsRequest) MarshalJSON() ([]byte, error)
func (*ExportMetricsRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *ExportMetricsRequest) UnmarshalJSON(b []byte) error
type ExportMetricsResponse ¶ added in v0.34.0
type ExportMetricsResponse struct {
Contents io.ReadCloser `json:"-" tf:"-"`
}
func (ExportMetricsResponse) MarshalJSON ¶ added in v0.73.0
func (st ExportMetricsResponse) MarshalJSON() ([]byte, error)
func (*ExportMetricsResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *ExportMetricsResponse) UnmarshalJSON(b []byte) error
type ExternalFunctionRequest ¶ added in v0.56.0
type ExternalFunctionRequest struct {
// The connection name to use. This is required to identify the external
// connection.
// Wire name: 'connection_name'
ConnectionName string `json:"connection_name"`
// Additional headers for the request. If not provided, only auth headers
// from connections would be passed.
// Wire name: 'headers'
Headers string `json:"headers,omitempty"`
// The JSON payload to send in the request body.
// Wire name: 'json'
Json string `json:"json,omitempty"`
// The HTTP method to use (e.g., 'GET', 'POST').
// Wire name: 'method'
Method ExternalFunctionRequestHttpMethod `json:"method"`
// Query parameters for the request.
// Wire name: 'params'
Params string `json:"params,omitempty"`
// The relative path for the API endpoint. This is required.
// Wire name: 'path'
Path string `json:"path"`
ForceSendFields []string `json:"-" tf:"-"`
}
A request to invoke an external function through a connection, specifying the connection name, HTTP method, relative path, and optional headers, query parameters, and JSON body.
func (ExternalFunctionRequest) MarshalJSON ¶ added in v0.56.0
func (st ExternalFunctionRequest) MarshalJSON() ([]byte, error)
func (*ExternalFunctionRequest) UnmarshalJSON ¶ added in v0.56.0
func (st *ExternalFunctionRequest) UnmarshalJSON(b []byte) error
type ExternalFunctionRequestHttpMethod ¶ added in v0.56.0
type ExternalFunctionRequestHttpMethod string
const ExternalFunctionRequestHttpMethodDelete ExternalFunctionRequestHttpMethod = `DELETE`
const ExternalFunctionRequestHttpMethodGet ExternalFunctionRequestHttpMethod = `GET`
const ExternalFunctionRequestHttpMethodPatch ExternalFunctionRequestHttpMethod = `PATCH`
const ExternalFunctionRequestHttpMethodPost ExternalFunctionRequestHttpMethod = `POST`
const ExternalFunctionRequestHttpMethodPut ExternalFunctionRequestHttpMethod = `PUT`
func (*ExternalFunctionRequestHttpMethod) Set ¶ added in v0.56.0
func (f *ExternalFunctionRequestHttpMethod) Set(v string) error
Set raw string value and validate it against allowed values
func (*ExternalFunctionRequestHttpMethod) String ¶ added in v0.56.0
func (f *ExternalFunctionRequestHttpMethod) String() string
String representation for fmt.Print
func (*ExternalFunctionRequestHttpMethod) Type ¶ added in v0.56.0
func (f *ExternalFunctionRequestHttpMethod) Type() string
Type always returns ExternalFunctionRequestHttpMethod to satisfy [pflag.Value] interface
func (*ExternalFunctionRequestHttpMethod) Values ¶ added in v0.72.0
func (f *ExternalFunctionRequestHttpMethod) Values() []ExternalFunctionRequestHttpMethod
Values returns all possible values for ExternalFunctionRequestHttpMethod.
There is no guarantee on the order of the values in the slice.
type ExternalModel ¶ added in v0.27.0
type ExternalModel struct {
// AI21Labs Config. Only required if the provider is 'ai21labs'.
// Wire name: 'ai21labs_config'
Ai21labsConfig *Ai21LabsConfig `json:"ai21labs_config,omitempty"`
// Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'.
// Wire name: 'amazon_bedrock_config'
AmazonBedrockConfig *AmazonBedrockConfig `json:"amazon_bedrock_config,omitempty"`
// Anthropic Config. Only required if the provider is 'anthropic'.
// Wire name: 'anthropic_config'
AnthropicConfig *AnthropicConfig `json:"anthropic_config,omitempty"`
// Cohere Config. Only required if the provider is 'cohere'.
// Wire name: 'cohere_config'
CohereConfig *CohereConfig `json:"cohere_config,omitempty"`
// Custom Provider Config. Only required if the provider is 'custom'.
// Wire name: 'custom_provider_config'
CustomProviderConfig *CustomProviderConfig `json:"custom_provider_config,omitempty"`
// Databricks Model Serving Config. Only required if the provider is
// 'databricks-model-serving'.
// Wire name: 'databricks_model_serving_config'
DatabricksModelServingConfig *DatabricksModelServingConfig `json:"databricks_model_serving_config,omitempty"`
// Google Cloud Vertex AI Config. Only required if the provider is
// 'google-cloud-vertex-ai'.
// Wire name: 'google_cloud_vertex_ai_config'
GoogleCloudVertexAiConfig *GoogleCloudVertexAiConfig `json:"google_cloud_vertex_ai_config,omitempty"`
// The name of the external model.
// Wire name: 'name'
Name string `json:"name"`
// OpenAI Config. Only required if the provider is 'openai'.
// Wire name: 'openai_config'
OpenaiConfig *OpenAiConfig `json:"openai_config,omitempty"`
// PaLM Config. Only required if the provider is 'palm'.
// Wire name: 'palm_config'
PalmConfig *PaLmConfig `json:"palm_config,omitempty"`
// The name of the provider for the external model. Currently, the supported
// providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere',
// 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm',
// and 'custom'.
// Wire name: 'provider'
Provider ExternalModelProvider `json:"provider"`
// The task type of the external model.
// Wire name: 'task'
Task string `json:"task"`
}
func (ExternalModel) MarshalJSON ¶ added in v0.73.0
func (st ExternalModel) MarshalJSON() ([]byte, error)
func (*ExternalModel) UnmarshalJSON ¶ added in v0.73.0
func (st *ExternalModel) UnmarshalJSON(b []byte) error
type ExternalModelProvider ¶ added in v0.27.0
type ExternalModelProvider string
const ExternalModelProviderAi21labs ExternalModelProvider = `ai21labs`
const ExternalModelProviderAmazonBedrock ExternalModelProvider = `amazon-bedrock`
const ExternalModelProviderAnthropic ExternalModelProvider = `anthropic`
const ExternalModelProviderCohere ExternalModelProvider = `cohere`
const ExternalModelProviderCustom ExternalModelProvider = `custom`
const ExternalModelProviderDatabricksModelServing ExternalModelProvider = `databricks-model-serving`
const ExternalModelProviderGoogleCloudVertexAi ExternalModelProvider = `google-cloud-vertex-ai`
const ExternalModelProviderOpenai ExternalModelProvider = `openai`
const ExternalModelProviderPalm ExternalModelProvider = `palm`
func (*ExternalModelProvider) Set ¶ added in v0.27.0
func (f *ExternalModelProvider) Set(v string) error
Set raw string value and validate it against allowed values
func (*ExternalModelProvider) String ¶ added in v0.27.0
func (f *ExternalModelProvider) String() string
String representation for fmt.Print
func (*ExternalModelProvider) Type ¶ added in v0.27.0
func (f *ExternalModelProvider) Type() string
Type always returns ExternalModelProvider to satisfy [pflag.Value] interface
func (*ExternalModelProvider) Values ¶ added in v0.72.0
func (f *ExternalModelProvider) Values() []ExternalModelProvider
Values returns all possible values for ExternalModelProvider.
There is no guarantee on the order of the values in the slice.
type ExternalModelUsageElement ¶ added in v0.27.0
type ExternalModelUsageElement struct {
// The number of tokens in the chat/completions response.
// Wire name: 'completion_tokens'
CompletionTokens int `json:"completion_tokens,omitempty"`
// The number of tokens in the prompt.
// Wire name: 'prompt_tokens'
PromptTokens int `json:"prompt_tokens,omitempty"`
// The total number of tokens in the prompt and response.
// Wire name: 'total_tokens'
TotalTokens int `json:"total_tokens,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (ExternalModelUsageElement) MarshalJSON ¶ added in v0.27.0
func (st ExternalModelUsageElement) MarshalJSON() ([]byte, error)
func (*ExternalModelUsageElement) UnmarshalJSON ¶ added in v0.27.0
func (st *ExternalModelUsageElement) UnmarshalJSON(b []byte) error
type FallbackConfig ¶ added in v0.61.0
type FallbackConfig struct {
// Whether to enable traffic fallback. When a served entity in the serving
// endpoint returns specific error codes (e.g. 500), the request will
// automatically be round-robin attempted with other served entities in the
// same endpoint, following the order of served entity list, until a
// successful response is returned. If all attempts fail, return the last
// response with the error code.
// Wire name: 'enabled'
Enabled bool `json:"enabled"`
}
func (FallbackConfig) MarshalJSON ¶ added in v0.73.0
func (st FallbackConfig) MarshalJSON() ([]byte, error)
func (*FallbackConfig) UnmarshalJSON ¶ added in v0.73.0
func (st *FallbackConfig) UnmarshalJSON(b []byte) error
type FoundationModel ¶ added in v0.27.0
type FoundationModel struct {
// Wire name: 'description'
Description string `json:"description,omitempty"`
// Wire name: 'display_name'
DisplayName string `json:"display_name,omitempty"`
// Wire name: 'docs'
Docs string `json:"docs,omitempty"`
// Wire name: 'name'
Name string `json:"name,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
None of these fields are sensitive, as they are hard-coded in the system and made available to customers.
func (FoundationModel) MarshalJSON ¶ added in v0.27.0
func (st FoundationModel) MarshalJSON() ([]byte, error)
func (*FoundationModel) UnmarshalJSON ¶ added in v0.27.0
func (st *FoundationModel) UnmarshalJSON(b []byte) error
type GetOpenApiRequest ¶ added in v0.39.0
type GetOpenApiRequest struct {
// The name of the serving endpoint that the served model belongs to. This
// field is required.
Name string `json:"-" tf:"-"`
}
Get the schema for a serving endpoint
func (GetOpenApiRequest) MarshalJSON ¶ added in v0.73.0
func (st GetOpenApiRequest) MarshalJSON() ([]byte, error)
func (*GetOpenApiRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *GetOpenApiRequest) UnmarshalJSON(b []byte) error
type GetOpenApiResponse ¶ added in v0.39.0
type GetOpenApiResponse struct {
Contents io.ReadCloser `json:"-" tf:"-"`
}
func (GetOpenApiResponse) MarshalJSON ¶ added in v0.73.0
func (st GetOpenApiResponse) MarshalJSON() ([]byte, error)
func (*GetOpenApiResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *GetOpenApiResponse) UnmarshalJSON(b []byte) error
type GetServingEndpointPermissionLevelsRequest ¶ added in v0.15.0
type GetServingEndpointPermissionLevelsRequest struct {
// The serving endpoint for which to get or manage permissions.
ServingEndpointId string `json:"-" tf:"-"`
}
Get serving endpoint permission levels
func (GetServingEndpointPermissionLevelsRequest) MarshalJSON ¶ added in v0.73.0
func (st GetServingEndpointPermissionLevelsRequest) MarshalJSON() ([]byte, error)
func (*GetServingEndpointPermissionLevelsRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *GetServingEndpointPermissionLevelsRequest) UnmarshalJSON(b []byte) error
type GetServingEndpointPermissionLevelsResponse ¶ added in v0.15.0
type GetServingEndpointPermissionLevelsResponse struct {
// Specific permission levels
// Wire name: 'permission_levels'
PermissionLevels []ServingEndpointPermissionsDescription `json:"permission_levels,omitempty"`
}
func (GetServingEndpointPermissionLevelsResponse) MarshalJSON ¶ added in v0.73.0
func (st GetServingEndpointPermissionLevelsResponse) MarshalJSON() ([]byte, error)
func (*GetServingEndpointPermissionLevelsResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *GetServingEndpointPermissionLevelsResponse) UnmarshalJSON(b []byte) error
type GetServingEndpointPermissionsRequest ¶ added in v0.15.0
type GetServingEndpointPermissionsRequest struct {
// The serving endpoint for which to get or manage permissions.
ServingEndpointId string `json:"-" tf:"-"`
}
Get serving endpoint permissions
func (GetServingEndpointPermissionsRequest) MarshalJSON ¶ added in v0.73.0
func (st GetServingEndpointPermissionsRequest) MarshalJSON() ([]byte, error)
func (*GetServingEndpointPermissionsRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *GetServingEndpointPermissionsRequest) UnmarshalJSON(b []byte) error
type GetServingEndpointRequest ¶
type GetServingEndpointRequest struct {
// The name of the serving endpoint. This field is required.
Name string `json:"-" tf:"-"`
}
Get a single serving endpoint
func (GetServingEndpointRequest) MarshalJSON ¶ added in v0.73.0
func (st GetServingEndpointRequest) MarshalJSON() ([]byte, error)
func (*GetServingEndpointRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *GetServingEndpointRequest) UnmarshalJSON(b []byte) error
type GoogleCloudVertexAiConfig ¶ added in v0.44.0
type GoogleCloudVertexAiConfig struct {
// The Databricks secret key reference for a private key for the service
// account which has access to the Google Cloud Vertex AI Service. See [Best
// practices for managing service account keys]. If you prefer to paste your
// API key directly, see `private_key_plaintext`. You must provide an API
// key using one of the following fields: `private_key` or
// `private_key_plaintext`
//
// [Best practices for managing service account keys]:
// https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys
// Wire name: 'private_key'
PrivateKey string `json:"private_key,omitempty"`
// The private key for the service account which has access to the Google
// Cloud Vertex AI Service provided as a plaintext secret. See [Best
// practices for managing service account keys]. If you prefer to reference
// your key using Databricks Secrets, see `private_key`. You must provide an
// API key using one of the following fields: `private_key` or
// `private_key_plaintext`.
//
// [Best practices for managing service account keys]:
// https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys
// Wire name: 'private_key_plaintext'
PrivateKeyPlaintext string `json:"private_key_plaintext,omitempty"`
// This is the Google Cloud project id that the service account is
// associated with.
// Wire name: 'project_id'
ProjectId string `json:"project_id"`
// This is the region for the Google Cloud Vertex AI Service. See [supported
// regions] for more details. Some models are only available in specific
// regions.
//
// [supported regions]:
// https://cloud.google.com/vertex-ai/docs/general/locations
// Wire name: 'region'
Region string `json:"region"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (GoogleCloudVertexAiConfig) MarshalJSON ¶ added in v0.44.0
func (st GoogleCloudVertexAiConfig) MarshalJSON() ([]byte, error)
func (*GoogleCloudVertexAiConfig) UnmarshalJSON ¶ added in v0.44.0
func (st *GoogleCloudVertexAiConfig) UnmarshalJSON(b []byte) error
type HttpRequestResponse ¶ added in v0.57.0
type HttpRequestResponse struct {
Contents io.ReadCloser `json:"-" tf:"-"`
}
func (HttpRequestResponse) MarshalJSON ¶ added in v0.73.0
func (st HttpRequestResponse) MarshalJSON() ([]byte, error)
func (*HttpRequestResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *HttpRequestResponse) UnmarshalJSON(b []byte) error
type ListEndpointsResponse ¶
type ListEndpointsResponse struct {
// The list of endpoints.
// Wire name: 'endpoints'
Endpoints []ServingEndpoint `json:"endpoints,omitempty"`
}
func (ListEndpointsResponse) MarshalJSON ¶ added in v0.73.0
func (st ListEndpointsResponse) MarshalJSON() ([]byte, error)
func (*ListEndpointsResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *ListEndpointsResponse) UnmarshalJSON(b []byte) error
type LogsRequest ¶
type LogsRequest struct {
// The name of the serving endpoint that the served model belongs to. This
// field is required.
Name string `json:"-" tf:"-"`
// The name of the served model that logs will be retrieved for. This field
// is required.
ServedModelName string `json:"-" tf:"-"`
}
Get the latest logs for a served model
func (LogsRequest) MarshalJSON ¶ added in v0.73.0
func (st LogsRequest) MarshalJSON() ([]byte, error)
func (*LogsRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *LogsRequest) UnmarshalJSON(b []byte) error
type ModelDataPlaneInfo ¶ added in v0.42.0
type ModelDataPlaneInfo struct {
// Information required to query DataPlane API 'query' endpoint.
// Wire name: 'query_info'
QueryInfo *DataPlaneInfo `json:"query_info,omitempty"`
}
A representation of all DataPlaneInfo for operations that can be done on a model through Data Plane APIs.
func (ModelDataPlaneInfo) MarshalJSON ¶ added in v0.73.0
func (st ModelDataPlaneInfo) MarshalJSON() ([]byte, error)
func (*ModelDataPlaneInfo) UnmarshalJSON ¶ added in v0.73.0
func (st *ModelDataPlaneInfo) UnmarshalJSON(b []byte) error
type OpenAiConfig ¶ added in v0.27.0
type OpenAiConfig struct {
// This field is only required for Azure AD OpenAI and is the Microsoft
// Entra Client ID.
// Wire name: 'microsoft_entra_client_id'
MicrosoftEntraClientId string `json:"microsoft_entra_client_id,omitempty"`
// The Databricks secret key reference for a client secret used for
// Microsoft Entra ID authentication. If you prefer to paste your client
// secret directly, see `microsoft_entra_client_secret_plaintext`. You must
// provide an API key using one of the following fields:
// `microsoft_entra_client_secret` or
// `microsoft_entra_client_secret_plaintext`.
// Wire name: 'microsoft_entra_client_secret'
MicrosoftEntraClientSecret string `json:"microsoft_entra_client_secret,omitempty"`
// The client secret used for Microsoft Entra ID authentication provided as
// a plaintext string. If you prefer to reference your key using Databricks
// Secrets, see `microsoft_entra_client_secret`. You must provide an API key
// using one of the following fields: `microsoft_entra_client_secret` or
// `microsoft_entra_client_secret_plaintext`.
// Wire name: 'microsoft_entra_client_secret_plaintext'
MicrosoftEntraClientSecretPlaintext string `json:"microsoft_entra_client_secret_plaintext,omitempty"`
// This field is only required for Azure AD OpenAI and is the Microsoft
// Entra Tenant ID.
// Wire name: 'microsoft_entra_tenant_id'
MicrosoftEntraTenantId string `json:"microsoft_entra_tenant_id,omitempty"`
// This is a field to provide a customized base URL for the OpenAI API. For
// Azure OpenAI, this field is required, and is the base URL for the Azure
// OpenAI API service provided by Azure. For other OpenAI API types, this
// field is optional, and if left unspecified, the standard OpenAI base URL
// is used.
// Wire name: 'openai_api_base'
OpenaiApiBase string `json:"openai_api_base,omitempty"`
// The Databricks secret key reference for an OpenAI API key using the
// OpenAI or Azure service. If you prefer to paste your API key directly,
// see `openai_api_key_plaintext`. You must provide an API key using one of
// the following fields: `openai_api_key` or `openai_api_key_plaintext`.
// Wire name: 'openai_api_key'
OpenaiApiKey string `json:"openai_api_key,omitempty"`
// The OpenAI API key using the OpenAI or Azure service provided as a
// plaintext string. If you prefer to reference your key using Databricks
// Secrets, see `openai_api_key`. You must provide an API key using one of
// the following fields: `openai_api_key` or `openai_api_key_plaintext`.
// Wire name: 'openai_api_key_plaintext'
OpenaiApiKeyPlaintext string `json:"openai_api_key_plaintext,omitempty"`
// This is an optional field to specify the type of OpenAI API to use. For
// Azure OpenAI, this field is required; adjust this parameter to
// represent the preferred security access validation protocol. For access
// token validation, use azure. For authentication using Azure Active
// Directory (Azure AD), use azuread.
// Wire name: 'openai_api_type'
OpenaiApiType string `json:"openai_api_type,omitempty"`
// This is an optional field to specify the OpenAI API version. For Azure
// OpenAI, this field is required, and is the version of the Azure OpenAI
// service to utilize, specified by a date.
// Wire name: 'openai_api_version'
OpenaiApiVersion string `json:"openai_api_version,omitempty"`
// This field is only required for Azure OpenAI and is the name of the
// deployment resource for the Azure OpenAI service.
// Wire name: 'openai_deployment_name'
OpenaiDeploymentName string `json:"openai_deployment_name,omitempty"`
// This is an optional field to specify the organization in OpenAI or Azure
// OpenAI.
// Wire name: 'openai_organization'
OpenaiOrganization string `json:"openai_organization,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
Configs needed to create an OpenAI model route.
func (OpenAiConfig) MarshalJSON ¶ added in v0.27.0
func (st OpenAiConfig) MarshalJSON() ([]byte, error)
func (*OpenAiConfig) UnmarshalJSON ¶ added in v0.27.0
func (st *OpenAiConfig) UnmarshalJSON(b []byte) error
type PaLmConfig ¶ added in v0.27.0
type PaLmConfig struct {
// The Databricks secret key reference for a PaLM API key. If you prefer to
// paste your API key directly, see `palm_api_key_plaintext`. You must
// provide an API key using one of the following fields: `palm_api_key` or
// `palm_api_key_plaintext`.
// Wire name: 'palm_api_key'
PalmApiKey string `json:"palm_api_key,omitempty"`
// The PaLM API key provided as a plaintext string. If you prefer to
// reference your key using Databricks Secrets, see `palm_api_key`. You must
// provide an API key using one of the following fields: `palm_api_key` or
// `palm_api_key_plaintext`.
// Wire name: 'palm_api_key_plaintext'
PalmApiKeyPlaintext string `json:"palm_api_key_plaintext,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (PaLmConfig) MarshalJSON ¶ added in v0.44.0
func (st PaLmConfig) MarshalJSON() ([]byte, error)
func (*PaLmConfig) UnmarshalJSON ¶ added in v0.44.0
func (st *PaLmConfig) UnmarshalJSON(b []byte) error
type PatchServingEndpointTags ¶ added in v0.20.0
type PatchServingEndpointTags struct {
// List of endpoint tags to add
// Wire name: 'add_tags'
AddTags []EndpointTag `json:"add_tags,omitempty"`
// List of tag keys to delete
// Wire name: 'delete_tags'
DeleteTags []string `json:"delete_tags,omitempty"`
// The name of the serving endpoint whose tags are to be patched. This
// field is required.
Name string `json:"-" tf:"-"`
}
func (PatchServingEndpointTags) MarshalJSON ¶ added in v0.73.0
func (st PatchServingEndpointTags) MarshalJSON() ([]byte, error)
func (*PatchServingEndpointTags) UnmarshalJSON ¶ added in v0.73.0
func (st *PatchServingEndpointTags) UnmarshalJSON(b []byte) error
type PayloadTable ¶ added in v0.27.0
type PayloadTable struct {
// Wire name: 'name'
Name string `json:"name,omitempty"`
// Wire name: 'status'
Status string `json:"status,omitempty"`
// Wire name: 'status_message'
StatusMessage string `json:"status_message,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (PayloadTable) MarshalJSON ¶ added in v0.27.0
func (st PayloadTable) MarshalJSON() ([]byte, error)
func (*PayloadTable) UnmarshalJSON ¶ added in v0.27.0
func (st *PayloadTable) UnmarshalJSON(b []byte) error
type PtEndpointCoreConfig ¶ added in v0.69.0
type PtEndpointCoreConfig struct {
// The list of served entities under the serving endpoint config.
// Wire name: 'served_entities'
ServedEntities []PtServedModel `json:"served_entities,omitempty"`
// Wire name: 'traffic_config'
TrafficConfig *TrafficConfig `json:"traffic_config,omitempty"`
}
func (PtEndpointCoreConfig) MarshalJSON ¶ added in v0.73.0
func (st PtEndpointCoreConfig) MarshalJSON() ([]byte, error)
func (*PtEndpointCoreConfig) UnmarshalJSON ¶ added in v0.73.0
func (st *PtEndpointCoreConfig) UnmarshalJSON(b []byte) error
type PtServedModel ¶ added in v0.69.0
type PtServedModel struct {
// The name of the entity to be served. The entity may be a model in the
// Databricks Model Registry, a model in the Unity Catalog (UC), or a
// function of type FEATURE_SPEC in the UC. If it is a UC object, the full
// name of the object should be given in the form of
// **catalog_name.schema_name.model_name**.
// Wire name: 'entity_name'
EntityName string `json:"entity_name"`
// Wire name: 'entity_version'
EntityVersion string `json:"entity_version,omitempty"`
// The name of a served entity. It must be unique across an endpoint. A
// served entity name can consist of alphanumeric characters, dashes, and
// underscores. If not specified for an external model, this field defaults
// to external_model.name, with '.' and ':' replaced with '-', and if not
// specified for other entities, it defaults to entity_name-entity_version.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// The number of model units to be provisioned.
// Wire name: 'provisioned_model_units'
ProvisionedModelUnits int64 `json:"provisioned_model_units"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (PtServedModel) MarshalJSON ¶ added in v0.69.0
func (st PtServedModel) MarshalJSON() ([]byte, error)
func (*PtServedModel) UnmarshalJSON ¶ added in v0.69.0
func (st *PtServedModel) UnmarshalJSON(b []byte) error
type PutAiGatewayRequest ¶ added in v0.47.0
type PutAiGatewayRequest struct {
// Configuration for traffic fallback which automatically falls back to
// other served entities if the request to a served entity fails with
// certain error codes, to increase availability.
// Wire name: 'fallback_config'
FallbackConfig *FallbackConfig `json:"fallback_config,omitempty"`
// Configuration for AI Guardrails to prevent unwanted data and unsafe data
// in requests and responses.
// Wire name: 'guardrails'
Guardrails *AiGatewayGuardrails `json:"guardrails,omitempty"`
// Configuration for payload logging using inference tables. Use these
// tables to monitor and audit data being sent to and received from model
// APIs and to improve model quality.
// Wire name: 'inference_table_config'
InferenceTableConfig *AiGatewayInferenceTableConfig `json:"inference_table_config,omitempty"`
// The name of the serving endpoint whose AI Gateway is being updated. This
// field is required.
Name string `json:"-" tf:"-"`
// Configuration for rate limits which can be set to limit endpoint traffic.
// Wire name: 'rate_limits'
RateLimits []AiGatewayRateLimit `json:"rate_limits,omitempty"`
// Configuration to enable usage tracking using system tables. These tables
// allow you to monitor operational usage on endpoints and their associated
// costs.
// Wire name: 'usage_tracking_config'
UsageTrackingConfig *AiGatewayUsageTrackingConfig `json:"usage_tracking_config,omitempty"`
}
func (PutAiGatewayRequest) MarshalJSON ¶ added in v0.73.0
func (st PutAiGatewayRequest) MarshalJSON() ([]byte, error)
func (*PutAiGatewayRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *PutAiGatewayRequest) UnmarshalJSON(b []byte) error
type PutAiGatewayResponse ¶ added in v0.47.0
type PutAiGatewayResponse struct {
// Configuration for traffic fallback which automatically falls back to
// other served entities if the request to a served entity fails with
// certain error codes, to increase availability.
// Wire name: 'fallback_config'
FallbackConfig *FallbackConfig `json:"fallback_config,omitempty"`
// Configuration for AI Guardrails to prevent unwanted data and unsafe data
// in requests and responses.
// Wire name: 'guardrails'
Guardrails *AiGatewayGuardrails `json:"guardrails,omitempty"`
// Configuration for payload logging using inference tables. Use these
// tables to monitor and audit data being sent to and received from model
// APIs and to improve model quality.
// Wire name: 'inference_table_config'
InferenceTableConfig *AiGatewayInferenceTableConfig `json:"inference_table_config,omitempty"`
// Configuration for rate limits which can be set to limit endpoint traffic.
// Wire name: 'rate_limits'
RateLimits []AiGatewayRateLimit `json:"rate_limits,omitempty"`
// Configuration to enable usage tracking using system tables. These tables
// allow you to monitor operational usage on endpoints and their associated
// costs.
// Wire name: 'usage_tracking_config'
UsageTrackingConfig *AiGatewayUsageTrackingConfig `json:"usage_tracking_config,omitempty"`
}
func (PutAiGatewayResponse) MarshalJSON ¶ added in v0.73.0
func (st PutAiGatewayResponse) MarshalJSON() ([]byte, error)
func (*PutAiGatewayResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *PutAiGatewayResponse) UnmarshalJSON(b []byte) error
type PutRequest ¶ added in v0.27.0
type PutRequest struct {
// The name of the serving endpoint whose rate limits are being updated.
// This field is required.
Name string `json:"-" tf:"-"`
// The list of endpoint rate limits.
// Wire name: 'rate_limits'
RateLimits []RateLimit `json:"rate_limits,omitempty"`
}
func (PutRequest) MarshalJSON ¶ added in v0.73.0
func (st PutRequest) MarshalJSON() ([]byte, error)
func (*PutRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *PutRequest) UnmarshalJSON(b []byte) error
type PutResponse ¶ added in v0.27.0
type PutResponse struct {
// The list of endpoint rate limits.
// Wire name: 'rate_limits'
RateLimits []RateLimit `json:"rate_limits,omitempty"`
}
func (PutResponse) MarshalJSON ¶ added in v0.73.0
func (st PutResponse) MarshalJSON() ([]byte, error)
func (*PutResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *PutResponse) UnmarshalJSON(b []byte) error
type QueryEndpointInput ¶ added in v0.21.0
type QueryEndpointInput struct {
// Pandas Dataframe input in the records orientation.
// Wire name: 'dataframe_records'
DataframeRecords []any `json:"dataframe_records,omitempty"`
// Pandas Dataframe input in the split orientation.
// Wire name: 'dataframe_split'
DataframeSplit *DataframeSplitInput `json:"dataframe_split,omitempty"`
// The extra parameters field used ONLY for __completions, chat,__ and
// __embeddings external & foundation model__ serving endpoints. This is a
// map of strings and should only be used with other external/foundation
// model query fields.
// Wire name: 'extra_params'
ExtraParams map[string]string `json:"extra_params,omitempty"`
// The input string (or array of strings) field used ONLY for __embeddings
// external & foundation model__ serving endpoints and is the only field
// (along with extra_params if needed) used by embeddings queries.
// Wire name: 'input'
Input any `json:"input,omitempty"`
// Tensor-based input in columnar format.
// Wire name: 'inputs'
Inputs any `json:"inputs,omitempty"`
// Tensor-based input in row format.
// Wire name: 'instances'
Instances []any `json:"instances,omitempty"`
// The max tokens field used ONLY for __completions__ and __chat external &
// foundation model__ serving endpoints. This is an integer and should only
// be used with other chat/completions query fields.
// Wire name: 'max_tokens'
MaxTokens int `json:"max_tokens,omitempty"`
// The messages field used ONLY for __chat external & foundation model__
// serving endpoints. This is a map of strings and should only be used with
// other chat query fields.
// Wire name: 'messages'
Messages []ChatMessage `json:"messages,omitempty"`
// The n (number of candidates) field used ONLY for __completions__ and
// __chat external & foundation model__ serving endpoints. This is an
// integer between 1 and 5 with a default of 1 and should only be used with
// other chat/completions query fields.
// Wire name: 'n'
N int `json:"n,omitempty"`
// The name of the serving endpoint. This field is required.
Name string `json:"-" tf:"-"`
// The prompt string (or array of strings) field used ONLY for __completions
// external & foundation model__ serving endpoints and should only be used
// with other completions query fields.
// Wire name: 'prompt'
Prompt any `json:"prompt,omitempty"`
// The stop sequences field used ONLY for __completions__ and __chat
// external & foundation model__ serving endpoints. This is a list of
// strings and should only be used with other chat/completions query fields.
// Wire name: 'stop'
Stop []string `json:"stop,omitempty"`
// The stream field used ONLY for __completions__ and __chat external &
// foundation model__ serving endpoints. This is a boolean defaulting to
// false and should only be used with other chat/completions query fields.
// Wire name: 'stream'
Stream bool `json:"stream,omitempty"`
// The temperature field used ONLY for __completions__ and __chat external &
// foundation model__ serving endpoints. This is a float between 0.0 and 2.0
// with a default of 1.0 and should only be used with other chat/completions
// query fields.
// Wire name: 'temperature'
Temperature float64 `json:"temperature,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (QueryEndpointInput) MarshalJSON ¶ added in v0.27.0
func (st QueryEndpointInput) MarshalJSON() ([]byte, error)
func (*QueryEndpointInput) UnmarshalJSON ¶ added in v0.27.0
func (st *QueryEndpointInput) UnmarshalJSON(b []byte) error
type QueryEndpointResponse ¶
type QueryEndpointResponse struct {
// The list of choices returned by the __chat or completions
// external/foundation model__ serving endpoint.
// Wire name: 'choices'
Choices []V1ResponseChoiceElement `json:"choices,omitempty"`
// The timestamp in seconds when the query was created in Unix time returned
// by a __completions or chat external/foundation model__ serving endpoint.
// Wire name: 'created'
Created int64 `json:"created,omitempty"`
// The list of the embeddings returned by the __embeddings
// external/foundation model__ serving endpoint.
// Wire name: 'data'
Data []EmbeddingsV1ResponseEmbeddingElement `json:"data,omitempty"`
// The ID of the query that may be returned by a __completions or chat
// external/foundation model__ serving endpoint.
// Wire name: 'id'
Id string `json:"id,omitempty"`
// The name of the __external/foundation model__ used for querying. This is
// the name of the model that was specified in the endpoint config.
// Wire name: 'model'
Model string `json:"model,omitempty"`
// The type of object returned by the __external/foundation model__ serving
// endpoint, one of [text_completion, chat.completion, list (of
// embeddings)].
// Wire name: 'object'
Object QueryEndpointResponseObject `json:"object,omitempty"`
// The predictions returned by the serving endpoint.
// Wire name: 'predictions'
Predictions []any `json:"predictions,omitempty"`
// The name of the served model that served the request. This is useful when
// there are multiple models behind the same endpoint with traffic split.
ServedModelName string `json:"-" tf:"-"`
// The usage object that may be returned by the __external/foundation
// model__ serving endpoint. This contains information about the number of
// tokens used in the prompt and response.
// Wire name: 'usage'
Usage *ExternalModelUsageElement `json:"usage,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (QueryEndpointResponse) MarshalJSON ¶ added in v0.27.0
func (st QueryEndpointResponse) MarshalJSON() ([]byte, error)
func (*QueryEndpointResponse) UnmarshalJSON ¶ added in v0.27.0
func (st *QueryEndpointResponse) UnmarshalJSON(b []byte) error
type QueryEndpointResponseObject ¶ added in v0.27.0
type QueryEndpointResponseObject string
The type of object returned by the __external/foundation model__ serving endpoint, one of [text_completion, chat.completion, list (of embeddings)].
const QueryEndpointResponseObjectChatCompletion QueryEndpointResponseObject = `chat.completion`
const QueryEndpointResponseObjectList QueryEndpointResponseObject = `list`
const QueryEndpointResponseObjectTextCompletion QueryEndpointResponseObject = `text_completion`
func (*QueryEndpointResponseObject) Set ¶ added in v0.27.0
func (f *QueryEndpointResponseObject) Set(v string) error
Set raw string value and validate it against allowed values
func (*QueryEndpointResponseObject) String ¶ added in v0.27.0
func (f *QueryEndpointResponseObject) String() string
String representation for fmt.Print
func (*QueryEndpointResponseObject) Type ¶ added in v0.27.0
func (f *QueryEndpointResponseObject) Type() string
Type always returns QueryEndpointResponseObject to satisfy [pflag.Value] interface
func (*QueryEndpointResponseObject) Values ¶ added in v0.72.0
func (f *QueryEndpointResponseObject) Values() []QueryEndpointResponseObject
Values returns all possible values for QueryEndpointResponseObject.
There is no guarantee on the order of the values in the slice.
type RateLimit ¶ added in v0.27.0
type RateLimit struct {
// Used to specify how many calls are allowed for a key within the
// renewal_period.
// Wire name: 'calls'
Calls int64 `json:"calls"`
// Key field for a serving endpoint rate limit. Currently, only 'user' and
// 'endpoint' are supported, with 'endpoint' being the default if not
// specified.
// Wire name: 'key'
Key RateLimitKey `json:"key,omitempty"`
// Renewal period field for a serving endpoint rate limit. Currently, only
// 'minute' is supported.
// Wire name: 'renewal_period'
RenewalPeriod RateLimitRenewalPeriod `json:"renewal_period"`
}
func (RateLimit) MarshalJSON ¶ added in v0.73.0
func (*RateLimit) UnmarshalJSON ¶ added in v0.73.0
type RateLimitKey ¶ added in v0.27.0
type RateLimitKey string
const RateLimitKeyEndpoint RateLimitKey = `endpoint`
const RateLimitKeyUser RateLimitKey = `user`
func (*RateLimitKey) Set ¶ added in v0.27.0
func (f *RateLimitKey) Set(v string) error
Set raw string value and validate it against allowed values
func (*RateLimitKey) String ¶ added in v0.27.0
func (f *RateLimitKey) String() string
String representation for fmt.Print
func (*RateLimitKey) Type ¶ added in v0.27.0
func (f *RateLimitKey) Type() string
Type always returns RateLimitKey to satisfy [pflag.Value] interface
func (*RateLimitKey) Values ¶ added in v0.72.0
func (f *RateLimitKey) Values() []RateLimitKey
Values returns all possible values for RateLimitKey.
There is no guarantee on the order of the values in the slice.
type RateLimitRenewalPeriod ¶ added in v0.27.0
type RateLimitRenewalPeriod string
const RateLimitRenewalPeriodMinute RateLimitRenewalPeriod = `minute`
func (*RateLimitRenewalPeriod) Set ¶ added in v0.27.0
func (f *RateLimitRenewalPeriod) Set(v string) error
Set raw string value and validate it against allowed values
func (*RateLimitRenewalPeriod) String ¶ added in v0.27.0
func (f *RateLimitRenewalPeriod) String() string
String representation for fmt.Print
func (*RateLimitRenewalPeriod) Type ¶ added in v0.27.0
func (f *RateLimitRenewalPeriod) Type() string
Type always returns RateLimitRenewalPeriod to satisfy [pflag.Value] interface
func (*RateLimitRenewalPeriod) Values ¶ added in v0.72.0
func (f *RateLimitRenewalPeriod) Values() []RateLimitRenewalPeriod
Values returns all possible values for RateLimitRenewalPeriod.
There is no guarantee on the order of the values in the slice.
type Route ¶
type Route struct {
// The name of the served model this route configures traffic for.
// Wire name: 'served_model_name'
ServedModelName string `json:"served_model_name"`
// The percentage of endpoint traffic to send to this route. It must be an
// integer between 0 and 100 inclusive.
// Wire name: 'traffic_percentage'
TrafficPercentage int `json:"traffic_percentage"`
}
func (Route) MarshalJSON ¶ added in v0.73.0
func (*Route) UnmarshalJSON ¶ added in v0.73.0
type ServedEntityInput ¶ added in v0.27.0
type ServedEntityInput struct {
// The name of the entity to be served. The entity may be a model in the
// Databricks Model Registry, a model in the Unity Catalog (UC), or a
// function of type FEATURE_SPEC in the UC. If it is a UC object, the full
// name of the object should be given in the form of
// **catalog_name.schema_name.model_name**.
// Wire name: 'entity_name'
EntityName string `json:"entity_name,omitempty"`
// Wire name: 'entity_version'
EntityVersion string `json:"entity_version,omitempty"`
// An object containing a set of optional, user-specified environment
// variable key-value pairs used for serving this entity. Note: this is an
// experimental feature and subject to change. Example entity environment
// variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
// "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN":
// "{{secrets/my_scope2/my_key2}}"}`
// Wire name: 'environment_vars'
EnvironmentVars map[string]string `json:"environment_vars,omitempty"`
// The external model to be served. NOTE: Only one of external_model and
// (entity_name, entity_version, workload_size, workload_type, and
// scale_to_zero_enabled) can be specified with the latter set being used
// for custom model serving for a Databricks registered model. For an
// existing endpoint with external_model, it cannot be updated to an
// endpoint without external_model. If the endpoint is created without
// external_model, users cannot update it to add external_model later. The
// task type of all external models within an endpoint must be the same.
// Wire name: 'external_model'
ExternalModel *ExternalModel `json:"external_model,omitempty"`
// ARN of the instance profile that the served entity uses to access AWS
// resources.
// Wire name: 'instance_profile_arn'
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
// The maximum provisioned concurrency that the endpoint can scale up to. Do
// not use if workload_size is specified.
// Wire name: 'max_provisioned_concurrency'
MaxProvisionedConcurrency int `json:"max_provisioned_concurrency,omitempty"`
// The maximum tokens per second that the endpoint can scale up to.
// Wire name: 'max_provisioned_throughput'
MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"`
// The minimum provisioned concurrency that the endpoint can scale down to.
// Do not use if workload_size is specified.
// Wire name: 'min_provisioned_concurrency'
MinProvisionedConcurrency int `json:"min_provisioned_concurrency,omitempty"`
// The minimum tokens per second that the endpoint can scale down to.
// Wire name: 'min_provisioned_throughput'
MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"`
// The name of a served entity. It must be unique across an endpoint. A
// served entity name can consist of alphanumeric characters, dashes, and
// underscores. If not specified for an external model, this field defaults
// to external_model.name, with '.' and ':' replaced with '-', and if not
// specified for other entities, it defaults to entity_name-entity_version.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// The number of model units provisioned.
// Wire name: 'provisioned_model_units'
ProvisionedModelUnits int64 `json:"provisioned_model_units,omitempty"`
// Whether the compute resources for the served entity should scale down to
// zero.
// Wire name: 'scale_to_zero_enabled'
ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"`
// The workload size of the served entity. The workload size corresponds to
// a range of provisioned concurrency that the compute autoscales between. A
// single unit of provisioned concurrency can process one request at a time.
// Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
// "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64
// provisioned concurrency). Additional custom workload sizes can also be
// used when available in the workspace. If scale-to-zero is enabled, the
// lower bound of the provisioned concurrency for each workload size is 0.
// Do not use if min_provisioned_concurrency and max_provisioned_concurrency
// are specified.
// Wire name: 'workload_size'
WorkloadSize string `json:"workload_size,omitempty"`
// The workload type of the served entity. The workload type selects which
// type of compute to use in the endpoint. The default value for this
// parameter is "CPU". For deep learning workloads, GPU acceleration is
// available by selecting workload types like GPU_SMALL and others. See the
// available [GPU types].
//
// [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types
// Wire name: 'workload_type'
WorkloadType ServingModelWorkloadType `json:"workload_type,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServedEntityInput) MarshalJSON ¶ added in v0.27.0
func (st ServedEntityInput) MarshalJSON() ([]byte, error)
func (*ServedEntityInput) UnmarshalJSON ¶ added in v0.27.0
func (st *ServedEntityInput) UnmarshalJSON(b []byte) error
type ServedEntityOutput ¶ added in v0.27.0
// ServedEntityOutput describes a served entity as returned by the Serving
// Endpoints API: the configured fields plus read-only metadata such as the
// creator, creation timestamp, and deployment state.
type ServedEntityOutput struct {
// The timestamp when the served entity was created (Unix time).
// Wire name: 'creation_timestamp'
CreationTimestamp int64 `json:"creation_timestamp,omitempty"`
// The user who created the served entity.
// Wire name: 'creator'
Creator string `json:"creator,omitempty"`
// The name of the entity to be served. The entity may be a model in the
// Databricks Model Registry, a model in the Unity Catalog (UC), or a
// function of type FEATURE_SPEC in the UC. If it is a UC object, the full
// name of the object should be given in the form of
// **catalog_name.schema_name.model_name**.
// Wire name: 'entity_name'
EntityName string `json:"entity_name,omitempty"`
// The version of the served entity.
// Wire name: 'entity_version'
EntityVersion string `json:"entity_version,omitempty"`
// An object containing a set of optional, user-specified environment
// variable key-value pairs used for serving this entity. Note: this is an
// experimental feature and subject to change. Example entity environment
// variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
// "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN":
// "{{secrets/my_scope2/my_key2}}"}`
// Wire name: 'environment_vars'
EnvironmentVars map[string]string `json:"environment_vars,omitempty"`
// The external model to be served. NOTE: Only one of external_model and
// (entity_name, entity_version, workload_size, workload_type, and
// scale_to_zero_enabled) can be specified with the latter set being used
// for custom model serving for a Databricks registered model. For an
// existing endpoint with external_model, it cannot be updated to an
// endpoint without external_model. If the endpoint is created without
// external_model, users cannot update it to add external_model later. The
// task type of all external models within an endpoint must be the same.
// Wire name: 'external_model'
ExternalModel *ExternalModel `json:"external_model,omitempty"`
// All fields are not sensitive as they are hard-coded in the system and
// made available to customers.
// Wire name: 'foundation_model'
FoundationModel *FoundationModel `json:"foundation_model,omitempty"`
// ARN of the instance profile that the served entity uses to access AWS
// resources.
// Wire name: 'instance_profile_arn'
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
// The maximum provisioned concurrency that the endpoint can scale up to. Do
// not use if workload_size is specified.
// Wire name: 'max_provisioned_concurrency'
MaxProvisionedConcurrency int `json:"max_provisioned_concurrency,omitempty"`
// The maximum tokens per second that the endpoint can scale up to.
// Wire name: 'max_provisioned_throughput'
MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"`
// The minimum provisioned concurrency that the endpoint can scale down to.
// Do not use if workload_size is specified.
// Wire name: 'min_provisioned_concurrency'
MinProvisionedConcurrency int `json:"min_provisioned_concurrency,omitempty"`
// The minimum tokens per second that the endpoint can scale down to.
// Wire name: 'min_provisioned_throughput'
MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"`
// The name of a served entity. It must be unique across an endpoint. A
// served entity name can consist of alphanumeric characters, dashes, and
// underscores. If not specified for an external model, this field defaults
// to external_model.name, with '.' and ':' replaced with '-', and if not
// specified for other entities, it defaults to entity_name-entity_version.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// The number of model units provisioned.
// Wire name: 'provisioned_model_units'
ProvisionedModelUnits int64 `json:"provisioned_model_units,omitempty"`
// Whether the compute resources for the served entity should scale down to
// zero.
// Wire name: 'scale_to_zero_enabled'
ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"`
// The deployment state of this served entity (see ServedModelState).
// Wire name: 'state'
State *ServedModelState `json:"state,omitempty"`
// The workload size of the served entity. The workload size corresponds to
// a range of provisioned concurrency that the compute autoscales between. A
// single unit of provisioned concurrency can process one request at a time.
// Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
// "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64
// provisioned concurrency). Additional custom workload sizes can also be
// used when available in the workspace. If scale-to-zero is enabled, the
// lower bound of the provisioned concurrency for each workload size is 0.
// Do not use if min_provisioned_concurrency and max_provisioned_concurrency
// are specified.
// Wire name: 'workload_size'
WorkloadSize string `json:"workload_size,omitempty"`
// The workload type of the served entity. The workload type selects which
// type of compute to use in the endpoint. The default value for this
// parameter is "CPU". For deep learning workloads, GPU acceleration is
// available by selecting workload types like GPU_SMALL and others. See the
// available [GPU types].
//
// [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types
// Wire name: 'workload_type'
WorkloadType ServingModelWorkloadType `json:"workload_type,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServedEntityOutput) MarshalJSON ¶ added in v0.27.0
func (st ServedEntityOutput) MarshalJSON() ([]byte, error)
func (*ServedEntityOutput) UnmarshalJSON ¶ added in v0.27.0
func (st *ServedEntityOutput) UnmarshalJSON(b []byte) error
type ServedEntitySpec ¶ added in v0.27.0
// ServedEntitySpec is a minimal identifying view of a served entity: its
// name/version (or external/foundation model) without scaling configuration.
type ServedEntitySpec struct {
// The name of the entity being served.
// Wire name: 'entity_name'
EntityName string `json:"entity_name,omitempty"`
// The version of the entity being served.
// Wire name: 'entity_version'
EntityVersion string `json:"entity_version,omitempty"`
// The external model being served, if any.
// Wire name: 'external_model'
ExternalModel *ExternalModel `json:"external_model,omitempty"`
// All fields are not sensitive as they are hard-coded in the system and
// made available to customers.
// Wire name: 'foundation_model'
FoundationModel *FoundationModel `json:"foundation_model,omitempty"`
// The name of the served entity.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServedEntitySpec) MarshalJSON ¶ added in v0.27.0
func (st ServedEntitySpec) MarshalJSON() ([]byte, error)
func (*ServedEntitySpec) UnmarshalJSON ¶ added in v0.27.0
func (st *ServedEntitySpec) UnmarshalJSON(b []byte) error
type ServedModelInput ¶
// ServedModelInput is the configuration supplied when adding a registered
// model to a serving endpoint. Unlike ServedEntityInput it requires
// model_name, model_version, and scale_to_zero_enabled (no omitempty).
type ServedModelInput struct {
// An object containing a set of optional, user-specified environment
// variable key-value pairs used for serving this entity. Note: this is an
// experimental feature and subject to change. Example entity environment
// variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
// "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN":
// "{{secrets/my_scope2/my_key2}}"}`
// Wire name: 'environment_vars'
EnvironmentVars map[string]string `json:"environment_vars,omitempty"`
// ARN of the instance profile that the served entity uses to access AWS
// resources.
// Wire name: 'instance_profile_arn'
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
// The maximum provisioned concurrency that the endpoint can scale up to. Do
// not use if workload_size is specified.
// Wire name: 'max_provisioned_concurrency'
MaxProvisionedConcurrency int `json:"max_provisioned_concurrency,omitempty"`
// The maximum tokens per second that the endpoint can scale up to.
// Wire name: 'max_provisioned_throughput'
MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"`
// The minimum provisioned concurrency that the endpoint can scale down to.
// Do not use if workload_size is specified.
// Wire name: 'min_provisioned_concurrency'
MinProvisionedConcurrency int `json:"min_provisioned_concurrency,omitempty"`
// The minimum tokens per second that the endpoint can scale down to.
// Wire name: 'min_provisioned_throughput'
MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"`
// The name of the model to be served. Required: always serialized, even
// when empty (no omitempty).
// Wire name: 'model_name'
ModelName string `json:"model_name"`
// The version of the model to be served. Required: always serialized, even
// when empty (no omitempty).
// Wire name: 'model_version'
ModelVersion string `json:"model_version"`
// The name of a served entity. It must be unique across an endpoint. A
// served entity name can consist of alphanumeric characters, dashes, and
// underscores. If not specified for an external model, this field defaults
// to external_model.name, with '.' and ':' replaced with '-', and if not
// specified for other entities, it defaults to entity_name-entity_version.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// The number of model units provisioned.
// Wire name: 'provisioned_model_units'
ProvisionedModelUnits int64 `json:"provisioned_model_units,omitempty"`
// Whether the compute resources for the served entity should scale down to
// zero. Required: always serialized (no omitempty), so an explicit false is
// sent on the wire.
// Wire name: 'scale_to_zero_enabled'
ScaleToZeroEnabled bool `json:"scale_to_zero_enabled"`
// The workload size of the served entity. The workload size corresponds to
// a range of provisioned concurrency that the compute autoscales between. A
// single unit of provisioned concurrency can process one request at a time.
// Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
// "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64
// provisioned concurrency). Additional custom workload sizes can also be
// used when available in the workspace. If scale-to-zero is enabled, the
// lower bound of the provisioned concurrency for each workload size is 0.
// Do not use if min_provisioned_concurrency and max_provisioned_concurrency
// are specified.
// Wire name: 'workload_size'
WorkloadSize string `json:"workload_size,omitempty"`
// The workload type of the served entity. The workload type selects which
// type of compute to use in the endpoint. The default value for this
// parameter is "CPU". For deep learning workloads, GPU acceleration is
// available by selecting workload types like GPU_SMALL and others. See the
// available [GPU types].
//
// [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types
// Wire name: 'workload_type'
WorkloadType ServedModelInputWorkloadType `json:"workload_type,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServedModelInput) MarshalJSON ¶ added in v0.23.0
func (st ServedModelInput) MarshalJSON() ([]byte, error)
func (*ServedModelInput) UnmarshalJSON ¶ added in v0.23.0
func (st *ServedModelInput) UnmarshalJSON(b []byte) error
type ServedModelInputWorkloadType ¶ added in v0.27.0
type ServedModelInputWorkloadType string
Please keep this in sync with the workload types in InferenceEndpointEntities.scala
// Allowed values for ServedModelInputWorkloadType.
const ServedModelInputWorkloadTypeCpu ServedModelInputWorkloadType = `CPU`
const ServedModelInputWorkloadTypeGpuLarge ServedModelInputWorkloadType = `GPU_LARGE`
const ServedModelInputWorkloadTypeGpuMedium ServedModelInputWorkloadType = `GPU_MEDIUM`
const ServedModelInputWorkloadTypeGpuSmall ServedModelInputWorkloadType = `GPU_SMALL`
const ServedModelInputWorkloadTypeMultigpuMedium ServedModelInputWorkloadType = `MULTIGPU_MEDIUM`
func (*ServedModelInputWorkloadType) Set ¶ added in v0.27.0
func (f *ServedModelInputWorkloadType) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServedModelInputWorkloadType) String ¶ added in v0.27.0
func (f *ServedModelInputWorkloadType) String() string
String representation for fmt.Print
func (*ServedModelInputWorkloadType) Type ¶ added in v0.27.0
func (f *ServedModelInputWorkloadType) Type() string
Type always returns ServedModelInputWorkloadType to satisfy [pflag.Value] interface
func (*ServedModelInputWorkloadType) Values ¶ added in v0.72.0
func (f *ServedModelInputWorkloadType) Values() []ServedModelInputWorkloadType
Values returns all possible values for ServedModelInputWorkloadType.
There is no guarantee on the order of the values in the slice.
type ServedModelOutput ¶
// ServedModelOutput describes a served model as returned by the Serving
// Endpoints API, including read-only metadata (creator, creation timestamp,
// deployment state) alongside the configured fields.
type ServedModelOutput struct {
// The timestamp when the served model was created (Unix time).
// Wire name: 'creation_timestamp'
CreationTimestamp int64 `json:"creation_timestamp,omitempty"`
// The user who created the served model.
// Wire name: 'creator'
Creator string `json:"creator,omitempty"`
// An object containing a set of optional, user-specified environment
// variable key-value pairs used for serving this entity. Note: this is an
// experimental feature and subject to change. Example entity environment
// variables that refer to Databricks secrets: `{"OPENAI_API_KEY":
// "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN":
// "{{secrets/my_scope2/my_key2}}"}`
// Wire name: 'environment_vars'
EnvironmentVars map[string]string `json:"environment_vars,omitempty"`
// ARN of the instance profile that the served entity uses to access AWS
// resources.
// Wire name: 'instance_profile_arn'
InstanceProfileArn string `json:"instance_profile_arn,omitempty"`
// The maximum provisioned concurrency that the endpoint can scale up to. Do
// not use if workload_size is specified.
// Wire name: 'max_provisioned_concurrency'
MaxProvisionedConcurrency int `json:"max_provisioned_concurrency,omitempty"`
// The minimum provisioned concurrency that the endpoint can scale down to.
// Do not use if workload_size is specified.
// Wire name: 'min_provisioned_concurrency'
MinProvisionedConcurrency int `json:"min_provisioned_concurrency,omitempty"`
// The name of the served model.
// Wire name: 'model_name'
ModelName string `json:"model_name,omitempty"`
// The version of the served model.
// Wire name: 'model_version'
ModelVersion string `json:"model_version,omitempty"`
// The name of a served entity. It must be unique across an endpoint. A
// served entity name can consist of alphanumeric characters, dashes, and
// underscores. If not specified for an external model, this field defaults
// to external_model.name, with '.' and ':' replaced with '-', and if not
// specified for other entities, it defaults to entity_name-entity_version.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// The number of model units provisioned.
// Wire name: 'provisioned_model_units'
ProvisionedModelUnits int64 `json:"provisioned_model_units,omitempty"`
// Whether the compute resources for the served entity should scale down to
// zero.
// Wire name: 'scale_to_zero_enabled'
ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"`
// The deployment state of this served model (see ServedModelState).
// Wire name: 'state'
State *ServedModelState `json:"state,omitempty"`
// The workload size of the served entity. The workload size corresponds to
// a range of provisioned concurrency that the compute autoscales between. A
// single unit of provisioned concurrency can process one request at a time.
// Valid workload sizes are "Small" (4 - 4 provisioned concurrency),
// "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64
// provisioned concurrency). Additional custom workload sizes can also be
// used when available in the workspace. If scale-to-zero is enabled, the
// lower bound of the provisioned concurrency for each workload size is 0.
// Do not use if min_provisioned_concurrency and max_provisioned_concurrency
// are specified.
// Wire name: 'workload_size'
WorkloadSize string `json:"workload_size,omitempty"`
// The workload type of the served entity. The workload type selects which
// type of compute to use in the endpoint. The default value for this
// parameter is "CPU". For deep learning workloads, GPU acceleration is
// available by selecting workload types like GPU_SMALL and others. See the
// available [GPU types].
//
// [GPU types]: https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types
// Wire name: 'workload_type'
WorkloadType ServingModelWorkloadType `json:"workload_type,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServedModelOutput) MarshalJSON ¶ added in v0.23.0
func (st ServedModelOutput) MarshalJSON() ([]byte, error)
func (*ServedModelOutput) UnmarshalJSON ¶ added in v0.23.0
func (st *ServedModelOutput) UnmarshalJSON(b []byte) error
type ServedModelSpec ¶
// ServedModelSpec is a minimal identifying view of a served model. It
// mirrors ServedEntitySpec: exactly one of the model_* / entity_* naming
// schemes should be populated.
type ServedModelSpec struct {
// Only one of model_name and entity_name should be populated
// Wire name: 'model_name'
ModelName string `json:"model_name,omitempty"`
// Only one of model_version and entity_version should be populated
// Wire name: 'model_version'
ModelVersion string `json:"model_version,omitempty"`
// The name of the served model.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServedModelSpec) MarshalJSON ¶ added in v0.23.0
func (st ServedModelSpec) MarshalJSON() ([]byte, error)
func (*ServedModelSpec) UnmarshalJSON ¶ added in v0.23.0
func (st *ServedModelSpec) UnmarshalJSON(b []byte) error
type ServedModelState ¶
// ServedModelState reports the deployment status of a served model or
// served entity.
type ServedModelState struct {
// The current deployment status (see ServedModelStateDeployment values,
// e.g. DEPLOYMENT_READY, DEPLOYMENT_FAILED).
// Wire name: 'deployment'
Deployment ServedModelStateDeployment `json:"deployment,omitempty"`
// Additional detail about the deployment state, if available.
// Wire name: 'deployment_state_message'
DeploymentStateMessage string `json:"deployment_state_message,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServedModelState) MarshalJSON ¶ added in v0.23.0
func (st ServedModelState) MarshalJSON() ([]byte, error)
func (*ServedModelState) UnmarshalJSON ¶ added in v0.23.0
func (st *ServedModelState) UnmarshalJSON(b []byte) error
type ServedModelStateDeployment ¶
// ServedModelStateDeployment is the deployment status of a served model.
type ServedModelStateDeployment string
// Allowed values for ServedModelStateDeployment.
const ServedModelStateDeploymentAborted ServedModelStateDeployment = `DEPLOYMENT_ABORTED`
const ServedModelStateDeploymentCreating ServedModelStateDeployment = `DEPLOYMENT_CREATING`
const ServedModelStateDeploymentFailed ServedModelStateDeployment = `DEPLOYMENT_FAILED`
const ServedModelStateDeploymentReady ServedModelStateDeployment = `DEPLOYMENT_READY`
const ServedModelStateDeploymentRecovering ServedModelStateDeployment = `DEPLOYMENT_RECOVERING`
func (*ServedModelStateDeployment) Set ¶
func (f *ServedModelStateDeployment) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServedModelStateDeployment) String ¶
func (f *ServedModelStateDeployment) String() string
String representation for fmt.Print
func (*ServedModelStateDeployment) Type ¶
func (f *ServedModelStateDeployment) Type() string
Type always returns ServedModelStateDeployment to satisfy [pflag.Value] interface
func (*ServedModelStateDeployment) Values ¶ added in v0.72.0
func (f *ServedModelStateDeployment) Values() []ServedModelStateDeployment
Values returns all possible values for ServedModelStateDeployment.
There is no guarantee on the order of the values in the slice.
type ServerLogsResponse ¶
// ServerLogsResponse is the response body for a server-logs request.
type ServerLogsResponse struct {
// The most recent log lines of the model server processing invocation
// requests. Required: always serialized (no omitempty).
// Wire name: 'logs'
Logs string `json:"logs"`
}
func (ServerLogsResponse) MarshalJSON ¶ added in v0.73.0
func (st ServerLogsResponse) MarshalJSON() ([]byte, error)
func (*ServerLogsResponse) UnmarshalJSON ¶ added in v0.73.0
func (st *ServerLogsResponse) UnmarshalJSON(b []byte) error
type ServingEndpoint ¶
// ServingEndpoint describes a serving endpoint: its identity, the summary
// of its currently served config, state, and tags.
type ServingEndpoint struct {
// The AI Gateway configuration for the serving endpoint. NOTE: External
// model, provisioned throughput, and pay-per-token endpoints are fully
// supported; agent endpoints currently only support inference tables.
// Wire name: 'ai_gateway'
AiGateway *AiGatewayConfig `json:"ai_gateway,omitempty"`
// The budget policy associated with the endpoint.
// Wire name: 'budget_policy_id'
BudgetPolicyId string `json:"budget_policy_id,omitempty"`
// The config that is currently being served by the endpoint.
// Wire name: 'config'
Config *EndpointCoreConfigSummary `json:"config,omitempty"`
// The timestamp when the endpoint was created in Unix time.
// Wire name: 'creation_timestamp'
CreationTimestamp int64 `json:"creation_timestamp,omitempty"`
// The email of the user who created the serving endpoint.
// Wire name: 'creator'
Creator string `json:"creator,omitempty"`
// Description of the endpoint
// Wire name: 'description'
Description string `json:"description,omitempty"`
// System-generated ID of the endpoint, included to be used by the
// Permissions API.
// Wire name: 'id'
Id string `json:"id,omitempty"`
// The timestamp when the endpoint was last updated by a user in Unix time.
// Wire name: 'last_updated_timestamp'
LastUpdatedTimestamp int64 `json:"last_updated_timestamp,omitempty"`
// The name of the serving endpoint.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// Information corresponding to the state of the serving endpoint.
// Wire name: 'state'
State *EndpointState `json:"state,omitempty"`
// Tags attached to the serving endpoint.
// Wire name: 'tags'
Tags []EndpointTag `json:"tags,omitempty"`
// The task type of the serving endpoint.
// Wire name: 'task'
Task string `json:"task,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServingEndpoint) MarshalJSON ¶ added in v0.23.0
func (st ServingEndpoint) MarshalJSON() ([]byte, error)
func (*ServingEndpoint) UnmarshalJSON ¶ added in v0.23.0
func (st *ServingEndpoint) UnmarshalJSON(b []byte) error
type ServingEndpointAccessControlRequest ¶ added in v0.15.0
// ServingEndpointAccessControlRequest grants a permission level to exactly
// one principal: a group, a service principal, or a user.
type ServingEndpointAccessControlRequest struct {
// name of the group
// Wire name: 'group_name'
GroupName string `json:"group_name,omitempty"`
// Permission level
// Wire name: 'permission_level'
PermissionLevel ServingEndpointPermissionLevel `json:"permission_level,omitempty"`
// application ID of a service principal
// Wire name: 'service_principal_name'
ServicePrincipalName string `json:"service_principal_name,omitempty"`
// name of the user
// Wire name: 'user_name'
UserName string `json:"user_name,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServingEndpointAccessControlRequest) MarshalJSON ¶ added in v0.23.0
func (st ServingEndpointAccessControlRequest) MarshalJSON() ([]byte, error)
func (*ServingEndpointAccessControlRequest) UnmarshalJSON ¶ added in v0.23.0
func (st *ServingEndpointAccessControlRequest) UnmarshalJSON(b []byte) error
type ServingEndpointAccessControlResponse ¶ added in v0.15.0
// ServingEndpointAccessControlResponse describes the permissions a single
// principal (group, service principal, or user) holds on a serving endpoint.
type ServingEndpointAccessControlResponse struct {
// All permissions.
// Wire name: 'all_permissions'
AllPermissions []ServingEndpointPermission `json:"all_permissions,omitempty"`
// Display name of the user or service principal.
// Wire name: 'display_name'
DisplayName string `json:"display_name,omitempty"`
// name of the group
// Wire name: 'group_name'
GroupName string `json:"group_name,omitempty"`
// Name of the service principal.
// Wire name: 'service_principal_name'
ServicePrincipalName string `json:"service_principal_name,omitempty"`
// name of the user
// Wire name: 'user_name'
UserName string `json:"user_name,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServingEndpointAccessControlResponse) MarshalJSON ¶ added in v0.23.0
func (st ServingEndpointAccessControlResponse) MarshalJSON() ([]byte, error)
func (*ServingEndpointAccessControlResponse) UnmarshalJSON ¶ added in v0.23.0
func (st *ServingEndpointAccessControlResponse) UnmarshalJSON(b []byte) error
type ServingEndpointDetailed ¶
// ServingEndpointDetailed is the full representation of a serving endpoint,
// extending the summary view with pending config, permission level, route
// optimization status, endpoint URL, and data plane information.
type ServingEndpointDetailed struct {
// The AI Gateway configuration for the serving endpoint. NOTE: External
// model, provisioned throughput, and pay-per-token endpoints are fully
// supported; agent endpoints currently only support inference tables.
// Wire name: 'ai_gateway'
AiGateway *AiGatewayConfig `json:"ai_gateway,omitempty"`
// The budget policy associated with the endpoint.
// Wire name: 'budget_policy_id'
BudgetPolicyId string `json:"budget_policy_id,omitempty"`
// The config that is currently being served by the endpoint.
// Wire name: 'config'
Config *EndpointCoreConfigOutput `json:"config,omitempty"`
// The timestamp when the endpoint was created in Unix time.
// Wire name: 'creation_timestamp'
CreationTimestamp int64 `json:"creation_timestamp,omitempty"`
// The email of the user who created the serving endpoint.
// Wire name: 'creator'
Creator string `json:"creator,omitempty"`
// Information required to query DataPlane APIs.
// Wire name: 'data_plane_info'
DataPlaneInfo *ModelDataPlaneInfo `json:"data_plane_info,omitempty"`
// Description of the serving model
// Wire name: 'description'
Description string `json:"description,omitempty"`
// Endpoint invocation url if route optimization is enabled for endpoint
// Wire name: 'endpoint_url'
EndpointUrl string `json:"endpoint_url,omitempty"`
// System-generated ID of the endpoint. This is used to refer to the
// endpoint in the Permissions API
// Wire name: 'id'
Id string `json:"id,omitempty"`
// The timestamp when the endpoint was last updated by a user in Unix time.
// Wire name: 'last_updated_timestamp'
LastUpdatedTimestamp int64 `json:"last_updated_timestamp,omitempty"`
// The name of the serving endpoint.
// Wire name: 'name'
Name string `json:"name,omitempty"`
// The config that the endpoint is attempting to update to.
// Wire name: 'pending_config'
PendingConfig *EndpointPendingConfig `json:"pending_config,omitempty"`
// The permission level of the principal making the request.
// Wire name: 'permission_level'
PermissionLevel ServingEndpointDetailedPermissionLevel `json:"permission_level,omitempty"`
// Boolean representing if route optimization has been enabled for the
// endpoint
// Wire name: 'route_optimized'
RouteOptimized bool `json:"route_optimized,omitempty"`
// Information corresponding to the state of the serving endpoint.
// Wire name: 'state'
State *EndpointState `json:"state,omitempty"`
// Tags attached to the serving endpoint.
// Wire name: 'tags'
Tags []EndpointTag `json:"tags,omitempty"`
// The task type of the serving endpoint.
// Wire name: 'task'
Task string `json:"task,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServingEndpointDetailed) MarshalJSON ¶ added in v0.23.0
func (st ServingEndpointDetailed) MarshalJSON() ([]byte, error)
func (*ServingEndpointDetailed) UnmarshalJSON ¶ added in v0.23.0
func (st *ServingEndpointDetailed) UnmarshalJSON(b []byte) error
type ServingEndpointDetailedPermissionLevel ¶
// ServingEndpointDetailedPermissionLevel is the permission level of the
// principal making the request, as reported on ServingEndpointDetailed.
type ServingEndpointDetailedPermissionLevel string
// Allowed values for ServingEndpointDetailedPermissionLevel.
const ServingEndpointDetailedPermissionLevelCanManage ServingEndpointDetailedPermissionLevel = `CAN_MANAGE`
const ServingEndpointDetailedPermissionLevelCanQuery ServingEndpointDetailedPermissionLevel = `CAN_QUERY`
const ServingEndpointDetailedPermissionLevelCanView ServingEndpointDetailedPermissionLevel = `CAN_VIEW`
func (*ServingEndpointDetailedPermissionLevel) Set ¶
func (f *ServingEndpointDetailedPermissionLevel) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServingEndpointDetailedPermissionLevel) String ¶
func (f *ServingEndpointDetailedPermissionLevel) String() string
String representation for fmt.Print
func (*ServingEndpointDetailedPermissionLevel) Type ¶
func (f *ServingEndpointDetailedPermissionLevel) Type() string
Type always returns ServingEndpointDetailedPermissionLevel to satisfy [pflag.Value] interface
func (*ServingEndpointDetailedPermissionLevel) Values ¶ added in v0.72.0
func (f *ServingEndpointDetailedPermissionLevel) Values() []ServingEndpointDetailedPermissionLevel
Values returns all possible values for ServingEndpointDetailedPermissionLevel.
There is no guarantee on the order of the values in the slice.
type ServingEndpointPermission ¶ added in v0.15.0
// ServingEndpointPermission is a single permission entry on a serving
// endpoint, possibly inherited from a parent object.
type ServingEndpointPermission struct {
// Whether the permission is inherited rather than set directly.
// Wire name: 'inherited'
Inherited bool `json:"inherited,omitempty"`
// The objects from which the permission is inherited, if any.
// Wire name: 'inherited_from_object'
InheritedFromObject []string `json:"inherited_from_object,omitempty"`
// Permission level
// Wire name: 'permission_level'
PermissionLevel ServingEndpointPermissionLevel `json:"permission_level,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServingEndpointPermission) MarshalJSON ¶ added in v0.23.0
func (st ServingEndpointPermission) MarshalJSON() ([]byte, error)
func (*ServingEndpointPermission) UnmarshalJSON ¶ added in v0.23.0
func (st *ServingEndpointPermission) UnmarshalJSON(b []byte) error
type ServingEndpointPermissionLevel ¶ added in v0.15.0
type ServingEndpointPermissionLevel string
Permission level
// Allowed values for ServingEndpointPermissionLevel.
const ServingEndpointPermissionLevelCanManage ServingEndpointPermissionLevel = `CAN_MANAGE`
const ServingEndpointPermissionLevelCanQuery ServingEndpointPermissionLevel = `CAN_QUERY`
const ServingEndpointPermissionLevelCanView ServingEndpointPermissionLevel = `CAN_VIEW`
func (*ServingEndpointPermissionLevel) Set ¶ added in v0.15.0
func (f *ServingEndpointPermissionLevel) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServingEndpointPermissionLevel) String ¶ added in v0.15.0
func (f *ServingEndpointPermissionLevel) String() string
String representation for fmt.Print
func (*ServingEndpointPermissionLevel) Type ¶ added in v0.15.0
func (f *ServingEndpointPermissionLevel) Type() string
Type always returns ServingEndpointPermissionLevel to satisfy [pflag.Value] interface
func (*ServingEndpointPermissionLevel) Values ¶ added in v0.72.0
func (f *ServingEndpointPermissionLevel) Values() []ServingEndpointPermissionLevel
Values returns all possible values for ServingEndpointPermissionLevel.
There is no guarantee on the order of the values in the slice.
type ServingEndpointPermissions ¶ added in v0.15.0
// ServingEndpointPermissions is the full permission set of a serving
// endpoint object.
type ServingEndpointPermissions struct {
// The access control entries on the object.
// Wire name: 'access_control_list'
AccessControlList []ServingEndpointAccessControlResponse `json:"access_control_list,omitempty"`
// The ID of the object the permissions apply to.
// Wire name: 'object_id'
ObjectId string `json:"object_id,omitempty"`
// The type of the object the permissions apply to.
// Wire name: 'object_type'
ObjectType string `json:"object_type,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServingEndpointPermissions) MarshalJSON ¶ added in v0.23.0
func (st ServingEndpointPermissions) MarshalJSON() ([]byte, error)
func (*ServingEndpointPermissions) UnmarshalJSON ¶ added in v0.23.0
func (st *ServingEndpointPermissions) UnmarshalJSON(b []byte) error
type ServingEndpointPermissionsDescription ¶ added in v0.15.0
// ServingEndpointPermissionsDescription pairs a permission level with a
// human-readable description of what it allows.
type ServingEndpointPermissionsDescription struct {
// Description of the permission level.
// Wire name: 'description'
Description string `json:"description,omitempty"`
// Permission level
// Wire name: 'permission_level'
PermissionLevel ServingEndpointPermissionLevel `json:"permission_level,omitempty"`
// ForceSendFields lists field names of this struct to serialize even when
// they hold their zero value (zero-valued fields are otherwise dropped by
// omitempty). It is itself excluded from JSON and Terraform output via the
// `-` tags.
ForceSendFields []string `json:"-" tf:"-"`
}
func (ServingEndpointPermissionsDescription) MarshalJSON ¶ added in v0.23.0
func (st ServingEndpointPermissionsDescription) MarshalJSON() ([]byte, error)
func (*ServingEndpointPermissionsDescription) UnmarshalJSON ¶ added in v0.23.0
func (st *ServingEndpointPermissionsDescription) UnmarshalJSON(b []byte) error
type ServingEndpointPermissionsRequest ¶ added in v0.15.0
// ServingEndpointPermissionsRequest is the request body for getting or
// setting permissions on a serving endpoint.
type ServingEndpointPermissionsRequest struct {
// The access control entries to apply.
// Wire name: 'access_control_list'
AccessControlList []ServingEndpointAccessControlRequest `json:"access_control_list,omitempty"`
// The serving endpoint for which to get or manage permissions.
// Not serialized into the request body (`json:"-"`) — presumably supplied
// via the request path; confirm against the caller.
ServingEndpointId string `json:"-" tf:"-"`
}
func (ServingEndpointPermissionsRequest) MarshalJSON ¶ added in v0.73.0
func (st ServingEndpointPermissionsRequest) MarshalJSON() ([]byte, error)
func (*ServingEndpointPermissionsRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *ServingEndpointPermissionsRequest) UnmarshalJSON(b []byte) error
type ServingEndpointsAPI ¶
// ServingEndpointsAPI provides access to the Serving Endpoints API for
// creating, updating, querying, and deleting model serving endpoints.
// Construct it with NewServingEndpoints.
type ServingEndpointsAPI struct {
	// contains filtered or unexported fields
}
The Serving Endpoints API allows you to create, update, and delete model serving endpoints.
You can use a serving endpoint to serve models from the Databricks Model Registry or from Unity Catalog. Endpoints expose the underlying models as scalable REST API endpoints using serverless compute. This means the endpoints and associated compute resources are fully managed by Databricks and will not appear in your cloud account. A serving endpoint can consist of one or more MLflow models from the Databricks Model Registry, called served entities. A serving endpoint can have at most ten served entities. You can configure traffic settings to define how requests should be routed to your served entities behind an endpoint. Additionally, you can configure the scale of resources that should be applied to each served entity.
func NewServingEndpoints ¶
func NewServingEndpoints(client *client.DatabricksClient) *ServingEndpointsAPI
func (*ServingEndpointsAPI) BuildLogs ¶
func (a *ServingEndpointsAPI) BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error)
func (*ServingEndpointsAPI) BuildLogsByNameAndServedModelName ¶
func (a *ServingEndpointsAPI) BuildLogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*BuildLogsResponse, error)
Retrieves the build logs associated with the provided served model.
func (*ServingEndpointsAPI) Create ¶
func (a *ServingEndpointsAPI) Create(ctx context.Context, createServingEndpoint CreateServingEndpoint) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
Create a new serving endpoint.
func (*ServingEndpointsAPI) CreateAndWait
deprecated
func (a *ServingEndpointsAPI) CreateAndWait(ctx context.Context, createServingEndpoint CreateServingEndpoint, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
Calls ServingEndpointsAPI.Create and waits to reach NOT_UPDATING state
You can override the default timeout of 20 minutes by adding the retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
Deprecated: use ServingEndpointsAPI.Create.Get() or ServingEndpointsAPI.WaitGetServingEndpointNotUpdating
func (*ServingEndpointsAPI) CreateProvisionedThroughputEndpoint ¶ added in v0.69.0
func (a *ServingEndpointsAPI) CreateProvisionedThroughputEndpoint(ctx context.Context, createPtEndpointRequest CreatePtEndpointRequest) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
Create a new PT serving endpoint.
func (*ServingEndpointsAPI) CreateProvisionedThroughputEndpointAndWait
deprecated
added in v0.69.0
func (a *ServingEndpointsAPI) CreateProvisionedThroughputEndpointAndWait(ctx context.Context, createPtEndpointRequest CreatePtEndpointRequest, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
Calls ServingEndpointsAPI.CreateProvisionedThroughputEndpoint and waits to reach NOT_UPDATING state
You can override the default timeout of 20 minutes by adding the retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
Deprecated: use ServingEndpointsAPI.CreateProvisionedThroughputEndpoint.Get() or ServingEndpointsAPI.WaitGetServingEndpointNotUpdating
func (*ServingEndpointsAPI) Delete ¶
func (a *ServingEndpointsAPI) Delete(ctx context.Context, request DeleteServingEndpointRequest) error
func (*ServingEndpointsAPI) DeleteByName ¶
func (a *ServingEndpointsAPI) DeleteByName(ctx context.Context, name string) error
Delete a serving endpoint.
func (*ServingEndpointsAPI) ExportMetrics ¶
func (a *ServingEndpointsAPI) ExportMetrics(ctx context.Context, request ExportMetricsRequest) (*ExportMetricsResponse, error)
func (*ServingEndpointsAPI) ExportMetricsByName ¶
func (a *ServingEndpointsAPI) ExportMetricsByName(ctx context.Context, name string) (*ExportMetricsResponse, error)
Retrieves the metrics associated with the provided serving endpoint in either Prometheus or OpenMetrics exposition format.
func (*ServingEndpointsAPI) Get ¶
func (a *ServingEndpointsAPI) Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error)
func (*ServingEndpointsAPI) GetByName ¶
func (a *ServingEndpointsAPI) GetByName(ctx context.Context, name string) (*ServingEndpointDetailed, error)
Retrieves the details for a single serving endpoint.
func (*ServingEndpointsAPI) GetOpenApi ¶ added in v0.39.0
func (a *ServingEndpointsAPI) GetOpenApi(ctx context.Context, request GetOpenApiRequest) (*GetOpenApiResponse, error)
func (*ServingEndpointsAPI) GetOpenApiByName ¶ added in v0.39.0
func (a *ServingEndpointsAPI) GetOpenApiByName(ctx context.Context, name string) (*GetOpenApiResponse, error)
Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for the supported paths, input and output format and datatypes.
func (*ServingEndpointsAPI) GetPermissionLevels ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error)
func (*ServingEndpointsAPI) GetPermissionLevelsByServingEndpointId ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissionLevelsByServingEndpointId(ctx context.Context, servingEndpointId string) (*GetServingEndpointPermissionLevelsResponse, error)
Gets the permission levels that a user can have on an object.
func (*ServingEndpointsAPI) GetPermissions ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
func (*ServingEndpointsAPI) GetPermissionsByServingEndpointId ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissionsByServingEndpointId(ctx context.Context, servingEndpointId string) (*ServingEndpointPermissions, error)
Gets the permissions of a serving endpoint. Serving endpoints can inherit permissions from their root object.
func (*ServingEndpointsAPI) HttpRequest ¶ added in v0.56.0
func (a *ServingEndpointsAPI) HttpRequest(ctx context.Context, request ExternalFunctionRequest) (*HttpRequestResponse, error)
func (*ServingEndpointsAPI) List ¶
func (a *ServingEndpointsAPI) List(ctx context.Context) listing.Iterator[ServingEndpoint]
Get all serving endpoints.
func (*ServingEndpointsAPI) ListAll ¶ added in v0.10.0
func (a *ServingEndpointsAPI) ListAll(ctx context.Context) ([]ServingEndpoint, error)
Get all serving endpoints.
func (*ServingEndpointsAPI) Logs ¶
func (a *ServingEndpointsAPI) Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error)
func (*ServingEndpointsAPI) LogsByNameAndServedModelName ¶
func (a *ServingEndpointsAPI) LogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*ServerLogsResponse, error)
Retrieves the service logs associated with the provided served model.
func (*ServingEndpointsAPI) Patch ¶ added in v0.20.0
func (a *ServingEndpointsAPI) Patch(ctx context.Context, request PatchServingEndpointTags) (*EndpointTags, error)
func (*ServingEndpointsAPI) Put ¶ added in v0.27.0
func (a *ServingEndpointsAPI) Put(ctx context.Context, request PutRequest) (*PutResponse, error)
func (*ServingEndpointsAPI) PutAiGateway ¶ added in v0.47.0
func (a *ServingEndpointsAPI) PutAiGateway(ctx context.Context, request PutAiGatewayRequest) (*PutAiGatewayResponse, error)
func (*ServingEndpointsAPI) Query ¶
func (a *ServingEndpointsAPI) Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
func (*ServingEndpointsAPI) SetPermissions ¶ added in v0.19.0
func (a *ServingEndpointsAPI) SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
func (*ServingEndpointsAPI) UpdateConfig ¶
func (a *ServingEndpointsAPI) UpdateConfig(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
Updates any combination of the serving endpoint's served entities, the compute configuration of those served entities, and the endpoint's traffic config. An endpoint that already has an update in progress cannot be updated until the current update completes or fails.
func (*ServingEndpointsAPI) UpdateConfigAndWait
deprecated
func (a *ServingEndpointsAPI) UpdateConfigAndWait(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
Calls ServingEndpointsAPI.UpdateConfig and waits to reach NOT_UPDATING state
You can override the default timeout of 20 minutes by adding the retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
Deprecated: use ServingEndpointsAPI.UpdateConfig.Get() or ServingEndpointsAPI.WaitGetServingEndpointNotUpdating
func (*ServingEndpointsAPI) UpdatePermissions ¶ added in v0.19.0
func (a *ServingEndpointsAPI) UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
func (*ServingEndpointsAPI) UpdateProvisionedThroughputEndpointConfig ¶ added in v0.69.0
func (a *ServingEndpointsAPI) UpdateProvisionedThroughputEndpointConfig(ctx context.Context, updateProvisionedThroughputEndpointConfigRequest UpdateProvisionedThroughputEndpointConfigRequest) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
Updates any combination of the pt endpoint's served entities, the compute configuration of those served entities, and the endpoint's traffic config. Updates are instantaneous and the endpoint should be updated instantly.
func (*ServingEndpointsAPI) UpdateProvisionedThroughputEndpointConfigAndWait
deprecated
added in v0.69.0
func (a *ServingEndpointsAPI) UpdateProvisionedThroughputEndpointConfigAndWait(ctx context.Context, updateProvisionedThroughputEndpointConfigRequest UpdateProvisionedThroughputEndpointConfigRequest, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
Calls ServingEndpointsAPI.UpdateProvisionedThroughputEndpointConfig and waits to reach NOT_UPDATING state
You can override the default timeout of 20 minutes by adding the retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
Deprecated: use ServingEndpointsAPI.UpdateProvisionedThroughputEndpointConfig.Get() or ServingEndpointsAPI.WaitGetServingEndpointNotUpdating
func (*ServingEndpointsAPI) WaitGetServingEndpointNotUpdating ¶ added in v0.10.0
func (a *ServingEndpointsAPI) WaitGetServingEndpointNotUpdating(ctx context.Context, name string, timeout time.Duration, callback func(*ServingEndpointDetailed)) (*ServingEndpointDetailed, error)
WaitGetServingEndpointNotUpdating repeatedly calls ServingEndpointsAPI.Get and waits to reach NOT_UPDATING state
type ServingEndpointsDataPlaneAPI ¶ added in v0.43.1
type ServingEndpointsDataPlaneAPI struct {
// contains filtered or unexported fields
}
Serving endpoints DataPlane provides a set of operations to interact with data plane endpoints for Serving endpoints service.
func NewServingEndpointsDataPlane ¶ added in v0.43.1
func NewServingEndpointsDataPlane(client *client.DatabricksClient, controlPlane *ServingEndpointsAPI) *ServingEndpointsDataPlaneAPI
func (*ServingEndpointsDataPlaneAPI) Query ¶ added in v0.43.1
func (a *ServingEndpointsDataPlaneAPI) Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
type ServingEndpointsDataPlaneInterface ¶ added in v0.43.1
type ServingEndpointsDataPlaneInterface interface {
// Query a serving endpoint.
Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
}
type ServingEndpointsDataPlaneService
deprecated
added in v0.43.1
type ServingEndpointsDataPlaneService interface {
// Query a serving endpoint.
Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
}
Serving endpoints DataPlane provides a set of operations to interact with data plane endpoints for Serving endpoints service.
Deprecated: Do not use this interface, it will be removed in a future version of the SDK.
type ServingEndpointsInterface ¶ added in v0.29.0
type ServingEndpointsInterface interface {
// WaitGetServingEndpointNotUpdating repeatedly calls [ServingEndpointsAPI.Get] and waits to reach NOT_UPDATING state
WaitGetServingEndpointNotUpdating(ctx context.Context, name string,
timeout time.Duration, callback func(*ServingEndpointDetailed)) (*ServingEndpointDetailed, error)
// Retrieves the build logs associated with the provided served model.
BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error)
// Retrieves the build logs associated with the provided served model.
BuildLogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*BuildLogsResponse, error)
// Create a new serving endpoint.
Create(ctx context.Context, createServingEndpoint CreateServingEndpoint) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
// Calls [ServingEndpointsAPIInterface.Create] and waits to reach NOT_UPDATING state
//
// You can override the default timeout of 20 minutes by adding the
// retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
//
// Deprecated: use [ServingEndpointsAPIInterface.Create].Get() or [ServingEndpointsAPIInterface.WaitGetServingEndpointNotUpdating]
CreateAndWait(ctx context.Context, createServingEndpoint CreateServingEndpoint, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
// Create a new PT serving endpoint.
CreateProvisionedThroughputEndpoint(ctx context.Context, createPtEndpointRequest CreatePtEndpointRequest) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
// Calls [ServingEndpointsAPIInterface.CreateProvisionedThroughputEndpoint] and waits to reach NOT_UPDATING state
//
// You can override the default timeout of 20 minutes by adding the
// retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
//
// Deprecated: use [ServingEndpointsAPIInterface.CreateProvisionedThroughputEndpoint].Get() or [ServingEndpointsAPIInterface.WaitGetServingEndpointNotUpdating]
CreateProvisionedThroughputEndpointAndWait(ctx context.Context, createPtEndpointRequest CreatePtEndpointRequest, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
// Delete a serving endpoint.
Delete(ctx context.Context, request DeleteServingEndpointRequest) error
// Delete a serving endpoint.
DeleteByName(ctx context.Context, name string) error
// Retrieves the metrics associated with the provided serving endpoint in either
// Prometheus or OpenMetrics exposition format.
ExportMetrics(ctx context.Context, request ExportMetricsRequest) (*ExportMetricsResponse, error)
// Retrieves the metrics associated with the provided serving endpoint in either
// Prometheus or OpenMetrics exposition format.
ExportMetricsByName(ctx context.Context, name string) (*ExportMetricsResponse, error)
// Retrieves the details for a single serving endpoint.
Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error)
// Retrieves the details for a single serving endpoint.
GetByName(ctx context.Context, name string) (*ServingEndpointDetailed, error)
// Get the query schema of the serving endpoint in OpenAPI format. The schema
// contains information for the supported paths, input and output format and
// datatypes.
GetOpenApi(ctx context.Context, request GetOpenApiRequest) (*GetOpenApiResponse, error)
// Get the query schema of the serving endpoint in OpenAPI format. The schema
// contains information for the supported paths, input and output format and
// datatypes.
GetOpenApiByName(ctx context.Context, name string) (*GetOpenApiResponse, error)
// Gets the permission levels that a user can have on an object.
GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error)
// Gets the permission levels that a user can have on an object.
GetPermissionLevelsByServingEndpointId(ctx context.Context, servingEndpointId string) (*GetServingEndpointPermissionLevelsResponse, error)
// Gets the permissions of a serving endpoint. Serving endpoints can inherit
// permissions from their root object.
GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
// Gets the permissions of a serving endpoint. Serving endpoints can inherit
// permissions from their root object.
GetPermissionsByServingEndpointId(ctx context.Context, servingEndpointId string) (*ServingEndpointPermissions, error)
// Make external services call using the credentials stored in UC Connection.
HttpRequest(ctx context.Context, request ExternalFunctionRequest) (*HttpRequestResponse, error)
// Get all serving endpoints.
//
// This method is generated by Databricks SDK Code Generator.
List(ctx context.Context) listing.Iterator[ServingEndpoint]
// Get all serving endpoints.
//
// This method is generated by Databricks SDK Code Generator.
ListAll(ctx context.Context) ([]ServingEndpoint, error)
// Retrieves the service logs associated with the provided served model.
Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error)
// Retrieves the service logs associated with the provided served model.
LogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*ServerLogsResponse, error)
// Used to batch add and delete tags from a serving endpoint with a single API
// call.
Patch(ctx context.Context, request PatchServingEndpointTags) (*EndpointTags, error)
// Deprecated: Please use AI Gateway to manage rate limits instead.
Put(ctx context.Context, request PutRequest) (*PutResponse, error)
// Used to update the AI Gateway of a serving endpoint. NOTE: External model,
// provisioned throughput, and pay-per-token endpoints are fully supported;
// agent endpoints currently only support inference tables.
PutAiGateway(ctx context.Context, request PutAiGatewayRequest) (*PutAiGatewayResponse, error)
// Query a serving endpoint.
Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
// Sets permissions on an object, replacing existing permissions if they exist.
// Deletes all direct permissions if none are specified. Objects can inherit
// permissions from their root object.
SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
// Updates any combination of the serving endpoint's served entities, the
// compute configuration of those served entities, and the endpoint's traffic
// config. An endpoint that already has an update in progress cannot be updated
// until the current update completes or fails.
UpdateConfig(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
// Calls [ServingEndpointsAPIInterface.UpdateConfig] and waits to reach NOT_UPDATING state
//
// You can override the default timeout of 20 minutes by adding the
// retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
//
// Deprecated: use [ServingEndpointsAPIInterface.UpdateConfig].Get() or [ServingEndpointsAPIInterface.WaitGetServingEndpointNotUpdating]
UpdateConfigAndWait(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
// Updates the permissions on a serving endpoint. Serving endpoints can inherit
// permissions from their root object.
UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
// Updates any combination of the pt endpoint's served entities, the compute
// configuration of those served entities, and the endpoint's traffic config.
// Updates are instantaneous and the endpoint should be updated instantly
UpdateProvisionedThroughputEndpointConfig(ctx context.Context, updateProvisionedThroughputEndpointConfigRequest UpdateProvisionedThroughputEndpointConfigRequest) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
// Calls [ServingEndpointsAPIInterface.UpdateProvisionedThroughputEndpointConfig] and waits to reach NOT_UPDATING state
//
// You can override the default timeout of 20 minutes by adding the
// retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
//
// Deprecated: use [ServingEndpointsAPIInterface.UpdateProvisionedThroughputEndpointConfig].Get() or [ServingEndpointsAPIInterface.WaitGetServingEndpointNotUpdating]
UpdateProvisionedThroughputEndpointConfigAndWait(ctx context.Context, updateProvisionedThroughputEndpointConfigRequest UpdateProvisionedThroughputEndpointConfigRequest, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
}
type ServingEndpointsService
deprecated
type ServingEndpointsService interface {
// Retrieves the build logs associated with the provided served model.
BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error)
// Create a new serving endpoint.
Create(ctx context.Context, request CreateServingEndpoint) (*ServingEndpointDetailed, error)
// Create a new PT serving endpoint.
CreateProvisionedThroughputEndpoint(ctx context.Context, request CreatePtEndpointRequest) (*ServingEndpointDetailed, error)
// Delete a serving endpoint.
Delete(ctx context.Context, request DeleteServingEndpointRequest) error
// Retrieves the metrics associated with the provided serving endpoint in
// either Prometheus or OpenMetrics exposition format.
ExportMetrics(ctx context.Context, request ExportMetricsRequest) (*ExportMetricsResponse, error)
// Retrieves the details for a single serving endpoint.
Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error)
// Get the query schema of the serving endpoint in OpenAPI format. The
// schema contains information for the supported paths, input and output
// format and datatypes.
GetOpenApi(ctx context.Context, request GetOpenApiRequest) (*GetOpenApiResponse, error)
// Gets the permission levels that a user can have on an object.
GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error)
// Gets the permissions of a serving endpoint. Serving endpoints can inherit
// permissions from their root object.
GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
// Make external services call using the credentials stored in UC
// Connection.
HttpRequest(ctx context.Context, request ExternalFunctionRequest) (*HttpRequestResponse, error)
// Get all serving endpoints.
List(ctx context.Context) (*ListEndpointsResponse, error)
// Retrieves the service logs associated with the provided served model.
Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error)
// Used to batch add and delete tags from a serving endpoint with a single
// API call.
Patch(ctx context.Context, request PatchServingEndpointTags) (*EndpointTags, error)
// Deprecated: Please use AI Gateway to manage rate limits instead.
Put(ctx context.Context, request PutRequest) (*PutResponse, error)
// Used to update the AI Gateway of a serving endpoint. NOTE: External
// model, provisioned throughput, and pay-per-token endpoints are fully
// supported; agent endpoints currently only support inference tables.
PutAiGateway(ctx context.Context, request PutAiGatewayRequest) (*PutAiGatewayResponse, error)
// Query a serving endpoint.
Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
// Sets permissions on an object, replacing existing permissions if they
// exist. Deletes all direct permissions if none are specified. Objects can
// inherit permissions from their root object.
SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
// Updates any combination of the serving endpoint's served entities, the
// compute configuration of those served entities, and the endpoint's
// traffic config. An endpoint that already has an update in progress can
// not be updated until the current update completes or fails.
UpdateConfig(ctx context.Context, request EndpointCoreConfigInput) (*ServingEndpointDetailed, error)
// Updates the permissions on a serving endpoint. Serving endpoints can
// inherit permissions from their root object.
UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
// Updates any combination of the pt endpoint's served entities, the compute
// configuration of those served entities, and the endpoint's traffic
// config. Updates are instantaneous and the endpoint should be updated
// instantly
UpdateProvisionedThroughputEndpointConfig(ctx context.Context, request UpdateProvisionedThroughputEndpointConfigRequest) (*ServingEndpointDetailed, error)
}
The Serving Endpoints API allows you to create, update, and delete model serving endpoints.
You can use a serving endpoint to serve models from the Databricks Model Registry or from Unity Catalog. Endpoints expose the underlying models as scalable REST API endpoints using serverless compute. This means the endpoints and associated compute resources are fully managed by Databricks and will not appear in your cloud account. A serving endpoint can consist of one or more MLflow models from the Databricks Model Registry, called served entities. A serving endpoint can have at most ten served entities. You can configure traffic settings to define how requests should be routed to your served entities behind an endpoint. Additionally, you can configure the scale of resources that should be applied to each served entity.
Deprecated: Do not use this interface, it will be removed in a future version of the SDK.
type ServingModelWorkloadType ¶ added in v0.56.0
type ServingModelWorkloadType string
Please keep this in sync with workload types in InferenceEndpointEntities.scala
const ServingModelWorkloadTypeCpu ServingModelWorkloadType = `CPU`
const ServingModelWorkloadTypeGpuLarge ServingModelWorkloadType = `GPU_LARGE`
const ServingModelWorkloadTypeGpuMedium ServingModelWorkloadType = `GPU_MEDIUM`
const ServingModelWorkloadTypeGpuSmall ServingModelWorkloadType = `GPU_SMALL`
const ServingModelWorkloadTypeMultigpuMedium ServingModelWorkloadType = `MULTIGPU_MEDIUM`
func (*ServingModelWorkloadType) Set ¶ added in v0.56.0
func (f *ServingModelWorkloadType) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServingModelWorkloadType) String ¶ added in v0.56.0
func (f *ServingModelWorkloadType) String() string
String representation for fmt.Print
func (*ServingModelWorkloadType) Type ¶ added in v0.56.0
func (f *ServingModelWorkloadType) Type() string
Type always returns ServingModelWorkloadType to satisfy [pflag.Value] interface
func (*ServingModelWorkloadType) Values ¶ added in v0.72.0
func (f *ServingModelWorkloadType) Values() []ServingModelWorkloadType
Values returns all possible values for ServingModelWorkloadType.
There is no guarantee on the order of the values in the slice.
type TrafficConfig ¶
type TrafficConfig struct {
// The list of routes that define traffic to each served entity.
// Wire name: 'routes'
Routes []Route `json:"routes,omitempty"`
}
func (TrafficConfig) MarshalJSON ¶ added in v0.73.0
func (st TrafficConfig) MarshalJSON() ([]byte, error)
func (*TrafficConfig) UnmarshalJSON ¶ added in v0.73.0
func (st *TrafficConfig) UnmarshalJSON(b []byte) error
type UpdateProvisionedThroughputEndpointConfigRequest ¶ added in v0.69.0
type UpdateProvisionedThroughputEndpointConfigRequest struct {
// Wire name: 'config'
Config PtEndpointCoreConfig `json:"config"`
// The name of the pt endpoint to update. This field is required.
Name string `json:"-" tf:"-"`
}
func (UpdateProvisionedThroughputEndpointConfigRequest) MarshalJSON ¶ added in v0.73.0
func (st UpdateProvisionedThroughputEndpointConfigRequest) MarshalJSON() ([]byte, error)
func (*UpdateProvisionedThroughputEndpointConfigRequest) UnmarshalJSON ¶ added in v0.73.0
func (st *UpdateProvisionedThroughputEndpointConfigRequest) UnmarshalJSON(b []byte) error
type V1ResponseChoiceElement ¶ added in v0.27.0
type V1ResponseChoiceElement struct {
// The finish reason returned by the endpoint.
// Wire name: 'finishReason'
FinishReason string `json:"finishReason,omitempty"`
// The index of the choice in the __chat or completions__ response.
// Wire name: 'index'
Index int `json:"index,omitempty"`
// The logprobs returned only by the __completions__ endpoint.
// Wire name: 'logprobs'
Logprobs int `json:"logprobs,omitempty"`
// The message response from the __chat__ endpoint.
// Wire name: 'message'
Message *ChatMessage `json:"message,omitempty"`
// The text response from the __completions__ endpoint.
// Wire name: 'text'
Text string `json:"text,omitempty"`
ForceSendFields []string `json:"-" tf:"-"`
}
func (V1ResponseChoiceElement) MarshalJSON ¶ added in v0.27.0
func (st V1ResponseChoiceElement) MarshalJSON() ([]byte, error)
func (*V1ResponseChoiceElement) UnmarshalJSON ¶ added in v0.27.0
func (st *V1ResponseChoiceElement) UnmarshalJSON(b []byte) error
type WaitGetServingEndpointNotUpdating ¶ added in v0.10.0
type WaitGetServingEndpointNotUpdating[R any] struct {
	Response *R
	Name     string `json:"name"`
	Poll     func(time.Duration, func(*ServingEndpointDetailed)) (*ServingEndpointDetailed, error)
	// contains filtered or unexported fields
}
WaitGetServingEndpointNotUpdating is a wrapper that calls ServingEndpointsAPI.WaitGetServingEndpointNotUpdating and waits to reach NOT_UPDATING state.
func (*WaitGetServingEndpointNotUpdating[R]) Get ¶ added in v0.10.0
func (w *WaitGetServingEndpointNotUpdating[R]) Get() (*ServingEndpointDetailed, error)
Get the ServingEndpointDetailed with the default timeout of 20 minutes.
func (*WaitGetServingEndpointNotUpdating[R]) GetWithTimeout ¶ added in v0.10.0
func (w *WaitGetServingEndpointNotUpdating[R]) GetWithTimeout(timeout time.Duration) (*ServingEndpointDetailed, error)
Get the ServingEndpointDetailed with custom timeout.
func (*WaitGetServingEndpointNotUpdating[R]) OnProgress ¶ added in v0.10.0
func (w *WaitGetServingEndpointNotUpdating[R]) OnProgress(callback func(*ServingEndpointDetailed)) *WaitGetServingEndpointNotUpdating[R]
OnProgress invokes a callback every time it polls for the status update.