inference

package

Version: v1.0.0-beta.35 (latest) | Published: Sep 15, 2025 | License: Apache-2.0 | Imports: 14 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/scaleway/scaleway-sdk-go

Links

Open Source Insights

Documentation ¶

Overview ¶

Package inference provides methods and message types of the inference v1 API.

Index ¶

Constants
type API
- func NewAPI(client *scw.Client) *API
- func (s *API) CreateDeployment(req *CreateDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)
- func (s *API) CreateEndpoint(req *CreateEndpointRequest, opts ...scw.RequestOption) (*Endpoint, error)
- func (s *API) CreateModel(req *CreateModelRequest, opts ...scw.RequestOption) (*Model, error)
- func (s *API) DeleteDeployment(req *DeleteDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)
- func (s *API) DeleteEndpoint(req *DeleteEndpointRequest, opts ...scw.RequestOption) error
- func (s *API) DeleteModel(req *DeleteModelRequest, opts ...scw.RequestOption) error
- func (s *API) GetDeployment(req *GetDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)
- func (s *API) GetDeploymentCertificate(req *GetDeploymentCertificateRequest, opts ...scw.RequestOption) (*scw.File, error)
- func (s *API) GetModel(req *GetModelRequest, opts ...scw.RequestOption) (*Model, error)
- func (s *API) ListDeployments(req *ListDeploymentsRequest, opts ...scw.RequestOption) (*ListDeploymentsResponse, error)
- func (s *API) ListModels(req *ListModelsRequest, opts ...scw.RequestOption) (*ListModelsResponse, error)
- func (s *API) ListNodeTypes(req *ListNodeTypesRequest, opts ...scw.RequestOption) (*ListNodeTypesResponse, error)
- func (s *API) Regions() []scw.Region
- func (s *API) UpdateDeployment(req *UpdateDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)
- func (s *API) UpdateEndpoint(req *UpdateEndpointRequest, opts ...scw.RequestOption) (*Endpoint, error)
- func (s *API) WaitForDeployment(req *WaitForDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)
- func (s *API) WaitForModel(req *WaitForModelRequest, opts ...scw.RequestOption) (*Model, error)
type CreateDeploymentRequest
type CreateEndpointRequest
type CreateModelRequest
type DeleteDeploymentRequest
type DeleteEndpointRequest
type DeleteModelRequest
type Deployment
type DeploymentQuantization
type DeploymentStatus
- func (enum DeploymentStatus) MarshalJSON() ([]byte, error)
- func (enum DeploymentStatus) String() string
- func (enum *DeploymentStatus) UnmarshalJSON(data []byte) error
- func (enum DeploymentStatus) Values() []DeploymentStatus
type Endpoint
type EndpointPrivateNetworkDetails
type EndpointPublicNetworkDetails
type EndpointSpec
type GetDeploymentCertificateRequest
type GetDeploymentRequest
type GetModelRequest
type ListDeploymentsRequest
type ListDeploymentsRequestOrderBy
- func (enum ListDeploymentsRequestOrderBy) MarshalJSON() ([]byte, error)
- func (enum ListDeploymentsRequestOrderBy) String() string
- func (enum *ListDeploymentsRequestOrderBy) UnmarshalJSON(data []byte) error
- func (enum ListDeploymentsRequestOrderBy) Values() []ListDeploymentsRequestOrderBy
type ListDeploymentsResponse
- func (r *ListDeploymentsResponse) UnsafeAppend(res any) (uint64, error)
- func (r *ListDeploymentsResponse) UnsafeGetTotalCount() uint64
type ListModelsRequest
type ListModelsRequestOrderBy
- func (enum ListModelsRequestOrderBy) MarshalJSON() ([]byte, error)
- func (enum ListModelsRequestOrderBy) String() string
- func (enum *ListModelsRequestOrderBy) UnmarshalJSON(data []byte) error
- func (enum ListModelsRequestOrderBy) Values() []ListModelsRequestOrderBy
type ListModelsResponse
- func (r *ListModelsResponse) UnsafeAppend(res any) (uint64, error)
- func (r *ListModelsResponse) UnsafeGetTotalCount() uint64
type ListNodeTypesRequest
type ListNodeTypesResponse
- func (r *ListNodeTypesResponse) UnsafeAppend(res any) (uint64, error)
- func (r *ListNodeTypesResponse) UnsafeGetTotalCount() uint64
type Model
type ModelSource
type ModelStatus
- func (enum ModelStatus) MarshalJSON() ([]byte, error)
- func (enum ModelStatus) String() string
- func (enum *ModelStatus) UnmarshalJSON(data []byte) error
- func (enum ModelStatus) Values() []ModelStatus
type ModelSupportInfo
type ModelSupportedNode
type ModelSupportedQuantization
type NodeType
type NodeTypeStock
- func (enum NodeTypeStock) MarshalJSON() ([]byte, error)
- func (enum NodeTypeStock) String() string
- func (enum *NodeTypeStock) UnmarshalJSON(data []byte) error
- func (enum NodeTypeStock) Values() []NodeTypeStock
type UpdateDeploymentRequest
type UpdateEndpointRequest
type WaitForDeploymentRequest
type WaitForModelRequest

Constants ¶

View Source

// DeploymentStatus values. Each constant is the DeploymentStatus conversion of
// the string literal on its right-hand side; "unknown_status" is the value
// documented as the default of Deployment.Status.
const (
	DeploymentStatusUnknownStatus = DeploymentStatus("unknown_status")
	DeploymentStatusCreating      = DeploymentStatus("creating")
	DeploymentStatusDeploying     = DeploymentStatus("deploying")
	DeploymentStatusReady         = DeploymentStatus("ready")
	DeploymentStatusError         = DeploymentStatus("error")
	DeploymentStatusDeleting      = DeploymentStatus("deleting")
	DeploymentStatusLocked        = DeploymentStatus("locked")
	DeploymentStatusScaling       = DeploymentStatus("scaling")
)

View Source

// Sort orders for ListDeploymentsRequest.OrderBy. "created_at_desc" is the
// value documented as that field's default.
const (
	ListDeploymentsRequestOrderByCreatedAtDesc = ListDeploymentsRequestOrderBy("created_at_desc")
	ListDeploymentsRequestOrderByCreatedAtAsc  = ListDeploymentsRequestOrderBy("created_at_asc")
	ListDeploymentsRequestOrderByNameAsc       = ListDeploymentsRequestOrderBy("name_asc")
	ListDeploymentsRequestOrderByNameDesc      = ListDeploymentsRequestOrderBy("name_desc")
)

View Source

// Sort orders for ListModelsRequest.OrderBy. "display_rank_asc" is the value
// documented as that field's default.
const (
	ListModelsRequestOrderByDisplayRankAsc = ListModelsRequestOrderBy("display_rank_asc")
	ListModelsRequestOrderByCreatedAtAsc   = ListModelsRequestOrderBy("created_at_asc")
	ListModelsRequestOrderByCreatedAtDesc  = ListModelsRequestOrderBy("created_at_desc")
	ListModelsRequestOrderByNameAsc        = ListModelsRequestOrderBy("name_asc")
	ListModelsRequestOrderByNameDesc       = ListModelsRequestOrderBy("name_desc")
)

View Source

// ModelStatus values. "unknown_status" is the value documented as the default
// of Model.Status.
const (
	ModelStatusUnknownStatus = ModelStatus("unknown_status")
	ModelStatusPreparing     = ModelStatus("preparing")
	ModelStatusDownloading   = ModelStatus("downloading")
	ModelStatusReady         = ModelStatus("ready")
	ModelStatusError         = ModelStatus("error")
)

View Source

// NodeTypeStock values. "unknown_stock" is the value documented as the default
// of NodeType.StockStatus.
const (
	NodeTypeStockUnknownStock = NodeTypeStock("unknown_stock")
	NodeTypeStockLowStock     = NodeTypeStock("low_stock")
	NodeTypeStockOutOfStock   = NodeTypeStock("out_of_stock")
	NodeTypeStockAvailable    = NodeTypeStock("available")
)

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type API ¶

// API is the handle through which the Managed Inference methods of this
// package are called. Obtain one with NewAPI; its fields are unexported.
type API struct {
	// contains filtered or unexported fields
}

This API allows you to handle your Managed Inference services.

func NewAPI ¶

func NewAPI(client *scw.Client) *API

NewAPI returns an API object from a Scaleway client.

func (*API) CreateDeployment ¶

func (s *API) CreateDeployment(req *CreateDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)

CreateDeployment: Create a new inference deployment related to a specific model.

func (*API) CreateEndpoint ¶

func (s *API) CreateEndpoint(req *CreateEndpointRequest, opts ...scw.RequestOption) (*Endpoint, error)

CreateEndpoint: Create a new Endpoint related to a specific deployment.

func (*API) CreateModel ¶

func (s *API) CreateModel(req *CreateModelRequest, opts ...scw.RequestOption) (*Model, error)

CreateModel: Import a new model to your model library.

func (*API) DeleteDeployment ¶

func (s *API) DeleteDeployment(req *DeleteDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)

DeleteDeployment: Delete an existing inference deployment.

func (*API) DeleteEndpoint ¶

func (s *API) DeleteEndpoint(req *DeleteEndpointRequest, opts ...scw.RequestOption) error

DeleteEndpoint: Delete an existing Endpoint.

func (*API) DeleteModel ¶

func (s *API) DeleteModel(req *DeleteModelRequest, opts ...scw.RequestOption) error

DeleteModel: Delete an existing model from your model library.

func (*API) GetDeployment ¶

func (s *API) GetDeployment(req *GetDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)

GetDeployment: Get the deployment for the given ID.

func (*API) GetDeploymentCertificate ¶

func (s *API) GetDeploymentCertificate(req *GetDeploymentCertificateRequest, opts ...scw.RequestOption) (*scw.File, error)

GetDeploymentCertificate: Get the CA certificate used for the deployment of private endpoints. The CA certificate will be returned as a PEM file.

func (*API) GetModel ¶

func (s *API) GetModel(req *GetModelRequest, opts ...scw.RequestOption) (*Model, error)

GetModel: Get the model for the given ID.

func (*API) ListDeployments ¶

func (s *API) ListDeployments(req *ListDeploymentsRequest, opts ...scw.RequestOption) (*ListDeploymentsResponse, error)

ListDeployments: List all your inference deployments.

func (*API) ListModels ¶

func (s *API) ListModels(req *ListModelsRequest, opts ...scw.RequestOption) (*ListModelsResponse, error)

ListModels: List all available models.

func (*API) ListNodeTypes ¶

func (s *API) ListNodeTypes(req *ListNodeTypesRequest, opts ...scw.RequestOption) (*ListNodeTypesResponse, error)

ListNodeTypes: List all available node types. By default, the node types returned in the list are ordered by creation date in ascending order, though this can be modified via the `order_by` field.

func (*API) Regions ¶

func (s *API) Regions() []scw.Region

func (*API) UpdateDeployment ¶

func (s *API) UpdateDeployment(req *UpdateDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)

UpdateDeployment: Update an existing inference deployment.

func (*API) UpdateEndpoint ¶

func (s *API) UpdateEndpoint(req *UpdateEndpointRequest, opts ...scw.RequestOption) (*Endpoint, error)

UpdateEndpoint: Update an existing Endpoint.

func (*API) WaitForDeployment ¶

func (s *API) WaitForDeployment(req *WaitForDeploymentRequest, opts ...scw.RequestOption) (*Deployment, error)

func (*API) WaitForModel ¶

func (s *API) WaitForModel(req *WaitForModelRequest, opts ...scw.RequestOption) (*Model, error)

type CreateDeploymentRequest ¶

// CreateDeploymentRequest holds the parameters of API.CreateDeployment.
// Region is tagged `json:"-"`, which encoding/json never encodes; the
// pointer-typed fields use `omitempty` and are left out of the JSON when nil.
type CreateDeploymentRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// Name: name of the deployment.
	Name string `json:"name"`

	// ProjectID: ID of the Project to create the deployment in.
	ProjectID string `json:"project_id"`

	// ModelID: ID of the model to use.
	ModelID string `json:"model_id"`

	// AcceptEula: if the model has an EULA, you must accept it before proceeding.
	// The terms of the EULA can be retrieved using the `GetModelEula` API call.
	// NOTE(review): no `GetModelEula` method is listed among this package's API methods — confirm where the EULA is exposed.
	AcceptEula *bool `json:"accept_eula,omitempty"`

	// NodeTypeName: name of the node type to use.
	NodeTypeName string `json:"node_type_name"`

	// Tags: list of tags to apply to the deployment.
	Tags []string `json:"tags"`

	// MinSize: defines the minimum size of the pool.
	MinSize *uint32 `json:"min_size,omitempty"`

	// MaxSize: defines the maximum size of the pool.
	MaxSize *uint32 `json:"max_size,omitempty"`

	// Endpoints: list of endpoints to create.
	Endpoints []*EndpointSpec `json:"endpoints"`

	// Quantization: quantization settings to apply to this deployment.
	Quantization *DeploymentQuantization `json:"quantization,omitempty"`
}

CreateDeploymentRequest: create deployment request.

type CreateEndpointRequest ¶

// CreateEndpointRequest holds the parameters of API.CreateEndpoint.
// Region is tagged `json:"-"`, which encoding/json never encodes.
type CreateEndpointRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// DeploymentID: ID of the deployment to create the endpoint for.
	DeploymentID string `json:"deployment_id"`

	// Endpoint: specification of the endpoint.
	Endpoint *EndpointSpec `json:"endpoint"`
}

CreateEndpointRequest: create endpoint request.

type CreateModelRequest ¶

// CreateModelRequest holds the parameters of API.CreateModel.
// Region is tagged `json:"-"`, which encoding/json never encodes.
type CreateModelRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// Name: name of the model.
	Name string `json:"name"`

	// ProjectID: ID of the Project to import the model in.
	ProjectID string `json:"project_id"`

	// Source: where to import the model from.
	Source *ModelSource `json:"source"`
}

CreateModelRequest: create model request.

type DeleteDeploymentRequest ¶

// DeleteDeploymentRequest holds the parameters of API.DeleteDeployment.
// Both fields are tagged `json:"-"`, so encoding/json encodes neither of them.
type DeleteDeploymentRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// DeploymentID: ID of the deployment to delete.
	DeploymentID string `json:"-"`
}

DeleteDeploymentRequest: delete deployment request.

type DeleteEndpointRequest ¶

// DeleteEndpointRequest holds the parameters of API.DeleteEndpoint.
// Both fields are tagged `json:"-"`, so encoding/json encodes neither of them.
type DeleteEndpointRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// EndpointID: ID of the endpoint to delete.
	EndpointID string `json:"-"`
}

DeleteEndpointRequest: delete endpoint request.

type DeleteModelRequest ¶

// DeleteModelRequest holds the parameters of API.DeleteModel.
// Both fields are tagged `json:"-"`, so encoding/json encodes neither of them.
type DeleteModelRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// ModelID: ID of the model to delete.
	ModelID string `json:"-"`
}

DeleteModelRequest: delete model request.

type Deployment ¶

// Deployment describes an inference deployment. It is the result type of
// CreateDeployment, GetDeployment, UpdateDeployment, DeleteDeployment and
// WaitForDeployment, and the element type of ListDeploymentsResponse.Deployments.
type Deployment struct {
	// ID: unique identifier.
	ID string `json:"id"`

	// Name: name of the deployment.
	Name string `json:"name"`

	// ProjectID: project ID.
	ProjectID string `json:"project_id"`

	// Status: status of the deployment.
	// Default value: unknown_status
	Status DeploymentStatus `json:"status"`

	// Tags: list of tags applied to the deployment.
	Tags []string `json:"tags"`

	// NodeTypeName: node type of the deployment.
	NodeTypeName string `json:"node_type_name"`

	// Endpoints: list of endpoints.
	Endpoints []*Endpoint `json:"endpoints"`

	// Size: current size of the pool.
	Size uint32 `json:"size"`

	// MinSize: defines the minimum size of the pool.
	MinSize uint32 `json:"min_size"`

	// MaxSize: defines the maximum size of the pool.
	MaxSize uint32 `json:"max_size"`

	// ErrorMessage: displays information if your deployment is in error state.
	// Pointer-typed; presumably nil when there is no error to report — confirm.
	ErrorMessage *string `json:"error_message"`

	// ModelID: ID of the model used for the deployment.
	ModelID string `json:"model_id"`

	// Quantization: quantization parameters for this deployment.
	Quantization *DeploymentQuantization `json:"quantization"`

	// ModelName: name of the deployed model.
	ModelName string `json:"model_name"`

	// CreatedAt: creation date of the deployment.
	CreatedAt *time.Time `json:"created_at"`

	// UpdatedAt: last modification date of the deployment.
	UpdatedAt *time.Time `json:"updated_at"`

	// Region: region of the deployment.
	Region scw.Region `json:"region"`
}

Deployment: deployment.

type DeploymentQuantization ¶

// DeploymentQuantization holds the quantization setting of a deployment. It is
// the type of the Quantization field of CreateDeploymentRequest,
// UpdateDeploymentRequest and Deployment.
type DeploymentQuantization struct {
	// Bits: the number of bits each model parameter should be quantized to. The quantization method is chosen based on this value.
	Bits uint32 `json:"bits"`
}

DeploymentQuantization: deployment quantization.

type DeploymentStatus ¶

// DeploymentStatus is a string-backed enumeration; its declared values are the
// DeploymentStatus* constants of this package.
type DeploymentStatus string

func (DeploymentStatus) MarshalJSON ¶

func (enum DeploymentStatus) MarshalJSON() ([]byte, error)

func (DeploymentStatus) String ¶

func (enum DeploymentStatus) String() string

func (*DeploymentStatus) UnmarshalJSON ¶

func (enum *DeploymentStatus) UnmarshalJSON(data []byte) error

func (DeploymentStatus) Values ¶

func (enum DeploymentStatus) Values() []DeploymentStatus

type Endpoint ¶

// Endpoint describes an endpoint of a deployment. It is the result type of
// CreateEndpoint and UpdateEndpoint, and the element type of Deployment.Endpoints.
type Endpoint struct {
	// ID: unique identifier.
	ID string `json:"id"`

	// URL: for private endpoints, the URL will be accessible only from the Private Network.
	// In addition, private endpoints will expose a CA certificate that can be used to verify the server's identity.
	// This CA certificate can be retrieved using the `GetDeploymentCertificate` API call.
	URL string `json:"url"`

	// PublicNetwork: defines whether the endpoint is public.
	// Precisely one of PublicNetwork, PrivateNetwork must be set.
	PublicNetwork *EndpointPublicNetworkDetails `json:"public_network,omitempty"`

	// PrivateNetwork: details of the Private Network.
	// Precisely one of PublicNetwork, PrivateNetwork must be set.
	PrivateNetwork *EndpointPrivateNetworkDetails `json:"private_network,omitempty"`

	// DisableAuth: defines whether the authentication is disabled.
	DisableAuth bool `json:"disable_auth"`
}

Endpoint: endpoint.

type EndpointPrivateNetworkDetails ¶

// EndpointPrivateNetworkDetails is the type of the PrivateNetwork field of
// Endpoint and EndpointSpec.
type EndpointPrivateNetworkDetails struct {
	// PrivateNetworkID: presumably the ID of the Private Network the endpoint is attached to — TODO confirm.
	PrivateNetworkID string `json:"private_network_id"`
}

EndpointPrivateNetworkDetails: endpoint private network details.

type EndpointPublicNetworkDetails ¶

// EndpointPublicNetworkDetails has no fields. It is the type of the
// PublicNetwork field of Endpoint and EndpointSpec, where only the pointer
// being nil or non-nil can carry information.
type EndpointPublicNetworkDetails struct{}

EndpointPublicNetworkDetails: endpoint public network details.

type EndpointSpec ¶

// EndpointSpec describes an endpoint to create. It is the type of
// CreateEndpointRequest.Endpoint and the element type of
// CreateDeploymentRequest.Endpoints.
type EndpointSpec struct {
	// PublicNetwork: set the endpoint as public.
	// Precisely one of PublicNetwork, PrivateNetwork must be set.
	PublicNetwork *EndpointPublicNetworkDetails `json:"public_network,omitempty"`

	// PrivateNetwork: private endpoints are only accessible from the Private Network.
	// Precisely one of PublicNetwork, PrivateNetwork must be set.
	PrivateNetwork *EndpointPrivateNetworkDetails `json:"private_network,omitempty"`

	// DisableAuth: by default, deployments are protected by IAM authentication.
	// When setting this field to true, the authentication will be disabled.
	DisableAuth bool `json:"disable_auth"`
}

EndpointSpec: endpoint spec.

type GetDeploymentCertificateRequest ¶

// GetDeploymentCertificateRequest holds the parameters of
// API.GetDeploymentCertificate. Both fields are tagged `json:"-"`, so
// encoding/json encodes neither of them.
type GetDeploymentCertificateRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// DeploymentID: presumably the ID of the deployment whose CA certificate is requested — TODO confirm.
	DeploymentID string `json:"-"`
}

GetDeploymentCertificateRequest: get deployment certificate request.

type GetDeploymentRequest ¶

// GetDeploymentRequest holds the parameters of API.GetDeployment.
// Both fields are tagged `json:"-"`, so encoding/json encodes neither of them.
type GetDeploymentRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// DeploymentID: ID of the deployment to get.
	DeploymentID string `json:"-"`
}

GetDeploymentRequest: get deployment request.

type GetModelRequest ¶

// GetModelRequest holds the parameters of API.GetModel.
// Both fields are tagged `json:"-"`, so encoding/json encodes neither of them.
type GetModelRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// ModelID: ID of the model to get.
	ModelID string `json:"-"`
}

GetModelRequest: get model request.

type ListDeploymentsRequest ¶

// ListDeploymentsRequest holds the parameters of API.ListDeployments.
// Every field is tagged `json:"-"`, so encoding/json encodes none of them.
// The pointer-typed fields are optional; presumably nil means "not specified" — confirm.
type ListDeploymentsRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// Page: page number to return.
	Page *int32 `json:"-"`

	// PageSize: maximum number of deployments to return per page.
	PageSize *uint32 `json:"-"`

	// OrderBy: order in which to return results.
	// Default value: created_at_desc
	OrderBy ListDeploymentsRequestOrderBy `json:"-"`

	// ProjectID: filter by Project ID.
	ProjectID *string `json:"-"`

	// OrganizationID: filter by Organization ID.
	OrganizationID *string `json:"-"`

	// Name: filter by deployment name.
	Name *string `json:"-"`

	// Tags: filter by tags.
	Tags []string `json:"-"`
}

ListDeploymentsRequest: list deployments request.

type ListDeploymentsRequestOrderBy ¶

// ListDeploymentsRequestOrderBy is a string-backed enumeration of sort orders;
// its declared values are the ListDeploymentsRequestOrderBy* constants of this package.
type ListDeploymentsRequestOrderBy string

func (ListDeploymentsRequestOrderBy) MarshalJSON ¶

func (enum ListDeploymentsRequestOrderBy) MarshalJSON() ([]byte, error)

func (ListDeploymentsRequestOrderBy) String ¶

func (enum ListDeploymentsRequestOrderBy) String() string

func (*ListDeploymentsRequestOrderBy) UnmarshalJSON ¶

func (enum *ListDeploymentsRequestOrderBy) UnmarshalJSON(data []byte) error

func (ListDeploymentsRequestOrderBy) Values ¶

func (enum ListDeploymentsRequestOrderBy) Values() []ListDeploymentsRequestOrderBy

type ListDeploymentsResponse ¶

// ListDeploymentsResponse is the result type of API.ListDeployments: one page
// of deployments plus the overall count.
type ListDeploymentsResponse struct {
	// Deployments: list of deployments on the current page.
	Deployments []*Deployment `json:"deployments"`

	// TotalCount: total number of deployments.
	TotalCount uint64 `json:"total_count"`
}

ListDeploymentsResponse: list deployments response.

func (*ListDeploymentsResponse) UnsafeAppend ¶

func (r *ListDeploymentsResponse) UnsafeAppend(res any) (uint64, error)

UnsafeAppend should not be used. Internal usage only.

func (*ListDeploymentsResponse) UnsafeGetTotalCount ¶

func (r *ListDeploymentsResponse) UnsafeGetTotalCount() uint64

UnsafeGetTotalCount should not be used. Internal usage only.

type ListModelsRequest ¶

// ListModelsRequest holds the parameters of API.ListModels.
// Every field is tagged `json:"-"`, so encoding/json encodes none of them.
// The pointer-typed fields are optional; presumably nil means "not specified" — confirm.
type ListModelsRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// OrderBy: order in which to return results.
	// Default value: display_rank_asc
	OrderBy ListModelsRequestOrderBy `json:"-"`

	// Page: page number to return.
	Page *int32 `json:"-"`

	// PageSize: maximum number of models to return per page.
	PageSize *uint32 `json:"-"`

	// ProjectID: filter by Project ID.
	ProjectID *string `json:"-"`

	// Name: filter by model name.
	Name *string `json:"-"`

	// Tags: filter by tags.
	Tags []string `json:"-"`
}

ListModelsRequest: list models request.

type ListModelsRequestOrderBy ¶

// ListModelsRequestOrderBy is a string-backed enumeration of sort orders; its
// declared values are the ListModelsRequestOrderBy* constants of this package.
type ListModelsRequestOrderBy string

func (ListModelsRequestOrderBy) MarshalJSON ¶

func (enum ListModelsRequestOrderBy) MarshalJSON() ([]byte, error)

func (ListModelsRequestOrderBy) String ¶

func (enum ListModelsRequestOrderBy) String() string

func (*ListModelsRequestOrderBy) UnmarshalJSON ¶

func (enum *ListModelsRequestOrderBy) UnmarshalJSON(data []byte) error

func (ListModelsRequestOrderBy) Values ¶

func (enum ListModelsRequestOrderBy) Values() []ListModelsRequestOrderBy

type ListModelsResponse ¶

// ListModelsResponse is the result type of API.ListModels: one page of models
// plus the overall count.
type ListModelsResponse struct {
	// Models: list of models on the current page.
	Models []*Model `json:"models"`

	// TotalCount: total number of models.
	TotalCount uint64 `json:"total_count"`
}

ListModelsResponse: list models response.

func (*ListModelsResponse) UnsafeAppend ¶

func (r *ListModelsResponse) UnsafeAppend(res any) (uint64, error)

UnsafeAppend should not be used. Internal usage only.

func (*ListModelsResponse) UnsafeGetTotalCount ¶

func (r *ListModelsResponse) UnsafeGetTotalCount() uint64

UnsafeGetTotalCount should not be used. Internal usage only.

type ListNodeTypesRequest ¶

// ListNodeTypesRequest holds the parameters of API.ListNodeTypes.
// Every field is tagged `json:"-"`, so encoding/json encodes none of them.
// NOTE(review): the ListNodeTypes description mentions an `order_by` field,
// but this struct declares no such field — confirm which is out of date.
type ListNodeTypesRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// Page: page number to return.
	Page *int32 `json:"-"`

	// PageSize: maximum number of node types to return per page.
	PageSize *uint32 `json:"-"`

	// IncludeDisabledTypes: include disabled node types in the response.
	IncludeDisabledTypes bool `json:"-"`
}

ListNodeTypesRequest: list node types request.

type ListNodeTypesResponse ¶

// ListNodeTypesResponse is the result type of API.ListNodeTypes: a list of
// node types plus the overall count.
type ListNodeTypesResponse struct {
	// NodeTypes: list of node types.
	NodeTypes []*NodeType `json:"node_types"`

	// TotalCount: total number of node types.
	TotalCount uint64 `json:"total_count"`
}

ListNodeTypesResponse: list node types response.

func (*ListNodeTypesResponse) UnsafeAppend ¶

func (r *ListNodeTypesResponse) UnsafeAppend(res any) (uint64, error)

UnsafeAppend should not be used. Internal usage only.

func (*ListNodeTypesResponse) UnsafeGetTotalCount ¶

func (r *ListNodeTypesResponse) UnsafeGetTotalCount() uint64

UnsafeGetTotalCount should not be used. Internal usage only.

type Model ¶

// Model describes a model. It is the result type of CreateModel, GetModel and
// WaitForModel, and the element type of ListModelsResponse.Models.
type Model struct {
	// ID: unique identifier.
	ID string `json:"id"`

	// Name: unique name identifying the model.
	Name string `json:"name"`

	// ProjectID: project ID.
	ProjectID string `json:"project_id"`

	// Tags: list of tags applied to the model.
	Tags []string `json:"tags"`

	// Status: status of the model.
	// Default value: unknown_status
	Status ModelStatus `json:"status"`

	// Description: purpose of the model.
	Description string `json:"description"`

	// ErrorMessage: displays information if your model is in error state.
	// Pointer-typed; presumably nil when there is no error to report — confirm.
	ErrorMessage *string `json:"error_message"`

	// HasEula: defines whether the model has an end user license agreement.
	HasEula bool `json:"has_eula"`

	// CreatedAt: creation date of the model.
	CreatedAt *time.Time `json:"created_at"`

	// UpdatedAt: last modification date of the model.
	UpdatedAt *time.Time `json:"updated_at"`

	// Region: region of the model.
	Region scw.Region `json:"region"`

	// NodesSupport: supported node types with quantization options and context lengths.
	NodesSupport []*ModelSupportInfo `json:"nodes_support"`

	// ParameterSizeBits: size, in bits, of the model parameters.
	ParameterSizeBits uint32 `json:"parameter_size_bits"`

	// SizeBytes: total size, in bytes, of the model files.
	SizeBytes uint64 `json:"size_bytes"`
}

Model: model.

type ModelSource ¶

// ModelSource is the type of CreateModelRequest.Source, which is documented as
// "where to import the model from".
type ModelSource struct {
	// URL: presumably the location the model is imported from — TODO confirm
	// the accepted URL forms.
	URL string `json:"url"`

	// Secret: pointer-typed with `omitempty`, so it is left out of the encoded
	// JSON when nil. NOTE(review): its purpose is not described here — confirm.
	// Precisely one of Secret must be set.
	Secret *string `json:"secret,omitempty"`
}

ModelSource: model source.

type ModelStatus ¶

// ModelStatus is a string-backed enumeration; its declared values are the
// ModelStatus* constants of this package.
type ModelStatus string

func (ModelStatus) MarshalJSON ¶

func (enum ModelStatus) MarshalJSON() ([]byte, error)

func (ModelStatus) String ¶

func (enum ModelStatus) String() string

func (*ModelStatus) UnmarshalJSON ¶

func (enum *ModelStatus) UnmarshalJSON(data []byte) error

func (ModelStatus) Values ¶

func (enum ModelStatus) Values() []ModelStatus

type ModelSupportInfo ¶

// ModelSupportInfo is the element type of Model.NodesSupport; it wraps a list
// of ModelSupportedNode entries.
type ModelSupportInfo struct {
	// Nodes: list of supported node types.
	Nodes []*ModelSupportedNode `json:"nodes"`
}

ModelSupportInfo: model support info.

type ModelSupportedNode ¶

// ModelSupportedNode is the element type of ModelSupportInfo.Nodes: a node
// type name paired with the quantizations listed for it.
type ModelSupportedNode struct {
	// NodeTypeName: supported node type.
	NodeTypeName string `json:"node_type_name"`

	// Quantizations: supported quantizations.
	Quantizations []*ModelSupportedQuantization `json:"quantizations"`
}

ModelSupportedNode: model supported node.

type ModelSupportedQuantization ¶

// ModelSupportedQuantization is the element type of
// ModelSupportedNode.Quantizations.
type ModelSupportedQuantization struct {
	// QuantizationBits: number of bits for this supported quantization.
	QuantizationBits uint32 `json:"quantization_bits"`

	// Allowed: tells whether this quantization is allowed for this node type.
	Allowed bool `json:"allowed"`

	// MaxContextSize: maximum inference context size available for this node type and quantization.
	MaxContextSize uint32 `json:"max_context_size"`
}

ModelSupportedQuantization: model supported quantization.

type NodeType ¶

// NodeType describes a node type. It is the element type of
// ListNodeTypesResponse.NodeTypes.
type NodeType struct {
	// Name: name of the node type.
	Name string `json:"name"`

	// StockStatus: current stock status for the node type.
	// Default value: unknown_stock
	StockStatus NodeTypeStock `json:"stock_status"`

	// Description: current specs of the offer.
	Description string `json:"description"`

	// Vcpus: number of virtual CPUs.
	Vcpus uint32 `json:"vcpus"`

	// Memory: quantity of RAM.
	Memory scw.Size `json:"memory"`

	// Vram: quantity of GPU RAM.
	Vram scw.Size `json:"vram"`

	// Disabled: the node type is currently disabled.
	Disabled bool `json:"disabled"`

	// Beta: the node type is currently in beta.
	Beta bool `json:"beta"`

	// CreatedAt: creation date of the node type.
	CreatedAt *time.Time `json:"created_at"`

	// UpdatedAt: last modification date of the node type.
	UpdatedAt *time.Time `json:"updated_at"`

	// Gpus: number of GPUs.
	Gpus uint32 `json:"gpus"`

	// Region: region of the node type.
	Region scw.Region `json:"region"`
}

NodeType: node type.

type NodeTypeStock ¶

// NodeTypeStock is a string-backed enumeration; its declared values are the
// NodeTypeStock* constants of this package.
type NodeTypeStock string

func (NodeTypeStock) MarshalJSON ¶

func (enum NodeTypeStock) MarshalJSON() ([]byte, error)

func (NodeTypeStock) String ¶

func (enum NodeTypeStock) String() string

func (*NodeTypeStock) UnmarshalJSON ¶

func (enum *NodeTypeStock) UnmarshalJSON(data []byte) error

func (NodeTypeStock) Values ¶

func (enum NodeTypeStock) Values() []NodeTypeStock

type UpdateDeploymentRequest ¶

// UpdateDeploymentRequest holds the parameters of API.UpdateDeployment.
// Region and DeploymentID are tagged `json:"-"`, which encoding/json never
// encodes. Every other field is a pointer with `omitempty`, so a nil field is
// left out of the encoded JSON; presumably an omitted field is left unchanged
// by the update — confirm.
type UpdateDeploymentRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// DeploymentID: ID of the deployment to update.
	DeploymentID string `json:"-"`

	// Name: name of the deployment.
	Name *string `json:"name,omitempty"`

	// Tags: list of tags to apply to the deployment.
	Tags *[]string `json:"tags,omitempty"`

	// MinSize: defines the new minimum size of the pool.
	MinSize *uint32 `json:"min_size,omitempty"`

	// MaxSize: defines the new maximum size of the pool.
	MaxSize *uint32 `json:"max_size,omitempty"`

	// ModelID: ID of the model to set on the deployment.
	ModelID *string `json:"model_id,omitempty"`

	// Quantization: quantization to use for the deployment.
	Quantization *DeploymentQuantization `json:"quantization,omitempty"`
}

UpdateDeploymentRequest: update deployment request.

type UpdateEndpointRequest ¶

// UpdateEndpointRequest holds the parameters of API.UpdateEndpoint.
// Region and EndpointID are tagged `json:"-"`, which encoding/json never
// encodes; DisableAuth is left out of the encoded JSON when nil.
type UpdateEndpointRequest struct {
	// Region: region to target. If none is passed will use default region from the config.
	Region scw.Region `json:"-"`

	// EndpointID: ID of the endpoint to update.
	EndpointID string `json:"-"`

	// DisableAuth: by default, deployments are protected by IAM authentication.
	// When setting this field to true, the authentication will be disabled.
	DisableAuth *bool `json:"disable_auth,omitempty"`
}

UpdateEndpointRequest: update endpoint request.

type WaitForDeploymentRequest ¶

// WaitForDeploymentRequest holds the parameters of API.WaitForDeployment,
// which returns a *Deployment. None of its fields carries a struct tag.
//
// NOTE(review): the field meanings below are inferred from names and types
// only; confirm against the WaitForDeployment implementation.
//   - DeploymentID: presumably the ID of the deployment to wait on.
//   - Region: presumably the region to target, as in the other request types.
//   - Status: role not visible here (possibly a status to wait for).
//   - Timeout: optional (pointer); presumably bounds the total wait.
//   - RetryInterval: optional (pointer); presumably the delay between checks.
type WaitForDeploymentRequest struct {
	DeploymentID  string
	Region        scw.Region
	Status        DeploymentStatus
	Timeout       *time.Duration
	RetryInterval *time.Duration
}

type WaitForModelRequest ¶

// WaitForModelRequest holds the parameters of API.WaitForModel, which returns
// a *Model. None of its fields carries a struct tag.
//
// NOTE(review): the field meanings below are inferred from names and types
// only; confirm against the WaitForModel implementation.
//   - ModelID: presumably the ID of the model to wait on.
//   - Region: presumably the region to target, as in the other request types.
//   - Timeout: optional (pointer); presumably bounds the total wait.
//   - RetryInterval: optional (pointer); presumably the delay between checks.
type WaitForModelRequest struct {
	ModelID       string
	Region        scw.Region
	Timeout       *time.Duration
	RetryInterval *time.Duration
}

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
sweepers

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL