templatecenter

package
v0.0.0-...-3e0e934 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 28, 2026 License: Apache-2.0, BSD-2-Clause Imports: 46 Imported by: 0

Documentation

Index

Constants

View Source
// Status, operation, resource-type, redo-mode and phase string values for
// artifacts and template/snapshot jobs. All are untyped string constants, and
// several groups share the same literal (e.g. "PENDING", "READY", "FAILED"),
// so a value only has meaning together with the field it is stored in.
const (
	// Artifact status values.
	ArtifactStatusPending  = "PENDING"
	ArtifactStatusBuilding = "BUILDING"
	ArtifactStatusReady    = "READY"
	ArtifactStatusFailed   = "FAILED"
	// ArtifactStatusCleanupPending marks an artifact whose logical references
	// have reached zero and whose physical files are being / awaiting removal.
	// A row in this state must never be reused; the create/reuse path rebuilds
	// instead. GC retries cleanup until the artifact row can be safely deleted.
	ArtifactStatusCleanupPending = "CLEANUP_PENDING"
	// ArtifactStatusOrphaned marks an artifact with no surviving references that
	// was never fully built/distributed (e.g. interrupted build); GC reclaims it.
	ArtifactStatusOrphaned = "ORPHANED"

	// Job status values.
	JobStatusPending = "PENDING"
	JobStatusRunning = "RUNNING"
	JobStatusReady   = "READY"
	JobStatusFailed  = "FAILED"

	// Job operation values.
	JobOperationCreate           = "CREATE"
	JobOperationRedo             = "REDO"
	JobOperationCommit           = "COMMIT"
	JobOperationLegacy           = "LEGACY"
	JobOperationSnapshotCreate   = "SNAPSHOT_CREATE"
	JobOperationSnapshotRollback = "SNAPSHOT_ROLLBACK"
	JobOperationSnapshotDelete   = "SNAPSHOT_DELETE"

	// Job resource type values. Unlike the other groups these are lower-case.
	JobResourceTypeSnapshot = "snapshot"
	JobResourceTypeTemplate = "template"

	// Redo mode values.
	// NOTE(review): presumably select which replicas/nodes a REDO job
	// rebuilds — confirm against the redo request handling.
	RedoModeAll         = "ALL"
	RedoModeNodes       = "NODES"
	RedoModeFailedOnly  = "FAILED_ONLY"
	RedoModeFailedNodes = "FAILED_NODES"

	// Job phase values. JobPhaseReady shares the literal "READY" with
	// JobStatusReady and ArtifactStatusReady.
	JobPhasePulling            = "PULLING"
	JobPhaseUnpacking          = "UNPACKING"
	JobPhaseBuildingExt4       = "BUILDING_EXT4"
	JobPhaseGeneratingJSON     = "GENERATING_JSON"
	JobPhaseDistributing       = "DISTRIBUTING"
	JobPhaseCreatingTemplate   = "CREATING_TEMPLATE"
	JobPhaseSnapshotting       = "SNAPSHOTTING"
	JobPhaseRegistering        = "REGISTERING"
	JobPhaseRollbackPreparing  = "ROLLBACK_PREPARING"
	JobPhaseRollbackDriving    = "ROLLBACK_DRIVING"
	JobPhaseRollbackRecovering = "ROLLBACK_RECOVERING"
	JobPhaseDeleting           = "DELETING"
	JobPhaseReady              = "READY"
)
View Source
// String values used by snapshot runtime refs: one binding type and two ref
// status values.
// NOTE(review): presumably stored in SnapshotRuntimeRefInfo.BindingType and
// SnapshotRuntimeRefInfo.Status respectively — confirm.
const (
	SnapshotRuntimeBindingMemoryBacking = "memory_backing"
	SnapshotRuntimeRefStatusActive      = "ACTIVE"
	SnapshotRuntimeRefStatusReleased    = "RELEASED"
)
View Source
// Template-, replica- and compatibility-level string values. All are untyped
// string constants; several groups reuse the same literal (e.g. "READY",
// "FAILED", "PENDING").
const (
	// DefaultTemplateVersion is the version string "v2".
	// NOTE(review): presumably applied when a request omits a version — confirm.
	DefaultTemplateVersion = "v2"

	// Status values without a more specific prefix.
	// NOTE(review): presumably the template/snapshot definition status, as
	// passed to UpdateDefinitionStatus — confirm.
	StatusPending        = "PENDING"
	StatusReady          = "READY"
	StatusPartiallyReady = "PARTIALLY_READY"
	StatusFailed         = "FAILED"
	StatusCreating       = "CREATING"
	StatusDeleting       = "DELETING"

	// Template kind values; the same literals as JobResourceTypeTemplate and
	// JobResourceTypeSnapshot.
	TemplateKindTemplate = "template"
	TemplateKindSnapshot = "snapshot"

	// StorageBackendCow is the storage backend identifier "cubecow".
	StorageBackendCow = "cubecow"

	// Replica status values.
	ReplicaStatusReady  = "READY"
	ReplicaStatusFailed = "FAILED"

	// Replica phase values.
	ReplicaPhasePending      = "PENDING"
	ReplicaPhaseDistributing = "DISTRIBUTING"
	ReplicaPhaseDistributed  = "DISTRIBUTED"
	ReplicaPhaseSnapshotting = "SNAPSHOTTING"
	ReplicaPhaseReady        = "READY"
	ReplicaPhaseFailed       = "FAILED"
	ReplicaPhaseCleaning     = "CLEANING"

	// Compatibility status values.
	CompatStatusOK      = "OK"
	CompatStatusStale   = "STALE"
	CompatStatusUnknown = "UNKNOWN"
	CompatStatusMissing = "MISSING"

	// Compatibility policy values.
	CompatPolicyStrict    = "STRICT"
	CompatPolicyGuestOnly = "GUEST_ONLY"
)

Variables

View Source
// Sentinel errors reporting a missing snapshot or a missing snapshot
// operation. Compare with errors.Is rather than ==, in case the error has been
// wrapped on its way to the caller.
var (
	ErrSnapshotNotFound          = errors.New("snapshot not found")
	ErrSnapshotOperationNotFound = errors.New("snapshot operation not found")
)
View Source
// Sentinel errors for template store and template operations. Compare with
// errors.Is rather than ==, in case the error has been wrapped on its way to
// the caller.
// NOTE(review): ErrTemplateStaleNeedsRedo is presumably the error that
// TemplateStaleNeedsRedoError unwraps to — confirm.
var (
	ErrTemplateStoreNotInitialized = errors.New("template store is not initialized")
	ErrTemplateNotFound            = errors.New("template not found")
	ErrTemplateIDRequired          = errors.New("template id is required")
	ErrTemplateHasNoReadyReplica   = errors.New("template has no ready replica")
	ErrNoTemplateNodes             = errors.New("no healthy nodes available for template creation")
	ErrDuplicateTemplate           = errors.New("template already exists")
	ErrTemplateAttemptInProgress   = errors.New("template attempt is already in progress")
	ErrTemplateStaleNeedsRedo      = errors.New("template stale needs redo")
)
View Source
// ErrNoFailedTemplateReplicas is the sentinel error reporting that no failed
// template replicas matched a redo request.
// NOTE(review): presumably returned by the redo path (SubmitRedoTemplateFromImage)
// — confirm.
var ErrNoFailedTemplateReplicas = errors.New("no failed template replicas matched redo request")
View Source
// ErrSnapshotReplicaMetadataIncomplete is the sentinel error reporting that a
// snapshot replica's metadata is incomplete.
// NOTE(review): which fields are required is not visible here — check the
// replica validation code.
var ErrSnapshotReplicaMetadataIncomplete = errors.New("snapshot replica metadata is incomplete")
View Source
// ErrTemplateCleanupLocatorMissing is the sentinel error reporting that the
// locator needed for template cleanup is missing.
// NOTE(review): what the "locator" consists of is not visible here — confirm
// against the cleanup code.
var ErrTemplateCleanupLocatorMissing = errors.New("template cleanup locator is missing")
View Source
// ErrTemplateInUse is the sentinel error reporting that a template is still in
// use.
// NOTE(review): presumably returned by the delete paths when references remain
// — confirm.
var ErrTemplateInUse = errors.New("template is still in use")

Functions

func AcquireSnapshotRuntimeRef

func AcquireSnapshotRuntimeRef(ctx context.Context, ref SnapshotRuntimeRefInfo) error

func AdoptCompatBaseline

func AdoptCompatBaseline(ctx context.Context, templateID string) (int, error)

func CountActiveSnapshotRuntimeRefs

func CountActiveSnapshotRuntimeRefs(ctx context.Context, snapshotID string) (int64, error)

func DeleteSnapshot

func DeleteSnapshot(ctx context.Context, requestID, snapshotID, instanceType string) (*sandboxtypes.TemplateImageJobInfo, error)

DeleteSnapshot tears down a snapshot synchronously: it returns only when the underlying delete job has settled into a terminal state (READY on success, FAILED on error). There is no "started, please poll" return path — pending / running states are converted into errors by `finalizeSynchronousSnapshotJob`. The caller can therefore treat a nil error as "snapshot is gone (replica + metadata + caches all cleaned)" and a non-nil error as "delete either rejected up-front or ran to failure".

Behaviour summary:

  • Up-front guards (kind, status, in-use, active-job, active runtime refs) all run inside `withTemplateWriteLock`, so a duplicate request for the same `requestID` is idempotent: a re-arrived call either resumes the still-pending job or surfaces the prior terminal result.
  • The actual delete (`runSnapshotDeleteJob`) runs under a detached context produced by `synchronousSnapshotJobContext`, capped at `snapshotOperationTimeout` (15 min) so a stuck cubelet cannot wedge the master goroutine forever. The wider request context is allowed to cancel the *response*, but the job itself is owned by master and completes (or fails) regardless.
  • On crash / restart, `reconcileSnapshotDefinitionTimeouts` will mark definitions left in `deleting` past the timeout as `failed`, and the next `DeleteSnapshot` call for the same id will re-attempt cleanly.

The snapshot API is synchronous — CubeAPI waits for a terminal state and does not expose a polling interface to callers.

func DeleteTemplate

func DeleteTemplate(ctx context.Context, templateID, instanceType string) error

func EnsureReadyReplica

func EnsureReadyReplica(ctx context.Context, templateID string) error

func EnsureTemplateLocalityReady

func EnsureTemplateLocalityReady(ctx context.Context, templateID, instanceType string) error

func GenerateTemplateID

func GenerateTemplateID() string

GenerateTemplateID returns a new unique template ID with a "tpl-" prefix. It is exported for use by HTTP handlers (e.g. template commit) that need to generate a template ID before calling NormalizeRequest.

func GetDefinition

func GetDefinition(ctx context.Context, templateID string) (*models.TemplateDefinition, error)

func GetRootfsArtifactInfo

func GetRootfsArtifactInfo(ctx context.Context, artifactID string) (*types.RootfsArtifactInfo, error)

func GetTemplateImageJobInfo

func GetTemplateImageJobInfo(ctx context.Context, jobID string) (*types.TemplateImageJobInfo, error)

func GetTemplateKind

func GetTemplateKind(ctx context.Context, templateID string) (string, error)

func GetTemplateRequest

func GetTemplateRequest(ctx context.Context, templateID string) (*sandboxtypes.CreateCubeSandboxReq, error)

func Init

func Init(ctx context.Context) error

func ListReplicas

func ListReplicas(ctx context.Context, templateID string) ([]models.TemplateReplica, error)

func OpenRootfsArtifact

func OpenRootfsArtifact(ctx context.Context, artifactID, token string) (*models.RootfsArtifact, *os.File, error)

func RefreshSnapshotRuntimeRefsFromNode

func RefreshSnapshotRuntimeRefsFromNode(ctx context.Context, nodeID, nodeIP string, observed []SnapshotRuntimeRefInfo) error

func RegisterSnapshotRuntimeRefForCreatedSandbox

func RegisterSnapshotRuntimeRefForCreatedSandbox(ctx context.Context, snapshotID, sandboxID, nodeID, nodeIP string) error

RegisterSnapshotRuntimeRefForCreatedSandbox records a sandbox↔snapshot binding for the runtime ref tracker. v4: master no longer carries a physical memory_vol reference; MemoryVol on the ref is intentionally left empty. The replica lookup is still performed for its side effect of validating that a bindable ready replica exists on the chosen node before registering the ref — callers should fail fast if the snapshot is not actually consumable.

func RegisterSnapshotRuntimeRefForCreatedSandboxWithReplica

func RegisterSnapshotRuntimeRefForCreatedSandboxWithReplica(
	ctx context.Context,
	snapshotID, sandboxID, nodeID, nodeIP string,
	replica ReplicaStatus,
) error

RegisterSnapshotRuntimeRefForCreatedSandboxWithReplica is a fast-path variant of RegisterSnapshotRuntimeRefForCreatedSandbox that skips the extra ListReplicas round-trip when the caller has already selected a ready replica earlier in the request (e.g. during bindSnapshotCreateReplica).

The supplied replica MUST originate from a successful bind call for the same (snapshotID, sandboxID's host) — i.e. the chosen replica that was stamped onto reqInOut.DistributionScope. The function still validates the replica metadata before acquiring the ref, so a stale value cannot create a half-baked runtime ref row.

func ReleaseSnapshotRuntimeRefsBySandbox

func ReleaseSnapshotRuntimeRefsBySandbox(ctx context.Context, sandboxID, reason string) error

func ReportResolveMetric

func ReportResolveMetric(ctx context.Context, cost time.Duration)

func ReportResolveStageMetric

func ReportResolveStageMetric(ctx context.Context, action string, cost time.Duration)

ReportResolveStageMetric emits a per-stage trace for the four sub-phases of dealCubeboxCreateReqWithTemplateCenter (request / locality / kind / bind). It re-uses the same Callee/Action shape as ReportResolveMetric so the existing log.ReportExt sink handles it without additional config.

func RescanCompat

func RescanCompat(ctx context.Context, nodeIDs []string) error

func ResolveSnapshotReadyNodeScope

func ResolveSnapshotReadyNodeScope(ctx context.Context, snapshotID string) ([]string, error)

func ResolveTemplate

func ResolveTemplate(ctx context.Context, reqInOut *sandboxtypes.CreateCubeSandboxReq) error

func RollbackSandboxToSnapshot

func RollbackSandboxToSnapshot(ctx context.Context, requestID, sandboxID, snapshotID, instanceType string) (*sandboxtypes.TemplateImageJobInfo, error)

func ScanNodeCompat

func ScanNodeCompat(ctx context.Context, nodeID string) error

func ScheduleCompatScanForNode

func ScheduleCompatScanForNode(nodeID string)

func SnapshotOperationTimeout

func SnapshotOperationTimeout() time.Duration

func SubmitRedoTemplateFromImage

func SubmitRedoTemplateFromImage(ctx context.Context, req *types.RedoTemplateFromImageReq, downloadBaseURL string) (*types.TemplateImageJobInfo, error)

func SubmitSandboxSnapshot

func SubmitSandboxSnapshot(ctx context.Context, requestID, sandboxID, hostID, hostIP, displayName string) (*sandboxtypes.TemplateImageJobInfo, error)

SubmitSandboxSnapshot snapshots an existing running sandbox.

The caller only supplies identifiers (requestID/sandboxID/hostID/hostIP) and an optional displayName. The canonical create-request is resolved internally from sandboxspec (the create-time spec we persist for every sandbox), with a best-effort fallback to GetTemplateRequest(SandboxData.TemplateID) when specstore has no record (e.g. for sandboxes created before the spec store existed). Callers therefore no longer need to re-supply the original CreateCubeSandboxReq; removing that historical requirement was the motivation for resolving the request internally.

func SubmitTemplateCommit

func SubmitTemplateCommit(ctx context.Context, sandboxID, nodeID, nodeIP string, req *sandboxtypes.CreateCubeSandboxReq) (*sandboxtypes.TemplateImageJobInfo, error)

func SubmitTemplateFromImage

func SubmitTemplateFromImage(ctx context.Context, req *types.CreateTemplateFromImageReq, downloadBaseURL string) (*types.TemplateImageJobInfo, error)

func UpdateDefinitionStatus

func UpdateDefinitionStatus(ctx context.Context, templateID, status, lastError string) error

func UpdateSnapshotRuntimeRefsNodeError

func UpdateSnapshotRuntimeRefsNodeError(ctx context.Context, nodeID, nodeIP, message string) error

func UpsertReplica

func UpsertReplica(ctx context.Context, templateID, instanceType string, replica ReplicaStatus) error

Types

type ListSnapshotsOptions

// ListSnapshotsOptions carries the filter and pagination inputs accepted by
// ListSnapshots.
// NOTE(review): presumably zero-valued fields mean "no filter" and NextToken
// is the token string returned by a previous ListSnapshots call — confirm.
type ListSnapshotsOptions struct {
	SnapshotID string
	SandboxID  string
	Name       string
	Status     string
	Limit      int
	NextToken  string
}

type ReplicaStatus

// ReplicaStatus describes one template replica on one node (NodeID/NodeIP):
// its status and phase, artifact and job identifiers, error details, and the
// guest image / agent / kernel versions with their compatibility fields.
// Every field has a snake_case JSON key; all keys except node_id, node_ip and
// status are omitted from the JSON output when the field is empty.
type ReplicaStatus struct {
	NodeID            string `json:"node_id"`
	NodeIP            string `json:"node_ip"`
	InstanceType      string `json:"instance_type,omitempty"`
	Spec              string `json:"spec,omitempty"`
	Status            string `json:"status"`
	Phase             string `json:"phase,omitempty"`
	ArtifactID        string `json:"artifact_id,omitempty"`
	LastJobID         string `json:"last_job_id,omitempty"`
	LastErrorPhase    string `json:"last_error_phase,omitempty"`
	CleanupRequired   bool   `json:"cleanup_required,omitempty"`
	ErrorMessage      string `json:"error_message,omitempty"`
	GuestImageVersion string `json:"guest_image_version,omitempty"`
	AgentVersion      string `json:"agent_version,omitempty"`
	KernelVersion     string `json:"kernel_version,omitempty"`
	CompatStatus      string `json:"compat_status,omitempty"`
	CompatPolicy      string `json:"compat_policy,omitempty"`
	CompatCheckedUnix int64  `json:"compat_checked_unix,omitempty"`
}

ReplicaStatus is the master-side, control-plane view of a template replica on a given node. v5: physical fields (rootfs_vol, memory_vol, snapshot_path, meta_dir, build_rootfs_vol, rootfs_kind, memory_kind, rootfs_dev, memory_dev) were removed because Cubelet's local snapshot catalog is the single source of truth, queried by templateID/snapshotID at restore/cleanup time.

func ResolveSnapshotReadyReplica

func ResolveSnapshotReadyReplica(ctx context.Context, snapshotID, preferredNodeID string) (ReplicaStatus, error)

func ResolveTemplateReadyReplica

func ResolveTemplateReadyReplica(ctx context.Context, templateID, preferredNodeID string) (ReplicaStatus, error)

type SnapshotInfo

// SnapshotInfo is the JSON view of a snapshot returned by GetSnapshotInfo and
// ListSnapshots. Every field is tagged omitempty, so empty fields are left out
// of the JSON output.
// NOTE(review): CreateRequest is presumably only populated when
// GetSnapshotInfo is called with includeRequest set — confirm.
type SnapshotInfo struct {
	SnapshotID                string                             `json:"snapshot_id,omitempty"`
	InstanceType              string                             `json:"instance_type,omitempty"`
	Version                   string                             `json:"version,omitempty"`
	Status                    string                             `json:"status,omitempty"`
	DisplayName               string                             `json:"display_name,omitempty"`
	OriginSandboxID           string                             `json:"origin_sandbox_id,omitempty"`
	OriginNodeID              string                             `json:"origin_node_id,omitempty"`
	StorageBackend            string                             `json:"storage_backend,omitempty"`
	Retain                    bool                               `json:"retain,omitempty"`
	RootfsSizeBytesAtSnapshot uint64                             `json:"rootfs_size_bytes_at_snapshot,omitempty"`
	LastError                 string                             `json:"last_error,omitempty"`
	CreatedAt                 string                             `json:"created_at,omitempty"`
	RuntimeRefCount           int64                              `json:"runtime_ref_count,omitempty"`
	RuntimeRefSandboxes       []string                           `json:"runtime_ref_sandboxes,omitempty"`
	Replicas                  []ReplicaStatus                    `json:"replicas,omitempty"`
	CreateRequest             *sandboxtypes.CreateCubeSandboxReq `json:"create_request,omitempty"`
}

func GetSnapshotInfo

func GetSnapshotInfo(ctx context.Context, snapshotID string, includeRequest bool) (*SnapshotInfo, error)

func ListSnapshots

func ListSnapshots(ctx context.Context, opts *ListSnapshotsOptions) ([]SnapshotInfo, string, error)

type SnapshotOperationInfo

// SnapshotOperationInfo is the JSON view of a snapshot operation returned by
// GetSnapshotOperation. Every field is tagged omitempty, so empty fields are
// left out of the JSON output.
// NOTE(review): Operation, Status, Phase and ResourceType presumably carry the
// JobOperation*, JobStatus*, JobPhase* and JobResourceType* values — confirm.
type SnapshotOperationInfo struct {
	OperationID  string `json:"operation_id,omitempty"`
	SnapshotID   string `json:"snapshot_id,omitempty"`
	SandboxID    string `json:"sandbox_id,omitempty"`
	RequestID    string `json:"request_id,omitempty"`
	Operation    string `json:"operation,omitempty"`
	Status       string `json:"status,omitempty"`
	Phase        string `json:"phase,omitempty"`
	Progress     int32  `json:"progress,omitempty"`
	ErrorMessage string `json:"error_message,omitempty"`
	AttemptNo    int32  `json:"attempt_no,omitempty"`
	RetryOfJobID string `json:"retry_of_job_id,omitempty"`
	ResourceType string `json:"resource_type,omitempty"`
	ResourceID   string `json:"resource_id,omitempty"`
}

func GetSnapshotOperation

func GetSnapshotOperation(ctx context.Context, operationID string) (*SnapshotOperationInfo, error)

type SnapshotRuntimeRefInfo

// SnapshotRuntimeRefInfo describes one runtime reference between a sandbox and
// a snapshot on a node. The struct carries no JSON tags. ReleasedAt and
// LastSeenAt are pointers and may therefore be nil.
// NOTE(review): the RegisterSnapshotRuntimeRefForCreatedSandbox documentation
// states that MemoryVol is intentionally left empty on refs it registers;
// whether MemoryDev and RootfsVol are still populated is not visible here.
type SnapshotRuntimeRefInfo struct {
	ID          uint
	SnapshotID  string
	SandboxID   string
	NodeID      string
	NodeIP      string
	BindingType string
	MemoryVol   string
	MemoryDev   string
	RootfsVol   string
	SandboxGen  uint32
	Status      string
	AttachedAt  time.Time
	ReleasedAt  *time.Time
	LastSeenAt  *time.Time
	LastError   string
}

func GetActiveSnapshotRuntimeRefBySandbox

func GetActiveSnapshotRuntimeRefBySandbox(ctx context.Context, sandboxID string) (*SnapshotRuntimeRefInfo, error)

func ListActiveSnapshotRuntimeRefs

func ListActiveSnapshotRuntimeRefs(ctx context.Context, snapshotID string) ([]SnapshotRuntimeRefInfo, error)

func SnapshotRuntimeRefFromSandboxBriefData

func SnapshotRuntimeRefFromSandboxBriefData(sandbox *sandboxtypes.SandboxBriefData) (SnapshotRuntimeRefInfo, bool)

func SnapshotRuntimeRefFromSandboxData

func SnapshotRuntimeRefFromSandboxData(sandbox *sandboxtypes.SandboxData) (SnapshotRuntimeRefInfo, bool)

type SnapshotStorageStatus

// SnapshotStorageStatus is the per-node snapshot storage status returned by
// ListSnapshotStorageStatus. Every field is tagged omitempty, so empty fields
// are left out of the JSON output.
// NOTE(review): UsagePct is presumably a 0-100 percentage and LastUpdatedAt a
// Unix timestamp — confirm the units against the producer.
type SnapshotStorageStatus struct {
	NodeID        string `json:"node_id,omitempty"`
	NodeIP        string `json:"node_ip,omitempty"`
	UsagePct      uint64 `json:"usage_pct,omitempty"`
	Mode          string `json:"mode,omitempty"`
	LastError     string `json:"last_error,omitempty"`
	LastUpdatedAt int64  `json:"last_updated_at,omitempty"`
}

func ListSnapshotStorageStatus

func ListSnapshotStorageStatus(ctx context.Context, refresh bool) ([]SnapshotStorageStatus, error)

type TemplateCompatMatrix

// TemplateCompatMatrix is the compatibility report returned by
// GetCompatMatrix: an aggregate Summary plus one TemplateCompatRow per
// template. Both JSON keys are always emitted (no omitempty).
type TemplateCompatMatrix struct {
	Summary   TemplateCompatSummary `json:"summary"`
	Templates []TemplateCompatRow   `json:"templates"`
}

func GetCompatMatrix

func GetCompatMatrix(ctx context.Context) (*TemplateCompatMatrix, error)

type TemplateCompatRow

// TemplateCompatRow is one entry of TemplateCompatMatrix.Templates: a
// template's ID, its Overall compatibility value, and the per-node details in
// Nodes. Only instance_type is omitted from the JSON output when empty.
// NOTE(review): Overall presumably holds one of the CompatStatus* values —
// confirm.
type TemplateCompatRow struct {
	TemplateID   string               `json:"template_id"`
	InstanceType string               `json:"instance_type,omitempty"`
	Overall      string               `json:"overall"`
	Nodes        []TemplateNodeCompat `json:"nodes"`
}

type TemplateCompatSummary

// TemplateCompatSummary holds the aggregate counters of a
// TemplateCompatMatrix. All counters are always emitted in JSON (no
// omitempty), so a zero count appears as 0 rather than being dropped.
type TemplateCompatSummary struct {
	StaleTemplates  int `json:"stale_templates"`
	StaleReplicas   int `json:"stale_replicas"`
	AffectedNodes   int `json:"affected_nodes"`
	MissingReplicas int `json:"missing_replicas"`
	UnknownReplicas int `json:"unknown_replicas"`
}

type TemplateInfo

// TemplateInfo is the JSON view of a template returned by CreateTemplate,
// GetTemplateInfo and ListTemplates. template_id and status are always
// emitted; every other key is omitted from the JSON output when empty.
type TemplateInfo struct {
	TemplateID                string          `json:"template_id"`
	InstanceType              string          `json:"instance_type,omitempty"`
	Version                   string          `json:"version,omitempty"`
	Status                    string          `json:"status"`
	Kind                      string          `json:"kind,omitempty"`
	OriginSandboxID           string          `json:"origin_sandbox_id,omitempty"`
	OriginNodeID              string          `json:"origin_node_id,omitempty"`
	DisplayName               string          `json:"display_name,omitempty"`
	StorageBackend            string          `json:"storage_backend,omitempty"`
	Retain                    bool            `json:"retain,omitempty"`
	RootfsSizeBytesAtSnapshot uint64          `json:"rootfs_size_bytes_at_snapshot,omitempty"`
	LastError                 string          `json:"last_error,omitempty"`
	CreatedAt                 string          `json:"created_at,omitempty"`
	ImageInfo                 string          `json:"image_info,omitempty"`
	JobID                     string          `json:"job_id,omitempty"`
	Replicas                  []ReplicaStatus `json:"replicas,omitempty"`

	// CubeEgress CA bake metadata, surfaced for ops triage. Populated
	// from the RootfsArtifact row pointed to by the first replica.
	// All replicas of one template share the same artifact, so a
	// single lookup covers them. Empty/zero on legacy templates that
	// were built before the CA feature existed.
	CubeEgressCABaked          bool   `json:"cube_egress_ca_baked,omitempty"`
	CubeEgressCAFingerprint    string `json:"cube_egress_ca_fingerprint,omitempty"`
	CubeEgressCATargetsWritten int    `json:"cube_egress_ca_targets_written,omitempty"`
}

func CreateTemplate

func CreateTemplate(ctx context.Context, req *sandboxtypes.CreateCubeSandboxReq) (info *TemplateInfo, err error)

func GetTemplateInfo

func GetTemplateInfo(ctx context.Context, templateID string) (*TemplateInfo, error)

func ListTemplates

func ListTemplates(ctx context.Context) ([]TemplateInfo, error)

type TemplateNodeCompat

// TemplateNodeCompat is one entry of TemplateCompatRow.Nodes: a node's
// compatibility status together with paired Bound*/Current* values for the
// guest image, agent and kernel versions. node_id and compat_status are always
// emitted; the remaining keys are omitted from the JSON output when empty.
// NOTE(review): Bound* is presumably the version recorded on the replica and
// Current* the version the node reports now — confirm.
type TemplateNodeCompat struct {
	NodeID                   string `json:"node_id"`
	NodeIP                   string `json:"node_ip,omitempty"`
	CompatStatus             string `json:"compat_status"`
	BoundGuestImageVersion   string `json:"bound_guest_image_version,omitempty"`
	CurrentGuestImageVersion string `json:"current_guest_image_version,omitempty"`
	BoundAgentVersion        string `json:"bound_agent_version,omitempty"`
	CurrentAgentVersion      string `json:"current_agent_version,omitempty"`
	BoundKernelVersion       string `json:"bound_kernel_version,omitempty"`
	CurrentKernelVersion     string `json:"current_kernel_version,omitempty"`
}

type TemplateStaleNeedsRedoError

// TemplateStaleNeedsRedoError is a structured error carrying a template ID and
// a list of nodes. It has Error and Unwrap methods on the pointer receiver, so
// callers can extract it with errors.As using a *TemplateStaleNeedsRedoError
// target.
// NOTE(review): Unwrap presumably returns ErrTemplateStaleNeedsRedo so that
// errors.Is(err, ErrTemplateStaleNeedsRedo) matches, and Nodes presumably
// lists the stale node IDs — confirm both.
type TemplateStaleNeedsRedoError struct {
	TemplateID string
	Nodes      []string
}

func (*TemplateStaleNeedsRedoError) Error

func (*TemplateStaleNeedsRedoError) Unwrap

func (e *TemplateStaleNeedsRedoError) Unwrap() error

Directories

Path Synopsis
cube_egress_ca
Package cube_egress_ca bakes the CubeEgress root CA into a sandbox rootfs directory at template-build time.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL