Documentation
¶
Index ¶
- Constants
- func PauseContainerID(id entity.Id) string
- type BlobGCResult
- type Cgroups
- type CleanupResult
- type ContainerWatchdog
- type ImageGCConfig
- type ImageGCResult
- type ImageWatchdog
- type Metrics
- func (m *Metrics) Add(name string, pathes map[string]string, attributes map[string]string) error
- func (m *Metrics) Gather(name string) ([]*metric_v1alpha.ContainerSnapshot, error)
- func (m *Metrics) Monitor(ctx context.Context)
- func (m *Metrics) Remove(name string) error
- func (m *Metrics) Snapshot(ctx context.Context, req *metric_v1alpha.SandboxMetricsSnapshot) error
- func (m *Metrics) Validate() error
- type PortMonitor
- type SagaSandboxController
- func (s *SagaSandboxController) Close() error
- func (s *SagaSandboxController) Create(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) error
- func (s *SagaSandboxController) Delete(ctx context.Context, id entity.Id, sb *compute.Sandbox) error
- func (s *SagaSandboxController) Init(ctx context.Context) error
- func (s *SagaSandboxController) Periodic(ctx context.Context, timeHorizon time.Duration) error
- func (s *SagaSandboxController) SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)
- func (s *SagaSandboxController) SetWriteTracker(wt controller.WriteTracker)
- type SandboxContainerRuntime
- type SandboxController
- func (c *SandboxController) AllocateNetwork(ctx context.Context, co *compute.Sandbox) (*network.EndpointConfig, error)
- func (c *SandboxController) BootContainers(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, ...) ([]WaitPort, error)
- func (c *SandboxController) BootInitialTask(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, ...) (containerd.Task, error)
- func (c *SandboxController) BuildSpec(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, ...) ([]containerd.NewContainerOpts, error)
- func (c *SandboxController) CheckSandbox(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) (int, error)
- func (c *SandboxController) CleanupContainer(ctx context.Context, cont containerd.Container)
- func (c *SandboxController) Close() error
- func (c *SandboxController) ConfigureVolumes(ctx context.Context, sb *compute.Sandbox, meta *entity.Meta) (map[string]string, error)
- func (c *SandboxController) Create(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) error
- func (c *SandboxController) Delete(ctx context.Context, id entity.Id, sb *compute.Sandbox) error
- func (c *SandboxController) DestroySubContainers(ctx context.Context, id entity.Id) error
- func (c *SandboxController) EmitSandboxEvent(sb *compute.Sandbox, shortID, line string)
- func (c *SandboxController) Init(ctx context.Context) error
- func (c *SandboxController) Periodic(ctx context.Context, timeHorizon time.Duration) error
- func (c *SandboxController) ReleaseDiskLeases(ctx context.Context, sandboxID entity.Id) error
- func (c *SandboxController) SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)
- func (c *SandboxController) SetWriteTracker(wt controller.WriteTracker)
- func (c *SandboxController) StopSandbox(ctx context.Context, id entity.Id) error
- func (c *SandboxController) UnconfigureFirewall(sb *compute.Sandbox)
- func (c *SandboxController) UpdateServices(ctx context.Context, co *compute.Sandbox, meta *entity.Meta, ...) error
- func (c *SandboxController) WaitForPort(ctx context.Context, id string, port int, timeout time.Duration) error
- type SandboxControllerDeps
- type SandboxEntityStore
- type SandboxLifecycle
- type SandboxLogs
- type SandboxNetworking
- type SandboxObservability
- type WaitPort
Constants ¶
const (
// SandboxEntityLabel is the container label key used to associate containers with sandbox entities.
SandboxEntityLabel = "runtime.computer/entity-id"
)
Variables ¶
This section is empty.
Functions ¶
func PauseContainerID ¶ added in v0.4.0
PauseContainerID returns the containerd container ID for a sandbox's pause container.
Types ¶
type BlobGCResult ¶ added in v0.4.0
type BlobGCResult struct {
DeletedBlobs []string
FailedBlobs map[string]error
TotalBlobs int
RetainedBlobs int
}
BlobGCResult contains information about blobs cleaned up during GC.
type CleanupResult ¶
type CleanupResult struct {
// DeletedContainers contains IDs of containers successfully removed
DeletedContainers []string
// FailedContainers contains IDs and errors for containers that failed to be removed
FailedContainers map[string]error
}
CleanupResult contains information about containers cleaned up during orphan removal.
type ContainerWatchdog ¶
type ContainerWatchdog struct {
Log *slog.Logger
CC *containerd.Client
EAC *entityserver_v1alpha.EntityAccessClient
Namespace string
// NodeId scopes sandbox lookups to this node so we only consider
// sandboxes that are scheduled here when building the valid set.
NodeId string
// CheckInterval is how often to check for orphaned containers
CheckInterval time.Duration
// GraceWindow is how long to wait before removing containers from non-running sandboxes
GraceWindow time.Duration
// Subnet is used to release IP addresses when removing orphaned containers
Subnet *netdb.Subnet
// contains filtered or unexported fields
}
ContainerWatchdog periodically checks that containers in containerd match what is expected by sandbox entities. It removes orphaned containers that shouldn't exist, acting as a safety mechanism to keep the container runtime clean.
func (*ContainerWatchdog) CleanupOrphanedContainers ¶ added in v0.4.0
func (w *ContainerWatchdog) CleanupOrphanedContainers(ctx context.Context) (*CleanupResult, error)
CleanupOrphanedContainers removes containers not associated with Running sandboxes. Returns a CleanupResult containing lists of successfully deleted and failed containers.
func (*ContainerWatchdog) Start ¶
func (w *ContainerWatchdog) Start(ctx context.Context)
Start begins the periodic container cleanup process.
func (*ContainerWatchdog) Stop ¶
func (w *ContainerWatchdog) Stop()
Stop gracefully stops the watchdog.
type ImageGCConfig ¶ added in v0.3.0
type ImageGCConfig struct {
// ScheduledGCInterval is how often to run scheduled GC regardless of pressure (default: 168h/weekly)
ScheduledGCInterval time.Duration
// PressureCheckInterval is how often to check disk pressure (default: 1h)
PressureCheckInterval time.Duration
// DiskPressureThreshold is the disk usage percentage that triggers immediate GC (default: 80%)
DiskPressureThreshold float64
}
ImageGCConfig holds configuration for the image garbage collector.
func DefaultImageGCConfig ¶ added in v0.3.0
func DefaultImageGCConfig() ImageGCConfig
DefaultImageGCConfig returns the default configuration for image GC.
type ImageGCResult ¶ added in v0.3.0
type ImageGCResult struct {
// DeletedImages contains names of images successfully removed
DeletedImages []string
// FailedImages contains names and errors for images that failed to be removed
FailedImages map[string]error
// TotalImages is the total number of images before GC
TotalImages int
// RetainedImages is the number of images kept
RetainedImages int
}
ImageGCResult contains information about images cleaned up during GC.
type ImageWatchdog ¶ added in v0.3.0
type ImageWatchdog struct {
Log *slog.Logger
CC *containerd.Client
EAC *entityserver_v1alpha.EntityAccessClient
Namespace string
DataPath string
Config ImageGCConfig
// contains filtered or unexported fields
}
ImageWatchdog periodically garbage collects container images from containerd. It uses Artifact entity status to determine which images to remove:
- Images with no corresponding Artifact are kept (infrastructure images, etc.)
- Images with Artifact status "active" or empty are kept
- Images with Artifact status "archived" are deleted
func (*ImageWatchdog) ParseArtifactID ¶ added in v0.4.0
func (w *ImageWatchdog) ParseArtifactID(imageName string) string
ParseArtifactID extracts the artifact ID from an image name. The expected image format is `cluster.local:5000/{app}:{artifact-name}`, and the resulting artifact ID format is `artifact/{artifact-name}`. It returns an empty string if the image doesn't match the expected format.
func (*ImageWatchdog) RunBlobGC ¶ added in v0.4.0
func (w *ImageWatchdog) RunBlobGC(ctx context.Context) (*BlobGCResult, error)
RunBlobGC performs garbage collection of unreferenced registry blobs. It compares blob files on disk against digests referenced by non-archived artifacts and deletes any that are no longer needed.
func (*ImageWatchdog) RunGC ¶ added in v0.3.0
func (w *ImageWatchdog) RunGC(ctx context.Context) (*ImageGCResult, error)
RunGC performs garbage collection of unused images.
func (*ImageWatchdog) Start ¶ added in v0.3.0
func (w *ImageWatchdog) Start(ctx context.Context)
Start begins the periodic image cleanup process.
func (*ImageWatchdog) Stop ¶ added in v0.3.0
func (w *ImageWatchdog) Stop()
Stop gracefully stops the watchdog.
type Metrics ¶
type Metrics struct {
Log *slog.Logger
CPUUsage *metrics.CPUUsage
MemUsage *metrics.MemoryUsage
// contains filtered or unexported fields
}
func (*Metrics) Gather ¶
func (m *Metrics) Gather(name string) ([]*metric_v1alpha.ContainerSnapshot, error)
func (*Metrics) Snapshot ¶
func (m *Metrics) Snapshot(ctx context.Context, req *metric_v1alpha.SandboxMetricsSnapshot) error
type PortMonitor ¶
type PortMonitor struct {
// contains filtered or unexported fields
}
PortMonitor monitors ports for containers using polling.
func NewPortMonitor ¶
func NewPortMonitor(log *slog.Logger, ports observability.PortTracker) *PortMonitor
NewPortMonitor creates a new port monitor.
func (*PortMonitor) MonitorContainer ¶
func (pm *PortMonitor) MonitorContainer(containerID string, ip string, pid int, ports []int)
MonitorContainer starts monitoring ports for a container. It checks port binding by reading /proc/<pid>/net/tcp from the container's network namespace (via the pause container's PID) rather than doing a TCP dial from the host, which can be interfered with by iptables DNAT rules.
func (*PortMonitor) StopMonitoring ¶
func (pm *PortMonitor) StopMonitoring(containerID string)
StopMonitoring stops monitoring for a container.
type SagaSandboxController ¶ added in v0.6.0
type SagaSandboxController struct {
// contains filtered or unexported fields
}
SagaSandboxController implements SandboxLifecycle using the saga pattern for crash-recoverable sandbox creation. It wraps an inner SandboxController and delegates most operations to it, replacing only createSandbox with a saga-based implementation.
func NewSagaSandboxController ¶ added in v0.6.0
func NewSagaSandboxController( cfg SandboxControllerDeps, storage saga.Storage, log *slog.Logger, ) (*SagaSandboxController, error)
NewSagaSandboxController creates a saga-based sandbox controller.
func (*SagaSandboxController) Close ¶ added in v0.6.0
func (s *SagaSandboxController) Close() error
Close shuts down the inner controller.
func (*SagaSandboxController) Create ¶ added in v0.6.0
func (s *SagaSandboxController) Create(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) error
Create handles sandbox create/update events. For new sandboxes, it uses the saga-based creation flow.
func (*SagaSandboxController) Delete ¶ added in v0.6.0
func (s *SagaSandboxController) Delete(ctx context.Context, id entity.Id, sb *compute.Sandbox) error
Delete delegates to the inner controller.
func (*SagaSandboxController) Init ¶ added in v0.6.0
func (s *SagaSandboxController) Init(ctx context.Context) error
Init initializes the sandbox controller and registers saga definitions.
func (*SagaSandboxController) Periodic ¶ added in v0.6.0
Periodic delegates to the inner controller.
func (*SagaSandboxController) SetPortStatus ¶ added in v0.6.0
func (s *SagaSandboxController) SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)
SetPortStatus delegates to the inner controller.
func (*SagaSandboxController) SetWriteTracker ¶ added in v0.6.0
func (s *SagaSandboxController) SetWriteTracker(wt controller.WriteTracker)
SetWriteTracker sets the write tracker on both the saga controller and inner controller.
type SandboxContainerRuntime ¶ added in v0.6.0
type SandboxContainerRuntime interface {
BuildSpec(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, meta *entity.Meta) ([]containerd.NewContainerOpts, error)
CreateContainer(ctx context.Context, id string, opts ...containerd.NewContainerOpts) (string, error)
LoadContainer(ctx context.Context, id string) (containerd.Container, error)
CleanupContainer(ctx context.Context, cont containerd.Container)
BootInitialTask(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, container containerd.Container, shortID string) (containerd.Task, error)
ConfigureVolumes(ctx context.Context, sb *compute.Sandbox, meta *entity.Meta) (map[string]string, error)
BootContainers(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, sbPid int, cgroups map[string]string, meta *entity.Meta, volumeMounts map[string]string) ([]WaitPort, error)
DestroySubContainers(ctx context.Context, id entity.Id) error
ReleaseDiskLeases(ctx context.Context, sandboxID entity.Id) error
UnconfigureFirewall(sb *compute.Sandbox)
WaitForPort(ctx context.Context, id string, port int, timeout time.Duration) error
}
SandboxContainerRuntime provides containerd container operations.
type SandboxController ¶
type SandboxController struct {
Log *slog.Logger
CC *containerd.Client
EAC *entityserver_v1alpha.EntityAccessClient
Namespace string
NodeId string
NetServ *network.ServiceManager
Bridge string
Subnet *netdb.Subnet
DataPath string
Tempdir string
LogsMaintainer *observability.LogsMaintainer
LogWriter observability.LogWriter
StatusMon *observability.StatusMonitor
Resolver netresolve.Resolver
Metrics *Metrics
WorkloadIssuer workloadidentity.TokenIssuer
// contains filtered or unexported fields
}
func NewSandboxController ¶ added in v0.3.0
func NewSandboxController(cfg SandboxControllerDeps) (*SandboxController, error)
NewSandboxController creates a new SandboxController with validated dependencies.
func (*SandboxController) AllocateNetwork ¶ added in v0.6.0
func (c *SandboxController) AllocateNetwork( ctx context.Context, co *compute.Sandbox, ) (*network.EndpointConfig, error)
func (*SandboxController) BootContainers ¶ added in v0.6.0
func (*SandboxController) BootInitialTask ¶ added in v0.6.0
func (c *SandboxController) BootInitialTask( ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, container containerd.Container, shortID string, ) (containerd.Task, error)
func (*SandboxController) BuildSpec ¶ added in v0.6.0
func (c *SandboxController) BuildSpec( ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, meta *entity.Meta, ) ( []containerd.NewContainerOpts, error, )
func (*SandboxController) CheckSandbox ¶ added in v0.6.0
func (*SandboxController) CleanupContainer ¶ added in v0.6.0
func (c *SandboxController) CleanupContainer(ctx context.Context, cont containerd.Container)
CleanupContainer removes a container and its snapshot during failure scenarios.
func (*SandboxController) Close ¶
func (c *SandboxController) Close() error
func (*SandboxController) ConfigureVolumes ¶ added in v0.6.0
func (c *SandboxController) ConfigureVolumes(ctx context.Context, sb *compute.Sandbox, meta *entity.Meta) (map[string]string, error)
ConfigureVolumes prepares volumes and returns a map of volume name to actual mount path.
func (*SandboxController) DestroySubContainers ¶ added in v0.6.0
func (*SandboxController) EmitSandboxEvent ¶ added in v0.7.0
func (c *SandboxController) EmitSandboxEvent(sb *compute.Sandbox, shortID, line string)
EmitSandboxEvent writes a single runtime lifecycle line to the sandbox's log stream, using the same entity and attrs the container stdio pipeline uses. Events go on the Stderr stream so operators see them in `miren logs sandbox <id>` and can distinguish them from application output via the [miren] prefix.
func (*SandboxController) Periodic ¶
Periodic cleans up dead sandboxes that are older than the specified time horizon.
func (*SandboxController) ReleaseDiskLeases ¶ added in v0.6.0
ReleaseDiskLeases releases all disk leases owned by the given sandbox. This transitions leases to RELEASED status, which triggers the disk lease controller to unmount the volumes and release the underlying resources.
func (*SandboxController) SetPortStatus ¶
func (c *SandboxController) SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)
func (*SandboxController) SetWriteTracker ¶
func (c *SandboxController) SetWriteTracker(wt controller.WriteTracker)
SetWriteTracker sets the write tracker for recording manual entity writes.
func (*SandboxController) StopSandbox ¶ added in v0.6.0
func (*SandboxController) UnconfigureFirewall ¶ added in v0.6.0
func (c *SandboxController) UnconfigureFirewall(sb *compute.Sandbox)
func (*SandboxController) UpdateServices ¶ added in v0.6.0
func (c *SandboxController) UpdateServices( ctx context.Context, co *compute.Sandbox, meta *entity.Meta, ep *network.EndpointConfig, ) error
func (*SandboxController) WaitForPort ¶ added in v0.6.0
type SandboxControllerDeps ¶ added in v0.3.0
type SandboxControllerDeps struct {
Log *slog.Logger
CC *containerd.Client
EAC *entityserver_v1alpha.EntityAccessClient
Namespace string
NodeId string
NetServ *network.ServiceManager
Bridge string
Subnet *netdb.Subnet
DataPath string
Tempdir string
LogsMaintainer *observability.LogsMaintainer
LogWriter observability.LogWriter
StatusMon *observability.StatusMonitor
Resolver netresolve.Resolver
Metrics *Metrics
WorkloadIssuer workloadidentity.TokenIssuer
}
SandboxControllerDeps holds required dependencies for SandboxController.
type SandboxEntityStore ¶ added in v0.6.0
type SandboxEntityStore interface {
GetSandbox(ctx context.Context, id string) (*compute.Sandbox, *entity.Meta, error)
PatchSandbox(ctx context.Context, attrs []entity.Attr, revision int64) (int64, error)
}
SandboxEntityStore provides entity read/write operations with write tracking.
type SandboxLifecycle ¶ added in v0.6.0
type SandboxLifecycle interface {
Init(ctx context.Context) error
Create(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) error
Delete(ctx context.Context, id entity.Id, sb *compute.Sandbox) error
Close() error
Periodic(ctx context.Context, timeHorizon time.Duration) error
SetWriteTracker(wt controller.WriteTracker)
SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)
}
SandboxLifecycle defines the interface for sandbox lifecycle management. Both SandboxController and SagaSandboxController implement this interface.
type SandboxLogs ¶
type SandboxLogs struct {
// contains filtered or unexported fields
}
func NewSandboxLogs ¶
func NewSandboxLogs( log *slog.Logger, entity string, attrs map[string]string, lw observability.LogWriter, ) *SandboxLogs
func (*SandboxLogs) Stderr ¶
func (s *SandboxLogs) Stderr() *SandboxLogs
type SandboxNetworking ¶ added in v0.6.0
type SandboxNetworking interface {
AllocateNetwork(ctx context.Context, sb *compute.Sandbox) (*network.EndpointConfig, error)
ReleaseAddr(addr netip.Addr) error
RebuildEndpointConfig(addresses []string) (*network.EndpointConfig, error)
BridgeName() string
}
SandboxNetworking provides network allocation and configuration.
type SandboxObservability ¶ added in v0.6.0
type SandboxObservability interface {
AddMetrics(logEntity string, cgroups map[string]string, attrs map[string]string) error
RemoveMetrics(logEntity string)
UpdateServices(ctx context.Context, co *compute.Sandbox, meta *entity.Meta, ep *network.EndpointConfig) error
// LogSandboxEvent writes a runtime lifecycle message to the
// sandbox's normal log stream, so `miren logs sandbox <id>`
// surfaces it alongside container output. Intended for startup
// or teardown events where a container never produced logs of
// its own (e.g. volume mount failures).
LogSandboxEvent(sb *compute.Sandbox, shortID, line string)
}
SandboxObservability provides metrics, service management, and sandbox lifecycle event logging.