sandbox

package
v0.7.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 14, 2026 License: Apache-2.0 Imports: 59 Imported by: 0

Documentation

Index

Constants

View Source
const (

	// SandboxEntityLabel is the container label key used to associate containers with sandbox entities.
	SandboxEntityLabel = "runtime.computer/entity-id"
)

Variables

This section is empty.

Functions

func PauseContainerID added in v0.4.0

func PauseContainerID(id entity.Id) string

PauseContainerID returns the containerd container ID for a sandbox's pause container.

Types

type BlobGCResult added in v0.4.0

type BlobGCResult struct {
	DeletedBlobs  []string
	FailedBlobs   map[string]error
	TotalBlobs    int
	RetainedBlobs int
}

BlobGCResult contains information about blobs cleaned up during GC.

type Cgroups

type Cgroups struct {
	// contains filtered or unexported fields
}

type CleanupResult

type CleanupResult struct {
	// DeletedContainers contains IDs of containers successfully removed
	DeletedContainers []string
	// FailedContainers contains IDs and errors for containers that failed to be removed
	FailedContainers map[string]error
}

CleanupResult contains information about containers cleaned up during orphan removal.

type ContainerWatchdog

type ContainerWatchdog struct {
	Log *slog.Logger
	CC  *containerd.Client
	EAC *entityserver_v1alpha.EntityAccessClient

	Namespace string
	// NodeId scopes sandbox lookups to this node so we only consider
	// sandboxes that are scheduled here when building the valid set.
	NodeId string
	// CheckInterval is how often to check for orphaned containers
	CheckInterval time.Duration
	// GraceWindow is how long to wait before removing containers from non-running sandboxes
	GraceWindow time.Duration
	// Subnet is used to release IP addresses when removing orphaned containers
	Subnet *netdb.Subnet
	// contains filtered or unexported fields
}

ContainerWatchdog periodically checks that containers in containerd match what is expected by sandbox entities. It removes orphaned containers that shouldn't exist, acting as a safety mechanism to keep the container runtime clean.

func (*ContainerWatchdog) CleanupOrphanedContainers added in v0.4.0

func (w *ContainerWatchdog) CleanupOrphanedContainers(ctx context.Context) (*CleanupResult, error)

CleanupOrphanedContainers removes containers not associated with Running sandboxes. Returns a CleanupResult containing lists of successfully deleted and failed containers.

func (*ContainerWatchdog) Start

func (w *ContainerWatchdog) Start(ctx context.Context)

Start begins the periodic container cleanup process.

func (*ContainerWatchdog) Stop

func (w *ContainerWatchdog) Stop()

Stop gracefully stops the watchdog.

type ImageGCConfig added in v0.3.0

type ImageGCConfig struct {
	// ScheduledGCInterval is how often to run scheduled GC regardless of pressure (default: 168h/weekly)
	ScheduledGCInterval time.Duration
	// PressureCheckInterval is how often to check disk pressure (default: 1h)
	PressureCheckInterval time.Duration
	// DiskPressureThreshold is the disk usage percentage that triggers immediate GC (default: 80%)
	DiskPressureThreshold float64
}

ImageGCConfig holds configuration for the image garbage collector.

func DefaultImageGCConfig added in v0.3.0

func DefaultImageGCConfig() ImageGCConfig

DefaultImageGCConfig returns the default configuration for image GC.

type ImageGCResult added in v0.3.0

type ImageGCResult struct {
	// DeletedImages contains names of images successfully removed
	DeletedImages []string
	// FailedImages contains names and errors for images that failed to be removed
	FailedImages map[string]error
	// TotalImages is the total number of images before GC
	TotalImages int
	// RetainedImages is the number of images kept
	RetainedImages int
}

ImageGCResult contains information about images cleaned up during GC.

type ImageWatchdog added in v0.3.0

type ImageWatchdog struct {
	Log *slog.Logger
	CC  *containerd.Client
	EAC *entityserver_v1alpha.EntityAccessClient

	Namespace string
	DataPath  string
	Config    ImageGCConfig
	// contains filtered or unexported fields
}

ImageWatchdog periodically garbage collects container images from containerd. It uses Artifact entity status to determine which images to remove:

- Images with no corresponding Artifact are kept (infrastructure images, etc.)
- Images with Artifact status "active" or empty are kept
- Images with Artifact status "archived" are deleted

func (*ImageWatchdog) ParseArtifactID added in v0.4.0

func (w *ImageWatchdog) ParseArtifactID(imageName string) string

ParseArtifactID extracts the artifact ID from an image name.

Image format: cluster.local:5000/{app}:{artifact-name}
Artifact ID format: artifact/{artifact-name}

Returns an empty string if the image doesn't match the expected format.

func (*ImageWatchdog) RunBlobGC added in v0.4.0

func (w *ImageWatchdog) RunBlobGC(ctx context.Context) (*BlobGCResult, error)

RunBlobGC performs garbage collection of unreferenced registry blobs. It compares blob files on disk against digests referenced by non-archived artifacts and deletes any that are no longer needed.

func (*ImageWatchdog) RunGC added in v0.3.0

func (w *ImageWatchdog) RunGC(ctx context.Context) (*ImageGCResult, error)

RunGC performs garbage collection of unused images.

func (*ImageWatchdog) Start added in v0.3.0

func (w *ImageWatchdog) Start(ctx context.Context)

Start begins the periodic image cleanup process.

func (*ImageWatchdog) Stop added in v0.3.0

func (w *ImageWatchdog) Stop()

Stop gracefully stops the watchdog.

type Metrics

type Metrics struct {
	Log      *slog.Logger
	CPUUsage *metrics.CPUUsage
	MemUsage *metrics.MemoryUsage
	// contains filtered or unexported fields
}

func NewMetrics added in v0.3.0

func NewMetrics() *Metrics

NewMetrics creates a new Metrics.

func (*Metrics) Add

func (m *Metrics) Add(name string, pathes map[string]string, attributes map[string]string) error

func (*Metrics) Gather

func (m *Metrics) Gather(name string) ([]*metric_v1alpha.ContainerSnapshot, error)

func (*Metrics) Monitor

func (m *Metrics) Monitor(ctx context.Context)

func (*Metrics) Remove

func (m *Metrics) Remove(name string) error

func (*Metrics) Snapshot

func (*Metrics) Validate added in v0.7.0

func (m *Metrics) Validate() error

Validate checks that required fields are set so Monitor() won't nil-deref.

type PortMonitor

type PortMonitor struct {
	// contains filtered or unexported fields
}

PortMonitor monitors ports for containers using polling.

func NewPortMonitor

func NewPortMonitor(log *slog.Logger, ports observability.PortTracker) *PortMonitor

NewPortMonitor creates a new port monitor.

func (*PortMonitor) Close

func (pm *PortMonitor) Close() error

Close stops all monitoring.

func (*PortMonitor) MonitorContainer

func (pm *PortMonitor) MonitorContainer(containerID string, ip string, pid int, ports []int)

MonitorContainer starts monitoring ports for a container. It checks port binding by reading /proc/<pid>/net/tcp from the container's network namespace (via the pause container's PID) rather than doing a TCP dial from the host, which can be interfered with by iptables DNAT rules.

func (*PortMonitor) StopMonitoring

func (pm *PortMonitor) StopMonitoring(containerID string)

StopMonitoring stops monitoring for a container.

type SagaSandboxController added in v0.6.0

type SagaSandboxController struct {
	// contains filtered or unexported fields
}

SagaSandboxController implements SandboxLifecycle using the saga pattern for crash-recoverable sandbox creation. It wraps an inner SandboxController and delegates most operations to it, replacing only createSandbox with a saga-based implementation.

func NewSagaSandboxController added in v0.6.0

func NewSagaSandboxController(
	cfg SandboxControllerDeps,
	storage saga.Storage,
	log *slog.Logger,
) (*SagaSandboxController, error)

NewSagaSandboxController creates a saga-based sandbox controller.

func (*SagaSandboxController) Close added in v0.6.0

func (s *SagaSandboxController) Close() error

Close shuts down the inner controller.

func (*SagaSandboxController) Create added in v0.6.0

func (s *SagaSandboxController) Create(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) error

Create handles sandbox create/update events. For new sandboxes, it uses the saga-based creation flow.

func (*SagaSandboxController) Delete added in v0.6.0

Delete delegates to the inner controller.

func (*SagaSandboxController) Init added in v0.6.0

Init initializes the sandbox controller and registers saga definitions.

func (*SagaSandboxController) Periodic added in v0.6.0

func (s *SagaSandboxController) Periodic(ctx context.Context, timeHorizon time.Duration) error

Periodic delegates to the inner controller.

func (*SagaSandboxController) SetPortStatus added in v0.6.0

func (s *SagaSandboxController) SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)

SetPortStatus delegates to the inner controller.

func (*SagaSandboxController) SetWriteTracker added in v0.6.0

func (s *SagaSandboxController) SetWriteTracker(wt controller.WriteTracker)

SetWriteTracker sets the write tracker on both the saga controller and inner controller.

type SandboxContainerRuntime added in v0.6.0

type SandboxContainerRuntime interface {
	BuildSpec(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, meta *entity.Meta) ([]containerd.NewContainerOpts, error)
	CreateContainer(ctx context.Context, id string, opts ...containerd.NewContainerOpts) (string, error)
	LoadContainer(ctx context.Context, id string) (containerd.Container, error)
	CleanupContainer(ctx context.Context, cont containerd.Container)
	BootInitialTask(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, container containerd.Container) (containerd.Task, error)
	ConfigureVolumes(ctx context.Context, sb *compute.Sandbox, meta *entity.Meta) (map[string]string, error)
	BootContainers(ctx context.Context, sb *compute.Sandbox, ep *network.EndpointConfig, sbPid int, cgroups map[string]string, meta *entity.Meta, volumeMounts map[string]string) ([]WaitPort, error)
	DestroySubContainers(ctx context.Context, id entity.Id) error
	ReleaseDiskLeases(ctx context.Context, sandboxID entity.Id) error
	UnconfigureFirewall(sb *compute.Sandbox)
	WaitForPort(ctx context.Context, id string, port int, timeout time.Duration) error
}

SandboxContainerRuntime provides containerd container operations.

type SandboxController

type SandboxController struct {
	Log *slog.Logger
	CC  *containerd.Client

	EAC *entityserver_v1alpha.EntityAccessClient

	Namespace string
	NodeId    string

	NetServ *network.ServiceManager

	Bridge string
	Subnet *netdb.Subnet

	DataPath string
	Tempdir  string

	LogsMaintainer *observability.LogsMaintainer
	LogWriter      observability.LogWriter

	StatusMon *observability.StatusMonitor

	Resolver netresolve.Resolver
	Metrics  *Metrics
	// contains filtered or unexported fields
}

func NewSandboxController added in v0.3.0

func NewSandboxController(cfg SandboxControllerDeps) (*SandboxController, error)

NewSandboxController creates a new SandboxController with validated dependencies.

func (*SandboxController) AllocateNetwork added in v0.6.0

func (c *SandboxController) AllocateNetwork(
	ctx context.Context,
	co *compute.Sandbox,
) (*network.EndpointConfig, error)

func (*SandboxController) BootContainers added in v0.6.0

func (c *SandboxController) BootContainers(
	ctx context.Context,
	sb *compute.Sandbox,
	ep *network.EndpointConfig,
	sbPid int,
	cgroups map[string]string,
	meta *entity.Meta,
	volumeMounts map[string]string,
) ([]WaitPort, error)

func (*SandboxController) BootInitialTask added in v0.6.0

func (c *SandboxController) BootInitialTask(
	ctx context.Context,
	sb *compute.Sandbox,
	ep *network.EndpointConfig,
	container containerd.Container,
) (containerd.Task, error)

func (*SandboxController) BuildSpec added in v0.6.0

func (*SandboxController) CheckSandbox added in v0.6.0

func (c *SandboxController) CheckSandbox(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) (int, error)

func (*SandboxController) CleanupContainer added in v0.6.0

func (c *SandboxController) CleanupContainer(ctx context.Context, cont containerd.Container)

CleanupContainer removes a container and its snapshot during failure scenarios.

func (*SandboxController) Close

func (c *SandboxController) Close() error

func (*SandboxController) ConfigureVolumes added in v0.6.0

func (c *SandboxController) ConfigureVolumes(ctx context.Context, sb *compute.Sandbox, meta *entity.Meta) (map[string]string, error)

ConfigureVolumes prepares volumes and returns a map of volume name to actual mount path.

func (*SandboxController) Create

func (c *SandboxController) Create(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) error

func (*SandboxController) Delete

func (c *SandboxController) Delete(ctx context.Context, id entity.Id, sb *compute.Sandbox) error

func (*SandboxController) DestroySubContainers added in v0.6.0

func (c *SandboxController) DestroySubContainers(ctx context.Context, id entity.Id) error

func (*SandboxController) EmitSandboxEvent added in v0.7.0

func (c *SandboxController) EmitSandboxEvent(sb *compute.Sandbox, line string)

EmitSandboxEvent writes a single runtime lifecycle line to the sandbox's log stream, using the same entity and attrs the container stdio pipeline uses. Events go on the Stderr stream so operators see them in `miren logs sandbox <id>` and can distinguish them from application output via the [miren] prefix.

func (*SandboxController) Init

func (c *SandboxController) Init(ctx context.Context) error

func (*SandboxController) Periodic

func (c *SandboxController) Periodic(ctx context.Context, timeHorizon time.Duration) error

Periodic cleans up dead sandboxes that are older than the specified time horizon.

func (*SandboxController) ReleaseDiskLeases added in v0.6.0

func (c *SandboxController) ReleaseDiskLeases(ctx context.Context, sandboxID entity.Id) error

ReleaseDiskLeases releases all disk leases owned by the given sandbox. This transitions leases to RELEASED status, which triggers the disk lease controller to unmount the volumes and release the underlying resources.

func (*SandboxController) SetPortStatus

func (c *SandboxController) SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)

func (*SandboxController) SetWriteTracker

func (c *SandboxController) SetWriteTracker(wt controller.WriteTracker)

SetWriteTracker sets the write tracker for recording manual entity writes.

func (*SandboxController) StopSandbox added in v0.6.0

func (c *SandboxController) StopSandbox(ctx context.Context, id entity.Id) error

func (*SandboxController) UnconfigureFirewall added in v0.6.0

func (c *SandboxController) UnconfigureFirewall(sb *compute.Sandbox)

func (*SandboxController) UpdateServices added in v0.6.0

func (c *SandboxController) UpdateServices(
	ctx context.Context,
	co *compute.Sandbox,
	meta *entity.Meta,
	ep *network.EndpointConfig,
) error

func (*SandboxController) WaitForPort added in v0.6.0

func (c *SandboxController) WaitForPort(ctx context.Context, id string, port int, timeout time.Duration) error

type SandboxControllerDeps added in v0.3.0

type SandboxControllerDeps struct {
	Log       *slog.Logger
	CC        *containerd.Client
	EAC       *entityserver_v1alpha.EntityAccessClient
	Namespace string
	NodeId    string
	NetServ   *network.ServiceManager
	Bridge    string
	Subnet    *netdb.Subnet
	DataPath  string
	Tempdir   string

	LogsMaintainer *observability.LogsMaintainer
	LogWriter      observability.LogWriter
	StatusMon      *observability.StatusMonitor
	Resolver       netresolve.Resolver
	Metrics        *Metrics
}

SandboxControllerDeps holds required dependencies for SandboxController.

type SandboxEntityStore added in v0.6.0

type SandboxEntityStore interface {
	GetSandbox(ctx context.Context, id string) (*compute.Sandbox, *entity.Meta, error)
	PatchSandbox(ctx context.Context, attrs []entity.Attr, revision int64) (int64, error)
}

SandboxEntityStore provides entity read/write operations with write tracking.

type SandboxLifecycle added in v0.6.0

type SandboxLifecycle interface {
	Init(ctx context.Context) error
	Create(ctx context.Context, co *compute.Sandbox, meta *entity.Meta) error
	Delete(ctx context.Context, id entity.Id, sb *compute.Sandbox) error
	Close() error
	Periodic(ctx context.Context, timeHorizon time.Duration) error
	SetWriteTracker(wt controller.WriteTracker)
	SetPortStatus(id string, port observability.BoundPort, status observability.PortStatus)
}

SandboxLifecycle defines the interface for sandbox lifecycle management. Both SandboxController and SagaSandboxController implement this interface.

type SandboxLogs

type SandboxLogs struct {
	// contains filtered or unexported fields
}

func NewSandboxLogs

func NewSandboxLogs(
	log *slog.Logger,
	entity string,
	attrs map[string]string,
	lw observability.LogWriter,
) *SandboxLogs

func (*SandboxLogs) Stderr

func (s *SandboxLogs) Stderr() *SandboxLogs

func (*SandboxLogs) Write

func (s *SandboxLogs) Write(p []byte) (n int, err error)

type SandboxNetworking added in v0.6.0

type SandboxNetworking interface {
	AllocateNetwork(ctx context.Context, sb *compute.Sandbox) (*network.EndpointConfig, error)
	ReleaseAddr(addr netip.Addr) error
	RebuildEndpointConfig(addresses []string) (*network.EndpointConfig, error)
	BridgeName() string
}

SandboxNetworking provides network allocation and configuration.

type SandboxObservability added in v0.6.0

type SandboxObservability interface {
	AddMetrics(logEntity string, cgroups map[string]string, attrs map[string]string) error
	RemoveMetrics(logEntity string)
	UpdateServices(ctx context.Context, co *compute.Sandbox, meta *entity.Meta, ep *network.EndpointConfig) error
	// LogSandboxEvent writes a runtime lifecycle message to the
	// sandbox's normal log stream, so `miren logs sandbox <id>`
	// surfaces it alongside container output. Intended for startup
	// or teardown events where a container never produced logs of
	// its own (e.g. volume mount failures).
	LogSandboxEvent(sb *compute.Sandbox, line string)
}

SandboxObservability provides metrics, service management, and sandbox lifecycle event logging.

type WaitPort added in v0.6.0

type WaitPort struct {
	ID   string
	Port int
}

WaitPort describes a container port to wait for during sandbox creation.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL