Documentation
¶
Overview ¶
Package firmwaremanager provides firmware update orchestration for NV-Switch trays.
Index ¶
- Constants
- Variables
- func IsReachable(ip string, port int) bool
- func PowerCycleTray(ctx context.Context, tray *nvswitch.NVSwitchTray) error
- func ResetTray(ctx context.Context, tray *nvswitch.NVSwitchTray, resetType redfish.ResetType) error
- func WaitForReachable(ctx context.Context, ip net.IP, timeout time.Duration) error
- func WaitForReboot(ctx context.Context, ip net.IP, downTimeout, upTimeout time.Duration) error
- func WaitForUnreachable(ctx context.Context, ip net.IP, timeout time.Duration) error
- type Config
- type ExecContext
- type FirmwareManager
- func (m *FirmwareManager) CancelUpdate(ctx context.Context, updateID uuid.UUID) error
- func (m *FirmwareManager) GetAllUpdates(ctx context.Context) ([]*FirmwareUpdate, error)
- func (m *FirmwareManager) GetBundle(version string) (*packages.FirmwarePackage, error)
- func (m *FirmwareManager) GetUpdate(ctx context.Context, updateID uuid.UUID) (*FirmwareUpdate, error)
- func (m *FirmwareManager) GetUpdatesForSwitch(ctx context.Context, switchUUID uuid.UUID) ([]*FirmwareUpdate, error)
- func (m *FirmwareManager) ListBundles() []string
- func (m *FirmwareManager) QueueUpdate(ctx context.Context, switchUUID uuid.UUID, bundleVersion string, ...) ([]*FirmwareUpdate, error)
- func (m *FirmwareManager) ReloadPackages() error
- func (m *FirmwareManager) Start(ctx context.Context) error
- func (m *FirmwareManager) Stats() FirmwareManagerStats
- func (m *FirmwareManager) Stop()
- type FirmwareManagerStats
- type FirmwareUpdate
- type FirmwareUpdateModel
- type InMemoryUpdateStore
- func (s *InMemoryUpdateStore) CancelRemainingInBundle(ctx context.Context, bundleUpdateID uuid.UUID, afterSequence int, ...) (int, error)
- func (s *InMemoryUpdateStore) Delete(ctx context.Context, id uuid.UUID) error
- func (s *InMemoryUpdateStore) Get(ctx context.Context, id uuid.UUID) (*FirmwareUpdate, error)
- func (s *InMemoryUpdateStore) GetActive(ctx context.Context, switchUUID uuid.UUID, component nvswitch.Component) (*FirmwareUpdate, error)
- func (s *InMemoryUpdateStore) GetAll(ctx context.Context) ([]*FirmwareUpdate, error)
- func (s *InMemoryUpdateStore) GetAnyActiveForSwitch(ctx context.Context, switchUUID uuid.UUID) (*FirmwareUpdate, error)
- func (s *InMemoryUpdateStore) GetBySwitch(ctx context.Context, switchUUID uuid.UUID) ([]*FirmwareUpdate, error)
- func (s *InMemoryUpdateStore) GetPendingUpdates(ctx context.Context, limit int) ([]*FirmwareUpdate, error)
- func (s *InMemoryUpdateStore) Save(ctx context.Context, update *FirmwareUpdate) error
- type OutcomeType
- type PostgresUpdateStore
- func (s *PostgresUpdateStore) CancelRemainingInBundle(ctx context.Context, bundleUpdateID uuid.UUID, afterSequence int, ...) (int, error)
- func (s *PostgresUpdateStore) Delete(ctx context.Context, id uuid.UUID) error
- func (s *PostgresUpdateStore) Get(ctx context.Context, id uuid.UUID) (*FirmwareUpdate, error)
- func (s *PostgresUpdateStore) GetActive(ctx context.Context, switchUUID uuid.UUID, component nvswitch.Component) (*FirmwareUpdate, error)
- func (s *PostgresUpdateStore) GetAll(ctx context.Context) ([]*FirmwareUpdate, error)
- func (s *PostgresUpdateStore) GetAnyActiveForSwitch(ctx context.Context, switchUUID uuid.UUID) (*FirmwareUpdate, error)
- func (s *PostgresUpdateStore) GetBySwitch(ctx context.Context, switchUUID uuid.UUID) ([]*FirmwareUpdate, error)
- func (s *PostgresUpdateStore) GetPendingUpdates(ctx context.Context, limit int) ([]*FirmwareUpdate, error)
- func (s *PostgresUpdateStore) Save(ctx context.Context, update *FirmwareUpdate) error
- type RedfishStrategy
- func (s *RedfishStrategy) ExecuteStep(ctx context.Context, update *FirmwareUpdate, tray *nvswitch.NVSwitchTray) StepOutcome
- func (s *RedfishStrategy) GetCurrentVersion(ctx context.Context, tray *nvswitch.NVSwitchTray, component nvswitch.Component) (string, error)
- func (s *RedfishStrategy) Name() Strategy
- func (s *RedfishStrategy) SetFirmwarePath(path string)
- func (s *RedfishStrategy) Steps(update *FirmwareUpdate) []UpdateState
- type SSHStrategy
- func (s *SSHStrategy) ExecuteStep(ctx context.Context, update *FirmwareUpdate, tray *nvswitch.NVSwitchTray) StepOutcome
- func (s *SSHStrategy) GetCurrentVersion(ctx context.Context, tray *nvswitch.NVSwitchTray, component nvswitch.Component) (string, error)
- func (s *SSHStrategy) Name() Strategy
- func (s *SSHStrategy) SetFirmwarePath(path string)
- func (s *SSHStrategy) Steps(update *FirmwareUpdate) []UpdateState
- type ScriptStrategy
- func (s *ScriptStrategy) ExecuteStep(ctx context.Context, update *FirmwareUpdate, tray *nvswitch.NVSwitchTray) StepOutcome
- func (s *ScriptStrategy) GetCurrentVersion(ctx context.Context, tray *nvswitch.NVSwitchTray, component nvswitch.Component) (string, error)
- func (s *ScriptStrategy) Name() Strategy
- func (s *ScriptStrategy) SetFirmwarePath(path string)
- func (s *ScriptStrategy) SetScriptArgs(tokens []string)
- func (s *ScriptStrategy) SetScriptName(name string)
- func (s *ScriptStrategy) Steps(update *FirmwareUpdate) []UpdateState
- type StepOutcome
- type Strategy
- type StrategyFactory
- type UpdateState
- type UpdateStore
- type UpdateStrategy
- type WorkItem
- type WorkerPool
Constants ¶
const (
// DefaultReachabilityTimeout is the default timeout for waiting for a host to become reachable.
DefaultReachabilityTimeout = 5 * time.Minute
// DefaultReachabilityInterval is the default interval between reachability checks.
DefaultReachabilityInterval = 5 * time.Second
// PostReachabilityDelay is extra time to wait after a host becomes reachable (for services to start).
PostReachabilityDelay = 60 * time.Second
)
const (
// DefaultSchedulerInterval is how often the scheduler queries for pending updates.
DefaultSchedulerInterval = 5 * time.Second
// DefaultNumWorkers is the default number of concurrent workers.
DefaultNumWorkers = 10
)
Variables ¶
var ErrNoWorkAvailable = errors.New("no work available")
ErrNoWorkAvailable is returned when no work items are available for processing.
var ErrUpdateNotFound = errors.New("firmware update not found")
ErrUpdateNotFound is returned when an update is not found.
Functions ¶
func IsReachable ¶
func IsReachable(ip string, port int) bool
IsReachable performs a single, non-blocking check of whether a host is reachable via TCP on the specified port. It returns true if the host is reachable and false otherwise. It is intended for async polling.
func PowerCycleTray ¶
func PowerCycleTray(ctx context.Context, tray *nvswitch.NVSwitchTray) error
PowerCycleTray performs a power cycle on the NV-Switch tray via Redfish.
func WaitForReachable ¶
func WaitForReachable(ctx context.Context, ip net.IP, timeout time.Duration) error
WaitForReachable waits for a host to become reachable via TCP port 22 (SSH). Returns nil when the host is reachable, or an error on timeout/cancellation.
func WaitForReboot ¶
func WaitForReboot(ctx context.Context, ip net.IP, downTimeout, upTimeout time.Duration) error
WaitForReboot waits for a host to go down and come back up. This is a combination of WaitForUnreachable followed by WaitForReachable.
Types ¶
type Config ¶
type Config struct {
// PackagesDir is the directory containing firmware package YAML definitions
PackagesDir string
// FirmwareDir is the directory containing firmware files
FirmwareDir string
// NumWorkers is the number of concurrent update workers
NumWorkers int
// SchedulerInterval is how often the scheduler queries for pending updates
SchedulerInterval time.Duration
}
Config holds configuration for the FirmwareManager.
type ExecContext ¶
type ExecContext struct {
// StartedAt is when the async operation began.
StartedAt time.Time `json:"started_at"`
// DeadlineAt is when the operation should timeout.
DeadlineAt time.Time `json:"deadline_at"`
// TaskURI is the Redfish task URI for polling (Redfish strategy).
TaskURI string `json:"task_uri,omitempty"`
// PID is the process ID for script/SCP operations (Script/SSH strategy).
PID int `json:"pid,omitempty"`
// TargetIP is the IP address being monitored for reachability checks.
TargetIP string `json:"target_ip,omitempty"`
// BecameUnreachableAt tracks when the target became unreachable (for reboot detection).
BecameUnreachableAt *time.Time `json:"became_unreachable_at,omitempty"`
// WaitingForReboot indicates we're waiting for the device to come back after reboot.
WaitingForReboot bool `json:"waiting_for_reboot,omitempty"`
}
ExecContext holds async execution state that persists across poll intervals. This allows workers to resume monitoring long-running operations without blocking.
type FirmwareManager ¶
type FirmwareManager struct {
// contains filtered or unexported fields
}
FirmwareManager orchestrates firmware updates for NV-Switches.
func New ¶
func New( config Config, store UpdateStore, nsmgr *nvswitchmanager.NVSwitchManager, ) (*FirmwareManager, error)
New creates a new FirmwareManager.
func (*FirmwareManager) CancelUpdate ¶
func (m *FirmwareManager) CancelUpdate(ctx context.Context, updateID uuid.UUID) error
CancelUpdate attempts to cancel an in-progress update. If the update is part of a bundle, all remaining (QUEUED) updates in the bundle are also cancelled.
func (*FirmwareManager) GetAllUpdates ¶
func (m *FirmwareManager) GetAllUpdates(ctx context.Context) ([]*FirmwareUpdate, error)
GetAllUpdates returns all firmware updates.
func (*FirmwareManager) GetBundle ¶
func (m *FirmwareManager) GetBundle(version string) (*packages.FirmwarePackage, error)
GetBundle returns a firmware package by version.
func (*FirmwareManager) GetUpdate ¶
func (m *FirmwareManager) GetUpdate(ctx context.Context, updateID uuid.UUID) (*FirmwareUpdate, error)
GetUpdate retrieves a firmware update by ID.
func (*FirmwareManager) GetUpdatesForSwitch ¶
func (m *FirmwareManager) GetUpdatesForSwitch(ctx context.Context, switchUUID uuid.UUID) ([]*FirmwareUpdate, error)
GetUpdatesForSwitch returns all firmware updates for a switch.
func (*FirmwareManager) ListBundles ¶
func (m *FirmwareManager) ListBundles() []string
ListBundles returns all available firmware bundle versions.
func (*FirmwareManager) QueueUpdate ¶
func (m *FirmwareManager) QueueUpdate( ctx context.Context, switchUUID uuid.UUID, bundleVersion string, components []nvswitch.Component, ) ([]*FirmwareUpdate, error)
QueueUpdate queues firmware updates for one or more components. If components is empty, all components in the bundle are updated in sequence. Returns the list of queued updates in execution order.
func (*FirmwareManager) ReloadPackages ¶
func (m *FirmwareManager) ReloadPackages() error
ReloadPackages reloads firmware packages from the packages directory.
func (*FirmwareManager) Start ¶
func (m *FirmwareManager) Start(ctx context.Context) error
Start initializes and starts the firmware manager.
func (*FirmwareManager) Stats ¶
func (m *FirmwareManager) Stats() FirmwareManagerStats
Stats returns current worker pool statistics.
func (*FirmwareManager) Stop ¶
func (m *FirmwareManager) Stop()
Stop shuts down the firmware manager.
type FirmwareManagerStats ¶
FirmwareManagerStats holds runtime statistics.
type FirmwareUpdate ¶
type FirmwareUpdate struct {
// Primary key - unique identifier for this update operation
ID uuid.UUID `json:"id"`
// Foreign key to nvswitch table
SwitchUUID uuid.UUID `json:"switch_uuid"`
// What is being updated
Component nvswitch.Component `json:"component"` // FIRMWARE, CPLD, NVOS
BundleVersion string `json:"bundle_version"` // Version of the firmware bundle
// How it's being updated
Strategy Strategy `json:"strategy"` // script, ssh, redfish
// Current state in the state machine
State UpdateState `json:"state"`
// Version tracking
VersionFrom string `json:"version_from"` // Version before update
VersionTo string `json:"version_to"` // Target version
VersionActual string `json:"version_actual"` // Actual version after update (set during verify)
// Redfish-specific: task URI for resume capability
TaskURI string `json:"task_uri,omitempty"`
// Error information
ErrorMessage string `json:"error_message,omitempty"`
// Sequencing fields for multi-component updates
BundleUpdateID *uuid.UUID `json:"bundle_update_id,omitempty"` // Groups related updates
SequenceOrder int `json:"sequence_order"` // Order within bundle (1, 2, 3...)
PredecessorID *uuid.UUID `json:"predecessor_id,omitempty"` // Must complete before this starts
// Async worker pool fields
ExecContext *ExecContext `json:"exec_context,omitempty"` // Persisted async execution state
LastCheckedAt *time.Time `json:"last_checked_at,omitempty"` // Last time worker polled this update
// Timestamps
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
FirmwareUpdate represents a firmware update operation tracked in the database.
func NewFirmwareUpdate ¶
func NewFirmwareUpdate( switchUUID uuid.UUID, component nvswitch.Component, bundleVersion string, strategy Strategy, versionTo string, ) *FirmwareUpdate
NewFirmwareUpdate creates a new FirmwareUpdate in QUEUED state.
func (*FirmwareUpdate) SetError ¶
func (fu *FirmwareUpdate) SetError(err error)
SetError sets the error message and transitions to FAILED state.
func (*FirmwareUpdate) SetState ¶
func (fu *FirmwareUpdate) SetState(state UpdateState)
SetState updates the state and timestamp.
func (*FirmwareUpdate) WithSequencing ¶
func (fu *FirmwareUpdate) WithSequencing(bundleUpdateID *uuid.UUID, order int, predecessorID *uuid.UUID) *FirmwareUpdate
WithSequencing sets the sequencing fields for multi-component updates.
type FirmwareUpdateModel ¶
type FirmwareUpdateModel struct {
bun.BaseModel `bun:"table:firmware_update,alias:fu"`
ID uuid.UUID `bun:"id,pk,type:uuid"`
SwitchUUID uuid.UUID `bun:"switch_uuid,notnull,type:uuid"`
Component nvswitch.Component `bun:"component,notnull"`
BundleVersion string `bun:"bundle_version,notnull"`
Strategy Strategy `bun:"strategy,notnull"`
State UpdateState `bun:"state,notnull"`
VersionFrom string `bun:"version_from"`
VersionTo string `bun:"version_to,notnull"`
VersionActual string `bun:"version_actual"`
TaskURI string `bun:"task_uri"`
ErrorMessage string `bun:"error_message"`
// Sequencing fields for multi-component updates
BundleUpdateID *uuid.UUID `bun:"bundle_update_id,type:uuid"`
SequenceOrder int `bun:"sequence_order"`
PredecessorID *uuid.UUID `bun:"predecessor_id,type:uuid"`
// Async worker pool fields
ExecContext *ExecContext `bun:"exec_context,type:jsonb"`
LastCheckedAt *time.Time `bun:"last_checked_at"`
// Timestamps
CreatedAt time.Time `bun:"created_at,notnull,default:now()"`
UpdatedAt time.Time `bun:"updated_at,notnull,default:now()"`
}
FirmwareUpdateModel is the database model for firmware updates. This maps to the firmware_update table.
type InMemoryUpdateStore ¶
type InMemoryUpdateStore struct {
// contains filtered or unexported fields
}
InMemoryUpdateStore provides an in-memory implementation of UpdateStore. All data is lost when the process exits.
func NewInMemoryUpdateStore ¶
func NewInMemoryUpdateStore() *InMemoryUpdateStore
NewInMemoryUpdateStore creates a new in-memory update store.
func (*InMemoryUpdateStore) CancelRemainingInBundle ¶
func (s *InMemoryUpdateStore) CancelRemainingInBundle(ctx context.Context, bundleUpdateID uuid.UUID, afterSequence int, failedComponent nvswitch.Component) (int, error)
CancelRemainingInBundle cancels all QUEUED updates in a bundle that come after the specified sequence order.
func (*InMemoryUpdateStore) Get ¶
func (s *InMemoryUpdateStore) Get(ctx context.Context, id uuid.UUID) (*FirmwareUpdate, error)
Get retrieves a firmware update by ID.
func (*InMemoryUpdateStore) GetActive ¶
func (s *InMemoryUpdateStore) GetActive(ctx context.Context, switchUUID uuid.UUID, component nvswitch.Component) (*FirmwareUpdate, error)
GetActive returns the active (non-terminal) update for a switch/component pair.
func (*InMemoryUpdateStore) GetAll ¶
func (s *InMemoryUpdateStore) GetAll(ctx context.Context) ([]*FirmwareUpdate, error)
GetAll returns all firmware updates.
func (*InMemoryUpdateStore) GetAnyActiveForSwitch ¶
func (s *InMemoryUpdateStore) GetAnyActiveForSwitch(ctx context.Context, switchUUID uuid.UUID) (*FirmwareUpdate, error)
GetAnyActiveForSwitch returns any active (non-terminal) update for a switch.
func (*InMemoryUpdateStore) GetBySwitch ¶
func (s *InMemoryUpdateStore) GetBySwitch(ctx context.Context, switchUUID uuid.UUID) ([]*FirmwareUpdate, error)
GetBySwitch returns all firmware updates for a given switch.
func (*InMemoryUpdateStore) GetPendingUpdates ¶
func (s *InMemoryUpdateStore) GetPendingUpdates(ctx context.Context, limit int) ([]*FirmwareUpdate, error)
GetPendingUpdates returns up to `limit` updates that need processing.
func (*InMemoryUpdateStore) Save ¶
func (s *InMemoryUpdateStore) Save(ctx context.Context, update *FirmwareUpdate) error
Save persists a firmware update (insert or update).
type OutcomeType ¶
type OutcomeType int
OutcomeType represents the type of outcome from executing a step.
const (
// OutcomeWait indicates the step is waiting for an async operation to complete.
// The worker should save exec_context, update last_checked_at, and move on.
OutcomeWait OutcomeType = iota
// OutcomeTransition indicates the step completed and should transition to the next state.
// The worker should clear exec_context and advance the state.
OutcomeTransition
// OutcomeFailed indicates the step failed with an error.
// The worker should mark the update as FAILED and cancel dependent updates.
OutcomeFailed
)
func (OutcomeType) String ¶
func (o OutcomeType) String() string
String returns a human-readable representation of the outcome type.
type PostgresUpdateStore ¶
type PostgresUpdateStore struct {
// contains filtered or unexported fields
}
PostgresUpdateStore implements UpdateStore using PostgreSQL.
func NewPostgresUpdateStore ¶
func NewPostgresUpdateStore(db *bun.DB) *PostgresUpdateStore
NewPostgresUpdateStore creates a new PostgreSQL-backed update store.
func (*PostgresUpdateStore) CancelRemainingInBundle ¶
func (s *PostgresUpdateStore) CancelRemainingInBundle(ctx context.Context, bundleUpdateID uuid.UUID, afterSequence int, failedComponent nvswitch.Component) (int, error)
CancelRemainingInBundle cancels all QUEUED updates in a bundle that come after the specified sequence order. Used when a predecessor fails.
func (*PostgresUpdateStore) Get ¶
func (s *PostgresUpdateStore) Get(ctx context.Context, id uuid.UUID) (*FirmwareUpdate, error)
Get retrieves a firmware update by ID.
func (*PostgresUpdateStore) GetActive ¶
func (s *PostgresUpdateStore) GetActive(ctx context.Context, switchUUID uuid.UUID, component nvswitch.Component) (*FirmwareUpdate, error)
GetActive returns the active (non-terminal) update for a switch/component pair.
func (*PostgresUpdateStore) GetAll ¶
func (s *PostgresUpdateStore) GetAll(ctx context.Context) ([]*FirmwareUpdate, error)
GetAll returns all firmware updates.
func (*PostgresUpdateStore) GetAnyActiveForSwitch ¶
func (s *PostgresUpdateStore) GetAnyActiveForSwitch(ctx context.Context, switchUUID uuid.UUID) (*FirmwareUpdate, error)
GetAnyActiveForSwitch returns any active (non-terminal) update for a switch.
func (*PostgresUpdateStore) GetBySwitch ¶
func (s *PostgresUpdateStore) GetBySwitch(ctx context.Context, switchUUID uuid.UUID) ([]*FirmwareUpdate, error)
GetBySwitch returns all firmware updates for a given switch.
func (*PostgresUpdateStore) GetPendingUpdates ¶
func (s *PostgresUpdateStore) GetPendingUpdates(ctx context.Context, limit int) ([]*FirmwareUpdate, error)
GetPendingUpdates returns up to `limit` updates that need processing. Returns both:
- QUEUED updates whose predecessor has completed (or has no predecessor)
- Active (non-terminal, non-queued) updates
Priority: QUEUED first (by sequence_order, created_at), then active (by oldest updated_at). This method does NOT modify any state - state transitions happen in the worker.
func (*PostgresUpdateStore) Save ¶
func (s *PostgresUpdateStore) Save(ctx context.Context, update *FirmwareUpdate) error
Save persists a firmware update (insert or update).
type RedfishStrategy ¶
type RedfishStrategy struct {
// contains filtered or unexported fields
}
RedfishStrategy implements firmware updates via Redfish API. Used for BMC/BIOS firmware updates.
Steps: UPLOAD -> POLL_COMPLETION -> VERIFY
func NewRedfishStrategy ¶
func NewRedfishStrategy(config *packages.RedfishConfig) *RedfishStrategy
NewRedfishStrategy creates a new Redfish update strategy.
func (*RedfishStrategy) ExecuteStep ¶
func (s *RedfishStrategy) ExecuteStep(ctx context.Context, update *FirmwareUpdate, tray *nvswitch.NVSwitchTray) StepOutcome
ExecuteStep performs the work for a single state.
func (*RedfishStrategy) GetCurrentVersion ¶
func (s *RedfishStrategy) GetCurrentVersion(ctx context.Context, tray *nvswitch.NVSwitchTray, component nvswitch.Component) (string, error)
GetCurrentVersion queries the current firmware version via Redfish.
func (*RedfishStrategy) Name ¶
func (s *RedfishStrategy) Name() Strategy
Name returns the strategy type.
func (*RedfishStrategy) SetFirmwarePath ¶
func (s *RedfishStrategy) SetFirmwarePath(path string)
SetFirmwarePath sets the path to the firmware file for the current update.
func (*RedfishStrategy) Steps ¶
func (s *RedfishStrategy) Steps(update *FirmwareUpdate) []UpdateState
Steps returns the ordered sequence of states for Redfish updates.
type SSHStrategy ¶
type SSHStrategy struct {
// contains filtered or unexported fields
}
SSHStrategy implements firmware updates via SSH. Used for CPLD and NVOS firmware updates.
CPLD flow: WAIT_REACHABLE -> COPY -> UPLOAD -> INSTALL -> VERIFY -> CLEANUP
NVOS flow (bundle): POWER_CYCLE -> WAIT_REACHABLE -> COPY -> UPLOAD -> INSTALL -> VERIFY -> CLEANUP
NVOS flow (standalone): WAIT_REACHABLE -> COPY -> UPLOAD -> INSTALL -> VERIFY -> CLEANUP
func NewSSHStrategy ¶
func NewSSHStrategy(config *packages.SSHConfig) *SSHStrategy
NewSSHStrategy creates a new SSH update strategy.
func (*SSHStrategy) ExecuteStep ¶
func (s *SSHStrategy) ExecuteStep(ctx context.Context, update *FirmwareUpdate, tray *nvswitch.NVSwitchTray) StepOutcome
ExecuteStep performs the work for a single state.
func (*SSHStrategy) GetCurrentVersion ¶
func (s *SSHStrategy) GetCurrentVersion(ctx context.Context, tray *nvswitch.NVSwitchTray, component nvswitch.Component) (string, error)
GetCurrentVersion queries the current firmware version via SSH.
func (*SSHStrategy) SetFirmwarePath ¶
func (s *SSHStrategy) SetFirmwarePath(path string)
SetFirmwarePath sets the path to the firmware file for the current update.
func (*SSHStrategy) Steps ¶
func (s *SSHStrategy) Steps(update *FirmwareUpdate) []UpdateState
Steps returns the ordered sequence of states for SSH updates. The steps vary based on component and whether this is a bundle update.
type ScriptStrategy ¶
type ScriptStrategy struct {
// contains filtered or unexported fields
}
ScriptStrategy implements firmware updates via external shell scripts. Scripts are specified per-component in the firmware bundle YAML.
Steps: INSTALL -> VERIFY
func NewScriptStrategy ¶
func NewScriptStrategy(config *packages.ScriptConfig) *ScriptStrategy
NewScriptStrategy creates a new script update strategy.
func (*ScriptStrategy) ExecuteStep ¶
func (s *ScriptStrategy) ExecuteStep(ctx context.Context, update *FirmwareUpdate, tray *nvswitch.NVSwitchTray) StepOutcome
ExecuteStep performs the work for a single state.
func (*ScriptStrategy) GetCurrentVersion ¶
func (s *ScriptStrategy) GetCurrentVersion(ctx context.Context, tray *nvswitch.NVSwitchTray, component nvswitch.Component) (string, error)
GetCurrentVersion queries the current firmware version. For script strategy, we delegate to the appropriate method based on component.
func (*ScriptStrategy) Name ¶
func (s *ScriptStrategy) Name() Strategy
Name returns the strategy type.
func (*ScriptStrategy) SetFirmwarePath ¶
func (s *ScriptStrategy) SetFirmwarePath(path string)
SetFirmwarePath sets the path to the firmware file for the current update.
func (*ScriptStrategy) SetScriptArgs ¶
func (s *ScriptStrategy) SetScriptArgs(tokens []string)
SetScriptArgs sets the argument tokens for the script.
func (*ScriptStrategy) SetScriptName ¶
func (s *ScriptStrategy) SetScriptName(name string)
SetScriptName sets the script name to execute (from component definition).
func (*ScriptStrategy) Steps ¶
func (s *ScriptStrategy) Steps(update *FirmwareUpdate) []UpdateState
Steps returns the ordered sequence of states for Script updates.
type StepOutcome ¶
type StepOutcome struct {
// Type indicates what kind of outcome this is.
Type OutcomeType
// NextState is the state to transition to (only valid for OutcomeTransition).
NextState UpdateState
// ExecContext holds async state to persist (only valid for OutcomeWait).
ExecContext *ExecContext
// Error is the error that caused failure (only valid for OutcomeFailed).
Error error
}
StepOutcome represents the result of executing a single step in the update state machine. It uses an explicit outcome model to support async/non-blocking worker pools.
func Failed ¶
func Failed(err error) StepOutcome
Failed creates a StepOutcome indicating the step failed with an error. The update will be marked as FAILED and dependent updates will be cancelled.
func Transition ¶
func Transition(nextState UpdateState) StepOutcome
Transition creates a StepOutcome indicating successful transition to the next state. The exec context will be cleared and the state machine will advance.
func Wait ¶
func Wait(ctx *ExecContext) StepOutcome
Wait creates a StepOutcome indicating the step is waiting for an async operation. The exec context will be persisted and the worker will poll again after the interval.
type Strategy ¶
type Strategy string
Strategy represents the method used to perform a firmware update.
const (
// StrategyScript uses external shell scripts for updates (legacy).
StrategyScript Strategy = "script"
// StrategySSH uses direct SSH commands for CPLD/NVOS updates.
StrategySSH Strategy = "ssh"
// StrategyRedfish uses Redfish API for BMC/BIOS firmware updates.
StrategyRedfish Strategy = "redfish"
)
type StrategyFactory ¶
type StrategyFactory func(config interface{}) UpdateStrategy
StrategyFactory creates an UpdateStrategy for a given strategy type.
type UpdateState ¶
type UpdateState string
UpdateState represents the granular state of a firmware update operation.
const (
// Common states
StateQueued UpdateState = "QUEUED"
StateCompleted UpdateState = "COMPLETED"
StateFailed UpdateState = "FAILED"
StateCancelled UpdateState = "CANCELLED" // Cancelled due to predecessor failure
// Shared states (used by multiple strategies)
StateInstall UpdateState = "INSTALL"
StateVerify UpdateState = "VERIFY"
StateCleanup UpdateState = "CLEANUP"
// NVOS-specific states (pre-update)
StatePowerCycle UpdateState = "POWER_CYCLE" // Power cycle via BMC Redfish (bundle updates only)
StateWaitReachable UpdateState = "WAIT_REACHABLE" // Wait for NVOS to be pingable
// SSH-specific states
StateCopy UpdateState = "COPY" // SCP file to switch
StateUpload UpdateState = "UPLOAD" // nv action fetch
// Redfish-specific states
StatePollCompletion UpdateState = "POLL_COMPLETION" // Poll task until done
)
func GetFirstState ¶
func GetFirstState(update *FirmwareUpdate) UpdateState
GetFirstState returns the initial execution state for a given firmware update. For SSH strategy, the first state depends on the component and whether it's a bundle update.
func (UpdateState) IsActive ¶
func (s UpdateState) IsActive() bool
IsActive returns true if the state indicates active processing.
func (UpdateState) IsTerminal ¶
func (s UpdateState) IsTerminal() bool
IsTerminal returns true if the state is a terminal state.
type UpdateStore ¶
type UpdateStore interface {
// Save persists a firmware update (insert or update).
Save(ctx context.Context, update *FirmwareUpdate) error
// Get retrieves a firmware update by ID.
Get(ctx context.Context, id uuid.UUID) (*FirmwareUpdate, error)
// GetAll returns all firmware updates.
GetAll(ctx context.Context) ([]*FirmwareUpdate, error)
// GetPendingUpdates returns up to `limit` updates that need processing.
// Priority:
// 1. QUEUED updates whose predecessor has completed (or has no predecessor)
// 2. Active (non-terminal, non-queued) updates, ordered by oldest updated_at first
//
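// For QUEUED updates, the state is transitioned to the first active state before returning.
// NOTE(review): the PostgresUpdateStore.GetPendingUpdates documentation states that the method
// does NOT modify any state and that transitions happen in the worker — confirm which contract is intended.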
// This method is used by the scheduler to dispatch work to workers.
GetPendingUpdates(ctx context.Context, limit int) ([]*FirmwareUpdate, error)
// GetBySwitch returns all firmware updates for a given switch.
GetBySwitch(ctx context.Context, switchUUID uuid.UUID) ([]*FirmwareUpdate, error)
// GetActive returns the active (non-terminal) update for a switch/component pair.
// Returns nil, nil if no active update exists.
GetActive(ctx context.Context, switchUUID uuid.UUID, component nvswitch.Component) (*FirmwareUpdate, error)
// GetAnyActiveForSwitch returns any active (non-terminal) update for a switch.
// Returns nil, nil if no active update exists.
GetAnyActiveForSwitch(ctx context.Context, switchUUID uuid.UUID) (*FirmwareUpdate, error)
// CancelRemainingInBundle cancels all QUEUED updates in a bundle that come after
// the specified sequence order. Used when a predecessor fails.
// Returns the number of updates cancelled.
CancelRemainingInBundle(ctx context.Context, bundleUpdateID uuid.UUID, afterSequence int, failedComponent nvswitch.Component) (int, error)
// Delete removes a firmware update by ID.
Delete(ctx context.Context, id uuid.UUID) error
}
UpdateStore provides persistence for firmware update records.
type UpdateStrategy ¶
type UpdateStrategy interface {
// Name returns the strategy type.
Name() Strategy
// Steps returns the ordered sequence of states for this strategy.
// The executor will iterate through these states in order.
// The update parameter allows strategies to return different steps based on
// the component being updated and whether it's part of a bundle.
Steps(update *FirmwareUpdate) []UpdateState
// ExecuteStep performs the work for a single state in the update process.
// Returns a StepOutcome indicating what the worker should do next:
// - Wait: persist ExecContext and poll again after interval
// - Transition: advance to the next state
// - Failed: mark update as failed
ExecuteStep(ctx context.Context, update *FirmwareUpdate, tray *nvswitch.NVSwitchTray) StepOutcome
// GetCurrentVersion queries the current firmware version for a component.
// This is used to populate VersionFrom before the update starts.
GetCurrentVersion(ctx context.Context, tray *nvswitch.NVSwitchTray, component nvswitch.Component) (string, error)
}
UpdateStrategy defines the interface for a firmware update strategy. Each strategy (Script, SSH, Redfish) implements this interface with its own sequence of steps and execution logic.
type WorkItem ¶
type WorkItem struct {
Update *FirmwareUpdate
Done func() // Called when processing is complete
}
WorkItem represents a unit of work dispatched to a worker.
type WorkerPool ¶
type WorkerPool struct {
// contains filtered or unexported fields
}
WorkerPool manages concurrent firmware update execution using a scheduler-based model.
Architecture:
- Scheduler: Single goroutine that queries DB for pending updates and dispatches to workChan
- Workers: N goroutines that read from workChan and process updates
- Batch-and-Wait: Scheduler waits for all dispatched work to complete before next cycle
This ensures no two workers process the same update simultaneously (channel = natural mutex).
func NewWorkerPool ¶
func NewWorkerPool( numWorkers int, schedulerInterval time.Duration, store UpdateStore, nsmgr *nvswitchmanager.NVSwitchManager, packages *packages.Registry, ) *WorkerPool
NewWorkerPool creates a new scheduler-based worker pool.
func (*WorkerPool) ActiveJobCount ¶
func (p *WorkerPool) ActiveJobCount() int
ActiveJobCount returns the number of currently running jobs.
func (*WorkerPool) CancelJob ¶
func (p *WorkerPool) CancelJob(updateID uuid.UUID) bool
CancelJob cancels a specific running job.
func (*WorkerPool) IsJobActive ¶
func (p *WorkerPool) IsJobActive(updateID uuid.UUID) bool
IsJobActive returns true if the given job is currently being processed.
func (*WorkerPool) Start ¶
func (p *WorkerPool) Start()
Start launches the scheduler and worker goroutines.