Documentation
¶
Overview ¶
Package sbdprotocol implements the SBD (Storage-Based Death) protocol message format for inter-node communication in cluster environments.
Package sbdprotocol implements node mapping management for SBD devices ¶
Package sbdprotocol implements node-to-slot mapping for SBD devices
Index ¶
- Constants
- Variables
- func CalculateChecksum(data []byte) uint32
- func GetFenceReasonName(reason uint8) string
- func GetMessageTypeName(msgType byte) string
- func IsEmptySlot(data []byte) bool
- func IsValidMessageType(msgType byte) bool
- func IsValidNodeID(nodeID uint16) bool
- func Marshal(msg SBDMessageHeader) ([]byte, error)
- func MarshalFence(msg SBDFenceMessage) ([]byte, error)
- func MarshalHeartbeat(msg SBDHeartbeatMessage) ([]byte, error)
- type NodeHasher
- type NodeManager
- func (nm *NodeManager) CleanupStaleNodes() ([]string, error)
- func (nm *NodeManager) Close() error
- func (nm *NodeManager) GetActiveNodes() []string
- func (nm *NodeManager) GetClusterName() string
- func (nm *NodeManager) GetCoordinationStrategy() string
- func (nm *NodeManager) GetLastSync() time.Time
- func (nm *NodeManager) GetNodeForNodeID(slotID uint16) (string, bool)
- func (nm *NodeManager) GetNodeIDForNode(nodeName string) (uint16, error)
- func (nm *NodeManager) GetStats() map[string]interface{}
- func (nm *NodeManager) IsDirty() bool
- func (nm *NodeManager) IsFileLockingEnabled() bool
- func (nm *NodeManager) LookupNodeIDForNode(nodeName string) (uint16, error)
- func (nm *NodeManager) ReadWithLock(operation string, fn func() error) error
- func (nm *NodeManager) ReloadFromDevice() error
- func (nm *NodeManager) RemoveNode(nodeName string) error
- func (nm *NodeManager) StartPeriodicSync() chan struct{}
- func (nm *NodeManager) Sync() error
- func (nm *NodeManager) ValidateIntegrity() error
- func (nm *NodeManager) WriteWithLock(operation string, fn func() error) error
- type NodeManagerConfig
- type NodeMapEntry
- type NodeMapTable
- func (table *NodeMapTable) AssignSlot(nodeName string, hasher *NodeHasher) (uint16, error)
- func (table *NodeMapTable) CleanupStaleNodes(maxAge time.Duration) []string
- func (table *NodeMapTable) GetActiveNodes(maxAge time.Duration) []string
- func (table *NodeMapTable) GetNodeForNodeID(nodeID uint16) (string, bool)
- func (table *NodeMapTable) GetNodeIDForNode(nodeName string) (uint16, bool)
- func (table *NodeMapTable) GetStaleNodes(maxAge time.Duration) []string
- func (table *NodeMapTable) GetStats() map[string]interface{}
- func (table *NodeMapTable) Marshal() ([]byte, error)
- func (table *NodeMapTable) RemoveNode(nodeName string) error
- func (table *NodeMapTable) UpdateLastSeen(nodeName string) error
- type PathProvider
- type SBDDevice
- type SBDFenceMessage
- type SBDHeartbeatMessage
- type SBDMessageHeader
Constants ¶
const (
	// SBD_MAGIC is the magic string used to identify valid SBD messages
	SBD_MAGIC = "SBDMSG01"
	// SBD_HEADER_SIZE is the size in bytes of the SBD message header
	SBD_HEADER_SIZE = 33 // 8 + 2 + 1 + 2 + 8 + 8 + 4 = 33 bytes
	// SBD_SLOT_SIZE is the size in bytes of a single SBD slot on the device
	SBD_SLOT_SIZE = 512
	// SBD_MAX_NODES is the maximum number of nodes supported in an SBD cluster
	SBD_MAX_NODES = 255
	// SBD_MSG_TYPE_HEARTBEAT identifies heartbeat messages
	SBD_MSG_TYPE_HEARTBEAT byte = 0x01
	// SBD_MSG_TYPE_FENCE identifies fence messages
	SBD_MSG_TYPE_FENCE byte = 0x02
)
SBD Protocol Constants
const (
	// FENCE_REASON_NONE indicates no fencing is required
	FENCE_REASON_NONE uint8 = 0x00
	// FENCE_REASON_HEARTBEAT_TIMEOUT indicates fencing due to missed heartbeats
	FENCE_REASON_HEARTBEAT_TIMEOUT uint8 = 0x01
	// FENCE_REASON_MANUAL indicates manual fencing request
	FENCE_REASON_MANUAL uint8 = 0x02
	// FENCE_REASON_SPLIT_BRAIN indicates fencing due to split-brain detection
	FENCE_REASON_SPLIT_BRAIN uint8 = 0x03
	// FENCE_REASON_RESOURCE_CONFLICT indicates fencing due to resource conflicts
	FENCE_REASON_RESOURCE_CONFLICT uint8 = 0x04
)
SBD Fence Reason Constants
const (
	// MaxAtomicRetries is the maximum number of retries for atomic operations
	MaxAtomicRetries = 5
	// AtomicRetryDelay is the base delay between retries (will be randomized)
	AtomicRetryDelay = 100 * time.Millisecond

	// File locking constants

	// FileLockTimeout is the maximum time to wait for acquiring a file lock
	FileLockTimeout = 5 * time.Second
	// FileLockRetryInterval is the interval between file lock acquisition attempts
	FileLockRetryInterval = 100 * time.Millisecond
)
Constants for atomic operations
const (
	// SBD_NODE_MAP_FILE_SUFFIX is the suffix for the node mapping file
	// The node mapping is stored in a separate file alongside the SBD device
	// e.g., /dev/sbd0 -> /dev/sbd0.nodemap
	SBD_NODE_MAP_FILE_SUFFIX = agent.SharedStorageNodeMappingSuffix
	// SBD_NODE_MAP_MAGIC is the magic string for node mapping data
	SBD_NODE_MAP_MAGIC = "SBDNMAP1"
	// SBD_NODE_MAP_VERSION is the current version of the node mapping format
	SBD_NODE_MAP_VERSION = 1
	// SBD_USABLE_SLOTS_START is the first slot available for node communication
	// All slots are now available since node mapping is stored in a separate file
	SBD_USABLE_SLOTS_START = 1
	// SBD_USABLE_SLOTS_END is the last slot available for node communication
	SBD_USABLE_SLOTS_END = SBD_MAX_NODES
	// SBD_MAX_RETRIES is the maximum number of collision resolution attempts
	SBD_MAX_RETRIES = 10
)
SBD Node Mapping Constants
Variables ¶
var (
	ErrVersionMismatch    = errors.New("version mismatch during atomic update")
	ErrMaxRetriesExceeded = errors.New("maximum retry attempts exceeded")
)
Error types for node manager operations
Functions ¶
func CalculateChecksum ¶
CalculateChecksum computes the CRC32 checksum of the provided data. This function uses the IEEE polynomial for CRC32 calculation, which is the standard for most applications.
Parameters:
- data: Byte slice for which to calculate the checksum
Returns:
- uint32: CRC32 checksum of the input data
func GetFenceReasonName ¶
GetFenceReasonName returns a human-readable name for the fence reason.
Parameters:
- reason: Fence reason code
Returns:
- string: Human-readable name for the fence reason
func GetMessageTypeName ¶
GetMessageTypeName returns a human-readable name for the message type.
Parameters:
- msgType: Message type byte
Returns:
- string: Human-readable name for the message type
func IsEmptySlot ¶
IsEmptySlot checks if a slot contains only zeros or is uninitialized.
func IsValidMessageType ¶
IsValidMessageType checks if the given message type is valid.
Parameters:
- msgType: Message type byte to validate
Returns:
- bool: True if the message type is valid, false otherwise
func IsValidNodeID ¶
IsValidNodeID checks if the given node ID is valid.
Parameters:
- nodeID: Node ID to validate
Returns:
- bool: True if the node ID is valid, false otherwise
func Marshal ¶
func Marshal(msg SBDMessageHeader) ([]byte, error)
Marshal serializes an SBDMessageHeader to a byte slice using binary encoding. It calculates and includes the CRC32 checksum of the message for validation.
The marshaled format uses little-endian byte order for consistency across different architectures.
Parameters:
- msg: The SBD message header to serialize
Returns:
- []byte: Serialized message as byte slice
- error: Error if marshaling fails
func MarshalFence ¶
func MarshalFence(msg SBDFenceMessage) ([]byte, error)
MarshalFence serializes a complete SBD fence message to a byte slice. It includes both the header and fence-specific fields.
Parameters:
- msg: The fence message to serialize
Returns:
- []byte: Serialized fence message
- error: Error if marshaling fails
func MarshalHeartbeat ¶
func MarshalHeartbeat(msg SBDHeartbeatMessage) ([]byte, error)
MarshalHeartbeat serializes a complete SBD heartbeat message to a byte slice.
Parameters:
- msg: The heartbeat message to serialize
Returns:
- []byte: Serialized heartbeat message
- error: Error if marshaling fails
Types ¶
type NodeHasher ¶
type NodeHasher struct {
// contains filtered or unexported fields
}
NodeHasher provides consistent hashing for node names
func NewNodeHasher ¶
func NewNodeHasher(clusterName string) *NodeHasher
NewNodeHasher creates a new node hasher with cluster-specific salt
func (*NodeHasher) CalculateNodeID ¶
func (h *NodeHasher) CalculateNodeID(nodeName string, attempt int) uint16
CalculateNodeID calculates the preferred node ID for a node based on its hash
func (*NodeHasher) HashNodeName ¶
func (h *NodeHasher) HashNodeName(nodeName string) uint32
HashNodeName generates a consistent hash for a node name
type NodeManager ¶
type NodeManager struct {
// contains filtered or unexported fields
}
NodeManager manages node-to-slot mappings with persistence to a separate file
func NewNodeManager ¶
func NewNodeManager(device SBDDevice, config NodeManagerConfig) (*NodeManager, error)
NewNodeManager creates a new node manager with the specified SBD device
func (*NodeManager) CleanupStaleNodes ¶
func (nm *NodeManager) CleanupStaleNodes() ([]string, error)
CleanupStaleNodes removes nodes that haven't been seen for the configured timeout
func (*NodeManager) Close ¶
func (nm *NodeManager) Close() error
Close closes the node manager and syncs any pending changes
func (*NodeManager) GetActiveNodes ¶
func (nm *NodeManager) GetActiveNodes() []string
GetActiveNodes returns a list of nodes that have been seen recently
func (*NodeManager) GetClusterName ¶
func (nm *NodeManager) GetClusterName() string
GetClusterName returns the cluster name
func (*NodeManager) GetCoordinationStrategy ¶
func (nm *NodeManager) GetCoordinationStrategy() string
GetCoordinationStrategy returns a string describing the coordination strategy being used
func (*NodeManager) GetLastSync ¶
func (nm *NodeManager) GetLastSync() time.Time
GetLastSync returns the timestamp of the last successful sync
func (*NodeManager) GetNodeForNodeID ¶
func (nm *NodeManager) GetNodeForNodeID(slotID uint16) (string, bool)
GetNodeForNodeID returns the node name for a given slot ID
func (*NodeManager) GetNodeIDForNode ¶
func (nm *NodeManager) GetNodeIDForNode(nodeName string) (uint16, error)
GetNodeIDForNode returns the node ID for a given node name, assigning one if necessary. This is the main API for obtaining a node's ID for SBD protocol operations.
func (*NodeManager) GetStats ¶
func (nm *NodeManager) GetStats() map[string]interface{}
GetStats returns statistics about the node mapping
func (*NodeManager) IsDirty ¶
func (nm *NodeManager) IsDirty() bool
IsDirty returns whether the mapping table has unsaved changes
func (*NodeManager) IsFileLockingEnabled ¶
func (nm *NodeManager) IsFileLockingEnabled() bool
IsFileLockingEnabled returns whether file locking is enabled
func (*NodeManager) LookupNodeIDForNode ¶
func (nm *NodeManager) LookupNodeIDForNode(nodeName string) (uint16, error)
func (*NodeManager) ReadWithLock ¶
func (nm *NodeManager) ReadWithLock(operation string, fn func() error) error
ReadWithLock executes a read operation using the same coordination strategy as writes. This ensures reads don't happen while writes are in progress, preventing corruption.
func (*NodeManager) ReloadFromDevice ¶
func (nm *NodeManager) ReloadFromDevice() error
ReloadFromDevice forces a reload of the node mapping table from the device
func (*NodeManager) RemoveNode ¶
func (nm *NodeManager) RemoveNode(nodeName string) error
RemoveNode removes a node from the mapping table
func (*NodeManager) StartPeriodicSync ¶
func (nm *NodeManager) StartPeriodicSync() chan struct{}
StartPeriodicSync starts a background goroutine that periodically syncs and cleans up
func (*NodeManager) Sync ¶
func (nm *NodeManager) Sync() error
Sync forces an immediate synchronization of the mapping table to the SBD device
func (*NodeManager) ValidateIntegrity ¶
func (nm *NodeManager) ValidateIntegrity() error
ValidateIntegrity checks the integrity of the node mapping
func (*NodeManager) WriteWithLock ¶
func (nm *NodeManager) WriteWithLock(operation string, fn func() error) error
WriteWithLock executes a write operation using the configured coordination strategy. When file locking is enabled and a device path is available, it uses POSIX file locking. When file locking is disabled or no device path is available, it uses randomized jitter to reduce write contention between nodes.
type NodeManagerConfig ¶
type NodeManagerConfig struct {
ClusterName string
SyncInterval time.Duration
StaleNodeTimeout time.Duration
Logger logr.Logger
// File locking configuration
FileLockingEnabled bool
}
NodeManagerConfig holds configuration for the node manager
func DefaultNodeManagerConfig ¶
func DefaultNodeManagerConfig() NodeManagerConfig
DefaultNodeManagerConfig returns a default configuration
type NodeMapEntry ¶
type NodeMapEntry struct {
NodeName string `json:"node_name"`
NodeID uint16 `json:"node_id"`
Hash uint32 `json:"hash"`
Timestamp time.Time `json:"timestamp"`
LastSeen time.Time `json:"last_seen"`
ClusterName string `json:"cluster_name,omitempty"`
}
NodeMapEntry represents a single entry in the node mapping table
type NodeMapTable ¶
type NodeMapTable struct {
Magic [8]byte `json:"-"`
Version uint64 `json:"version"`
ClusterName string `json:"cluster_name"`
Entries map[string]*NodeMapEntry `json:"entries"`
NodeUsage map[uint16]string `json:"node_usage"` // node_id -> node_name
LastUpdate time.Time `json:"last_update"`
Checksum uint32 `json:"-"`
// contains filtered or unexported fields
}
NodeMapTable represents the complete node-to-slot mapping table
func NewNodeMapTable ¶
func NewNodeMapTable(clusterName string) *NodeMapTable
NewNodeMapTable creates a new node mapping table
func UnmarshalNodeMapTable ¶
func UnmarshalNodeMapTable(data []byte) (*NodeMapTable, error)
UnmarshalNodeMapTable deserializes bytes into a node mapping table.
func (*NodeMapTable) AssignSlot ¶
func (table *NodeMapTable) AssignSlot(nodeName string, hasher *NodeHasher) (uint16, error)
AssignSlot assigns a slot to a node name using consistent hashing with collision detection
func (*NodeMapTable) CleanupStaleNodes ¶
func (table *NodeMapTable) CleanupStaleNodes(maxAge time.Duration) []string
CleanupStaleNodes removes nodes that haven't been seen for a specified duration
func (*NodeMapTable) GetActiveNodes ¶
func (table *NodeMapTable) GetActiveNodes(maxAge time.Duration) []string
GetActiveNodes returns a list of nodes that have been seen recently
func (*NodeMapTable) GetNodeForNodeID ¶
func (table *NodeMapTable) GetNodeForNodeID(nodeID uint16) (string, bool)
GetNodeForNodeID returns the node name for a given node ID
func (*NodeMapTable) GetNodeIDForNode ¶
func (table *NodeMapTable) GetNodeIDForNode(nodeName string) (uint16, bool)
GetNodeIDForNode returns the node ID for a given node name
func (*NodeMapTable) GetStaleNodes ¶
func (table *NodeMapTable) GetStaleNodes(maxAge time.Duration) []string
GetStaleNodes returns a list of nodes that haven't been seen recently
func (*NodeMapTable) GetStats ¶
func (table *NodeMapTable) GetStats() map[string]interface{}
GetStats returns statistics about the node mapping table
func (*NodeMapTable) Marshal ¶
func (table *NodeMapTable) Marshal() ([]byte, error)
Marshal serializes the node mapping table to bytes for storage
func (*NodeMapTable) RemoveNode ¶
func (table *NodeMapTable) RemoveNode(nodeName string) error
RemoveNode removes a node from the mapping table
func (*NodeMapTable) UpdateLastSeen ¶
func (table *NodeMapTable) UpdateLastSeen(nodeName string) error
UpdateLastSeen updates the last seen timestamp for a node
type PathProvider ¶
type PathProvider interface {
Path() string
}
PathProvider interface for getting device path (needed for file locking)
type SBDFenceMessage ¶
type SBDFenceMessage struct {
Header SBDMessageHeader
TargetNodeID uint16 // ID of the node to be fenced
Reason uint8 // Reason for fencing
NodeID uint16 // ID of the node that is fencing the target node
}
SBDFenceMessage represents a fence message in the SBD protocol. Fence messages are used to request that a specific node be fenced (shut down) due to various reasons such as missed heartbeats or resource conflicts.
func NewFence ¶
func NewFence(nodeID, targetNodeID uint16, sequence uint64, reason uint8) SBDFenceMessage
NewFence creates a new SBD fence message. It initializes all required fields for a fence request with the specified target and reason.
Parameters:
- nodeID: Unique identifier of the sending node
- targetNodeID: Unique identifier of the node to be fenced
- sequence: Incremental sequence number for message ordering
- reason: Reason code for the fencing request
Returns:
- SBDFenceMessage: Initialized fence message
func UnmarshalFence ¶
func UnmarshalFence(data []byte) (*SBDFenceMessage, error)
UnmarshalFence deserializes a byte slice into an SBD fence message.
Parameters:
- data: Byte slice containing the serialized fence message
Returns:
- *SBDFenceMessage: Pointer to the deserialized fence message
- error: Error if unmarshaling fails
type SBDHeartbeatMessage ¶
type SBDHeartbeatMessage struct {
Header SBDMessageHeader
}
SBDHeartbeatMessage represents a heartbeat message in the SBD protocol. Heartbeat messages are used to indicate that a node is alive and functioning. Currently, heartbeat messages only contain the header information.
func UnmarshalHeartbeat ¶
func UnmarshalHeartbeat(data []byte) (*SBDHeartbeatMessage, error)
UnmarshalHeartbeat deserializes a byte slice into an SBD heartbeat message.
Parameters:
- data: Byte slice containing the serialized heartbeat message
Returns:
- *SBDHeartbeatMessage: Pointer to the deserialized heartbeat message
- error: Error if unmarshaling fails
type SBDMessageHeader ¶
type SBDMessageHeader struct {
// Magic contains the magic string for message identification and validation
Magic [8]byte
// Version specifies the SBD protocol version (currently 1)
Version uint16
// Type identifies the message type (heartbeat, fence, etc.)
Type byte
// NodeID is the unique identifier of the sending node
NodeID uint16
// Timestamp contains the Unix timestamp (nanoseconds) when the message was created
Timestamp uint64
// Sequence is an incremental sequence number for message ordering
Sequence uint64
// Checksum contains the CRC32 checksum of the entire message for validation
Checksum uint32
}
SBDMessageHeader represents the common header for all SBD messages. This header is present in all SBD protocol messages and contains essential metadata for message validation, routing, and ordering.
func NewHeartbeat ¶
func NewHeartbeat(nodeID uint16, sequence uint64) SBDMessageHeader
NewHeartbeat creates a new SBD heartbeat message header. It initializes all required fields with appropriate values and sets the current timestamp.
Parameters:
- nodeID: Unique identifier of the sending node
- sequence: Incremental sequence number for message ordering
Returns:
- SBDMessageHeader: Initialized heartbeat message header
func Unmarshal ¶
func Unmarshal(data []byte) (*SBDMessageHeader, error)
Unmarshal deserializes a byte slice into an SBDMessageHeader using binary decoding. It validates the magic string and verifies the CRC32 checksum of the message.
Parameters:
- data: Byte slice containing the serialized message
Returns:
- *SBDMessageHeader: Pointer to the deserialized message header
- error: Error if unmarshaling fails or validation fails