resilience

package
v0.1.13 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 28, 2025 License: MIT Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// BackoffPresets provides preset backoff strategies.
//
// Every preset is stored as a pointer in this package-level variable, so all
// callers that read a given preset share the same underlying strategy value.
var BackoffPresets = struct {
	// Fast is for low-latency networks: an ExponentialBackoff with
	// InitialDelay 50ms, MaxDelay 1s, Multiplier 1.5, MaxAttempts 5, Jitter on.
	Fast BackoffStrategy
	// Normal is for typical networks: an ExponentialBackoff with
	// InitialDelay 100ms, MaxDelay 5s, Multiplier 2.0, MaxAttempts 5, Jitter on.
	Normal BackoffStrategy
	// Slow is for high-latency networks: an ExponentialBackoff with
	// InitialDelay 500ms, MaxDelay 30s, Multiplier 2.0, MaxAttempts 5, Jitter on.
	Slow BackoffStrategy
	// Aggressive is for time-sensitive operations: a LinearBackoff with
	// InitialDelay 10ms, Increment 10ms, MaxDelay 500ms, MaxAttempts 10.
	Aggressive BackoffStrategy
	// Conservative is for rate-limited operations: a FibonacciBackoff with
	// InitialDelay 1s, MaxDelay 60s, MaxAttempts 8.
	Conservative BackoffStrategy
}{
	Fast: &ExponentialBackoff{
		InitialDelay: 50 * time.Millisecond,
		MaxDelay:     1 * time.Second,
		Multiplier:   1.5,
		MaxAttempts:  5,
		Jitter:       true,
	},
	Normal: &ExponentialBackoff{
		InitialDelay: 100 * time.Millisecond,
		MaxDelay:     5 * time.Second,
		Multiplier:   2.0,
		MaxAttempts:  5,
		Jitter:       true,
	},
	Slow: &ExponentialBackoff{
		InitialDelay: 500 * time.Millisecond,
		MaxDelay:     30 * time.Second,
		Multiplier:   2.0,
		MaxAttempts:  5,
		Jitter:       true,
	},
	Aggressive: &LinearBackoff{
		InitialDelay: 10 * time.Millisecond,
		Increment:    10 * time.Millisecond,
		MaxDelay:     500 * time.Millisecond,
		MaxAttempts:  10,
	},
	Conservative: &FibonacciBackoff{
		InitialDelay: 1 * time.Second,
		MaxDelay:     60 * time.Second,
		MaxAttempts:  8,
	},
}

BackoffPresets provides preset backoff strategies

Functions

func IsRetryableError

func IsRetryableError(err error) bool

IsRetryableError determines if an error is retryable

Types

type AdaptiveBackoff

// AdaptiveBackoff adapts delay based on success rate.
//
// Its UpdateSuccessRate method updates the success rate and its NextDelay
// method returns a delay based on that rate.
//
// NOTE(review): presumably the rate is held in the SuccessRate field. How
// SuccessRate, Multiplier, InitialDelay and MaxDelay are combined, and the
// expected range of SuccessRate, are not visible here — confirm against the
// NextDelay implementation.
type AdaptiveBackoff struct {
	InitialDelay time.Duration
	MaxDelay     time.Duration
	MaxAttempts  int
	SuccessRate  float64
	Multiplier   float64
}

AdaptiveBackoff adapts delay based on success rate

func (*AdaptiveBackoff) NextDelay

func (ab *AdaptiveBackoff) NextDelay(attempt int) time.Duration

NextDelay returns adaptive delay based on success rate

func (*AdaptiveBackoff) UpdateSuccessRate

func (ab *AdaptiveBackoff) UpdateSuccessRate(rate float64)

UpdateSuccessRate updates the success rate for adaptive backoff

type BackoffFactory

// BackoffFactory creates backoff strategies.
//
// It is an empty struct with no fields. Its New*Backoff methods use a value
// receiver, take no arguments, and each return a BackoffStrategy.
type BackoffFactory struct{}

BackoffFactory creates backoff strategies

func (BackoffFactory) NewAdaptiveBackoff

func (BackoffFactory) NewAdaptiveBackoff() BackoffStrategy

NewAdaptiveBackoff creates adaptive backoff

func (BackoffFactory) NewConstantBackoff

func (BackoffFactory) NewConstantBackoff() BackoffStrategy

NewConstantBackoff creates constant backoff with defaults

func (BackoffFactory) NewDecorrelatedJitterBackoff

func (BackoffFactory) NewDecorrelatedJitterBackoff() BackoffStrategy

NewDecorrelatedJitterBackoff creates decorrelated jitter backoff

func (BackoffFactory) NewExponentialBackoff

func (BackoffFactory) NewExponentialBackoff() BackoffStrategy

NewExponentialBackoff creates exponential backoff with defaults

func (BackoffFactory) NewFibonacciBackoff

func (BackoffFactory) NewFibonacciBackoff() BackoffStrategy

NewFibonacciBackoff creates Fibonacci backoff with defaults

func (BackoffFactory) NewLinearBackoff

func (BackoffFactory) NewLinearBackoff() BackoffStrategy

NewLinearBackoff creates linear backoff with defaults

type BackoffStrategy

// BackoffStrategy defines a backoff strategy for retries.
type BackoffStrategy interface {
	// NextDelay returns the next delay duration for the given attempt.
	// It returns 0 if the maximum number of attempts has been reached.
	//
	// NOTE(review): whether attempt is 0-based or 1-based is not stated
	// here — confirm against the implementations and callers.
	NextDelay(attempt int) time.Duration
}

BackoffStrategy defines a backoff strategy for retries

type CircuitBreakerConfig

// CircuitBreakerConfig contains circuit breaker configuration.
// It is the configuration accepted by NewCircuitBreakerManager.
type CircuitBreakerConfig struct {
	// FailureThreshold is the failure threshold before the circuit opens.
	FailureThreshold int
	// SuccessThreshold is the success threshold needed to close the
	// circuit from the half-open state.
	SuccessThreshold int
	// Timeout is the time to wait before attempting the half-open state.
	Timeout time.Duration
	// MaxHalfOpenRequests is the maximum number of requests in the
	// half-open state.
	MaxHalfOpenRequests int
}

CircuitBreakerConfig contains circuit breaker configuration

type CircuitBreakerManager

type CircuitBreakerManager struct {
	// contains filtered or unexported fields
}

CircuitBreakerManager manages circuit breakers for peer communications

func NewCircuitBreakerManager

func NewCircuitBreakerManager(config CircuitBreakerConfig, logger forge.Logger) *CircuitBreakerManager

NewCircuitBreakerManager creates a new circuit breaker manager

func (*CircuitBreakerManager) Execute

func (cbm *CircuitBreakerManager) Execute(
	ctx context.Context,
	peerID string,
	operation string,
	fn func() error,
) error

Execute executes an operation with circuit breaker protection

func (*CircuitBreakerManager) GetAllPeerHealth

func (cbm *CircuitBreakerManager) GetAllPeerHealth() map[string]*PeerHealth

GetAllPeerHealth returns health for all peers

func (*CircuitBreakerManager) GetOrCreateBreaker

func (cbm *CircuitBreakerManager) GetOrCreateBreaker(peerID string) *PeerCircuitBreaker

GetOrCreateBreaker gets or creates a circuit breaker for a peer

func (*CircuitBreakerManager) GetPeerHealth

func (cbm *CircuitBreakerManager) GetPeerHealth(peerID string) *PeerHealth

GetPeerHealth returns health information for a peer

func (*CircuitBreakerManager) GetState

func (cbm *CircuitBreakerManager) GetState(peerID string) CircuitState

GetState gets the circuit breaker state for a peer

func (*CircuitBreakerManager) GetStatistics

func (cbm *CircuitBreakerManager) GetStatistics() CircuitBreakerStatistics

GetStatistics returns circuit breaker statistics

func (*CircuitBreakerManager) GetUnhealthyPeers

func (cbm *CircuitBreakerManager) GetUnhealthyPeers() []string

GetUnhealthyPeers returns the list of peers with open circuit breakers

func (*CircuitBreakerManager) IsOpen

func (cbm *CircuitBreakerManager) IsOpen(peerID string) bool

IsOpen checks if circuit breaker is open for a peer

func (*CircuitBreakerManager) MonitorHealth

func (cbm *CircuitBreakerManager) MonitorHealth(ctx context.Context, interval time.Duration)

MonitorHealth monitors circuit breaker health

func (*CircuitBreakerManager) RemoveBreaker

func (cbm *CircuitBreakerManager) RemoveBreaker(peerID string)

RemoveBreaker removes a circuit breaker for a peer

func (*CircuitBreakerManager) Reset

func (cbm *CircuitBreakerManager) Reset(peerID string)

Reset resets a circuit breaker for a peer

func (*CircuitBreakerManager) ResetAll

func (cbm *CircuitBreakerManager) ResetAll()

ResetAll resets all circuit breakers

func (*CircuitBreakerManager) TripBreaker

func (cbm *CircuitBreakerManager) TripBreaker(peerID string) error

TripBreaker manually trips (opens) a circuit breaker

type CircuitBreakerStatistics

// CircuitBreakerStatistics contains circuit breaker statistics.
// It is the value returned by CircuitBreakerManager.GetStatistics.
//
// The OpenBreakers, HalfOpenBreakers and ClosedBreakers names mirror the
// three CircuitState values.
//
// NOTE(review): presumably the *Breakers fields count breakers per state
// and the *Requests fields are totals across all breakers; the aggregation
// scope and reset behavior are not visible here — confirm.
type CircuitBreakerStatistics struct {
	TotalBreakers    int
	OpenBreakers     int
	HalfOpenBreakers int
	ClosedBreakers   int
	TotalRequests    int64
	FailedRequests   int64
	RejectedRequests int64
}

CircuitBreakerStatistics contains circuit breaker statistics

type CircuitState

// CircuitState represents circuit breaker state.
// It is a string type; the values defined for it are "closed", "open" and
// "half-open".
type CircuitState string

CircuitState represents circuit breaker state

// Circuit breaker states.
const (
	// CircuitStateClosed means the circuit is closed (normal operation).
	CircuitStateClosed CircuitState = "closed"
	// CircuitStateOpen means the circuit is open (failing).
	CircuitStateOpen CircuitState = "open"
	// CircuitStateHalfOpen means the circuit is half-open (testing).
	CircuitStateHalfOpen CircuitState = "half-open"
)

func (CircuitState) String

func (cs CircuitState) String() string

String returns the string representation of the circuit state

type ConstantBackoff

// ConstantBackoff implements constant backoff strategy.
// Its NextDelay method returns a constant delay.
//
// NOTE(review): presumably Delay is that constant and MaxAttempts is the
// attempt count after which NextDelay returns 0 (per the BackoffStrategy
// contract); the method body is not visible here — confirm.
type ConstantBackoff struct {
	Delay       time.Duration
	MaxAttempts int
}

ConstantBackoff implements constant backoff strategy

func (*ConstantBackoff) NextDelay

func (cb *ConstantBackoff) NextDelay(attempt int) time.Duration

NextDelay returns constant delay

type DecorrelatedJitterBackoff

type DecorrelatedJitterBackoff struct {
	BaseDelay   time.Duration
	MaxDelay    time.Duration
	MaxAttempts int
	// contains filtered or unexported fields
}

DecorrelatedJitterBackoff implements AWS's decorrelated jitter backoff

func (*DecorrelatedJitterBackoff) NextDelay

func (djb *DecorrelatedJitterBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay with decorrelated jitter

type ExponentialBackoff

// ExponentialBackoff implements exponential backoff strategy.
// BackoffPresets.Fast, Normal and Slow are *ExponentialBackoff values.
//
// NOTE(review): the NextDelay body is not visible here, so the following is
// inferred from field names and should be confirmed:
//   - InitialDelay: presumably the delay for the first retry.
//   - MaxDelay: presumably the upper bound on any computed delay.
//   - Multiplier: presumably the per-attempt growth factor.
//   - MaxAttempts: presumably the attempt count after which NextDelay
//     returns 0 (per the BackoffStrategy contract).
//   - Jitter: presumably enables randomization of the computed delay.
type ExponentialBackoff struct {
	InitialDelay time.Duration
	MaxDelay     time.Duration
	Multiplier   float64
	MaxAttempts  int
	Jitter       bool
}

ExponentialBackoff implements exponential backoff strategy

func (*ExponentialBackoff) NextDelay

func (eb *ExponentialBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay with exponential backoff

type FailoverConfig

// FailoverConfig contains failover configuration.
// It is the configuration accepted by NewFailoverManager.
type FailoverConfig struct {
	// AutoFailoverEnabled enables automatic failover.
	AutoFailoverEnabled bool
	// HealthCheckInterval is the health check interval.
	HealthCheckInterval time.Duration
	// FailureThreshold is the failure threshold before failover.
	FailureThreshold int
	// CooldownPeriod is the cooldown period between failovers.
	CooldownPeriod time.Duration
	// UsePriority makes selection prefer peers with higher priority.
	UsePriority bool
}

FailoverConfig contains failover configuration

type FailoverEvent

// FailoverEvent represents a failover event.
// FailoverManager.GetFailoverHistory returns a slice of these.
//
// NOTE(review): FromPeer, ToPeer and Reason share their names with the
// fromPeer/toPeer/reason parameters of FailoverManager.Failover and
// presumably record those values; Duration is presumably how long the
// failover took — confirm against the code that populates the event.
type FailoverEvent struct {
	Timestamp time.Time
	FromPeer  string
	ToPeer    string
	Reason    string
	Success   bool
	Duration  time.Duration
}

FailoverEvent represents a failover event

type FailoverManager

type FailoverManager struct {
	// contains filtered or unexported fields
}

FailoverManager manages automatic failover between nodes

func NewFailoverManager

func NewFailoverManager(config FailoverConfig, logger forge.Logger) *FailoverManager

NewFailoverManager creates a new failover manager

func (*FailoverManager) AutoFailover

func (fm *FailoverManager) AutoFailover(
	ctx context.Context,
	fromPeer string,
	reason string,
	fn func(toPeer string) error,
	maxAttempts int,
) error

AutoFailover automatically fails over with retry

func (*FailoverManager) Failover

func (fm *FailoverManager) Failover(
	ctx context.Context,
	fromPeer string,
	reason string,
	fn func(toPeer string) error,
) error

Failover performs failover from one peer to another

func (*FailoverManager) GetAllPeerStatus

func (fm *FailoverManager) GetAllPeerStatus() map[string]*PeerStatus

GetAllPeerStatus returns status for all peers

func (*FailoverManager) GetAvailablePeers

func (fm *FailoverManager) GetAvailablePeers() []string

GetAvailablePeers returns the list of available peers

func (*FailoverManager) GetFailoverHistory

func (fm *FailoverManager) GetFailoverHistory(limit int) []FailoverEvent

GetFailoverHistory returns failover history

func (*FailoverManager) GetPeerStatus

func (fm *FailoverManager) GetPeerStatus(peerID string) (*PeerStatus, error)

GetPeerStatus returns status for a peer

func (*FailoverManager) GetStatistics

func (fm *FailoverManager) GetStatistics() FailoverStatistics

GetStatistics returns failover statistics

func (*FailoverManager) MarkPeerAvailable

func (fm *FailoverManager) MarkPeerAvailable(peerID string)

MarkPeerAvailable marks a peer as available

func (*FailoverManager) MarkPeerUnavailable

func (fm *FailoverManager) MarkPeerUnavailable(peerID string, reason string)

MarkPeerUnavailable marks a peer as unavailable

func (*FailoverManager) MonitorHealth

func (fm *FailoverManager) MonitorHealth(ctx context.Context)

MonitorHealth monitors peer health and triggers auto-failover

func (*FailoverManager) RegisterPeer

func (fm *FailoverManager) RegisterPeer(peerID string, priority int)

RegisterPeer registers a peer for failover

func (*FailoverManager) ResetPeerMetrics

func (fm *FailoverManager) ResetPeerMetrics(peerID string)

ResetPeerMetrics resets metrics for a peer

func (*FailoverManager) SelectPeer

func (fm *FailoverManager) SelectPeer(excludePeers []string) (string, error)

SelectPeer selects the best available peer

func (*FailoverManager) UnregisterPeer

func (fm *FailoverManager) UnregisterPeer(peerID string)

UnregisterPeer unregisters a peer

func (*FailoverManager) UpdatePeerHealth

func (fm *FailoverManager) UpdatePeerHealth(peerID string, responseTime time.Duration, success bool)

UpdatePeerHealth updates peer health metrics

type FailoverStatistics

// FailoverStatistics contains failover statistics.
// It is the value returned by FailoverManager.GetStatistics.
//
// NOTE(review): presumably TotalFailovers is the sum of SuccessfulFailovers
// and FailedFailovers, and AverageFailoverTime is averaged over all recorded
// failovers; neither is established here — confirm.
type FailoverStatistics struct {
	TotalFailovers      int64
	SuccessfulFailovers int64
	FailedFailovers     int64
	AverageFailoverTime time.Duration
}

FailoverStatistics contains failover statistics

type FibonacciBackoff

// FibonacciBackoff implements Fibonacci backoff strategy.
// Its NextDelay method returns the next delay using the Fibonacci sequence.
// BackoffPresets.Conservative is a *FibonacciBackoff.
//
// NOTE(review): presumably InitialDelay is the unit scaled by the Fibonacci
// number for the attempt, MaxDelay bounds the result, and MaxAttempts is the
// attempt count after which NextDelay returns 0; the method body is not
// visible here — confirm.
type FibonacciBackoff struct {
	InitialDelay time.Duration
	MaxDelay     time.Duration
	MaxAttempts  int
}

FibonacciBackoff implements Fibonacci backoff strategy

func (*FibonacciBackoff) NextDelay

func (fb *FibonacciBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay using Fibonacci sequence

type LinearBackoff

// LinearBackoff implements linear backoff strategy.
// BackoffPresets.Aggressive is a *LinearBackoff.
//
// NOTE(review): presumably the delay starts at InitialDelay, grows by
// Increment per attempt, is bounded by MaxDelay, and NextDelay returns 0
// once MaxAttempts is reached; the method body is not visible here —
// confirm.
type LinearBackoff struct {
	InitialDelay time.Duration
	Increment    time.Duration
	MaxDelay     time.Duration
	MaxAttempts  int
}

LinearBackoff implements linear backoff strategy

func (*LinearBackoff) NextDelay

func (lb *LinearBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay with linear backoff

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager provides resilience features for consensus operations

func NewManager

func NewManager(config ManagerConfig, forgeLogger forge.Logger, metrics shared.Metrics) *Manager

NewManager creates a new resilience manager

func (*Manager) ExecuteCommit

func (m *Manager) ExecuteCommit(ctx context.Context, fn func() error) error

ExecuteCommit executes a commit operation with retry logic

func (*Manager) ExecuteRPC

func (m *Manager) ExecuteRPC(ctx context.Context, fn func() error) error

ExecuteRPC executes an RPC with retry logic

func (*Manager) ExecuteRPCWithCircuitBreaker

func (m *Manager) ExecuteRPCWithCircuitBreaker(ctx context.Context, fn func() error) error

ExecuteRPCWithCircuitBreaker executes an RPC with circuit breaker and retry

func (*Manager) ExecuteReplication

func (m *Manager) ExecuteReplication(ctx context.Context, fn func() error) error

ExecuteReplication executes a replication operation with circuit breaker

func (*Manager) ExecuteSnapshot

func (m *Manager) ExecuteSnapshot(ctx context.Context, fn func() error) error

ExecuteSnapshot executes a snapshot operation with retry logic

func (*Manager) ExecuteStorage

func (m *Manager) ExecuteStorage(ctx context.Context, fn func() error) error

ExecuteStorage executes a storage operation with circuit breaker

func (*Manager) ExecuteWithTimeout

func (m *Manager) ExecuteWithTimeout(ctx context.Context, timeout time.Duration, fn func(context.Context) error) error

ExecuteWithTimeout executes a function with timeout

func (*Manager) GetStats

func (m *Manager) GetStats() map[string]interface{}

GetStats returns resilience statistics

func (*Manager) IsRetryableError

func (m *Manager) IsRetryableError(err error) bool

IsRetryableError checks if an error is retryable

func (*Manager) Reset

func (m *Manager) Reset()

Reset resets all resilience components

type ManagerConfig

// ManagerConfig contains resilience manager configuration.
// It is the configuration accepted by NewManager.
//
// NOTE(review): FailureThreshold is a float64 here but an int in
// CircuitBreakerConfig; the two are presumably different quantities (for
// example a ratio versus a count) — confirm how each is interpreted.
type ManagerConfig struct {
	// Retry configuration.
	// NOTE(review): the Max*Attempts fields presumably bound the retries
	// made by ExecuteRPC, ExecuteCommit and ExecuteSnapshot respectively —
	// confirm.
	MaxRPCAttempts      int
	MaxCommitAttempts   int
	MaxSnapshotAttempts int
	RetryInitialDelay   time.Duration
	RetryMaxDelay       time.Duration

	// Circuit breaker configuration.
	MaxFailures         int
	FailureThreshold    float64
	RecoveryTimeout     time.Duration
	HalfOpenMaxRequests int
}

ManagerConfig contains resilience manager configuration

type OperationStats

type OperationStats struct {
	// contains filtered or unexported fields
}

OperationStats tracks operation timing statistics

type PeerCircuitBreaker

type PeerCircuitBreaker struct {
	PeerID string
	// contains filtered or unexported fields
}

PeerCircuitBreaker wraps a circuit breaker with peer-specific state

type PeerHealth

// PeerHealth contains peer health information.
// It is returned by CircuitBreakerManager.GetPeerHealth and, keyed by a
// string, by CircuitBreakerManager.GetAllPeerHealth.
//
// State holds the peer's CircuitState.
//
// NOTE(review): how Healthy is derived from State, whether FailureCount and
// SuccessCount are consecutive or cumulative counts, and the scale of
// SuccessRate (0–1 versus percent) are not visible here — confirm.
type PeerHealth struct {
	PeerID         string
	State          CircuitState
	Healthy        bool
	FailureCount   int
	SuccessCount   int
	TotalRequests  int64
	FailedRequests int64
	SuccessRate    float64
	LastFailure    time.Time
	LastSuccess    time.Time
}

PeerHealth contains peer health information

type PeerStatus

// PeerStatus represents peer status for failover.
// It is returned by FailoverManager.GetPeerStatus and, keyed by a string,
// by FailoverManager.GetAllPeerStatus.
//
// NOTE(review): presumably Priority is the value passed to
// FailoverManager.RegisterPeer, Available is toggled by MarkPeerAvailable /
// MarkPeerUnavailable, and ResponseTime is fed by UpdatePeerHealth. The
// range and computation of HealthScore are not visible here — confirm.
type PeerStatus struct {
	PeerID        string
	Available     bool
	Priority      int
	LastSeen      time.Time
	LastFailover  time.Time
	FailoverCount int
	ResponseTime  time.Duration
	HealthScore   float64
}

PeerStatus represents peer status for failover

type RetryEvent

// RetryEvent represents a retry event.
//
// NOTE(review): presumably one event describes a single attempt of the named
// Operation, with Error holding that attempt's failure (nil on success) and
// Duration its elapsed time; where events are produced or consumed is not
// visible here — confirm.
type RetryEvent struct {
	Operation string
	Attempt   int
	Success   bool
	Error     error
	Duration  time.Duration
	Timestamp time.Time
}

RetryEvent represents a retry event

type RetryManager

type RetryManager struct {
	// contains filtered or unexported fields
}

RetryManager manages retry logic for consensus operations

func NewRetryManager

func NewRetryManager(config RetryManagerConfig, logger forge.Logger) *RetryManager

NewRetryManager creates a new retry manager

func (*RetryManager) Close

func (rm *RetryManager) Close()

Close closes the retry manager

func (*RetryManager) GetStatistics

func (rm *RetryManager) GetStatistics() RetryStatistics

GetStatistics returns retry statistics

func (*RetryManager) RetryRPC

func (rm *RetryManager) RetryRPC(ctx context.Context, operation string, fn func() error) error

RetryRPC retries an RPC operation

func (*RetryManager) RetryReplication

func (rm *RetryManager) RetryReplication(ctx context.Context, nodeID string, fn func() error) error

RetryReplication retries a replication operation

func (*RetryManager) RetrySnapshot

func (rm *RetryManager) RetrySnapshot(ctx context.Context, operation string, fn func() error) error

RetrySnapshot retries a snapshot operation

func (*RetryManager) RetryWithBackoff

func (rm *RetryManager) RetryWithBackoff(
	ctx context.Context,
	operation string,
	fn func() error,
	backoff BackoffStrategy,
) error

RetryWithBackoff retries with custom backoff

type RetryManagerConfig

// RetryManagerConfig contains retry manager configuration.
// It is the configuration accepted by NewRetryManager.
//
// Each operation class (RPC, replication, snapshot) has the same three
// settings: a maximum retry count, a retry delay, and a maximum retry delay.
//
// NOTE(review): presumably *RetryDelay is the initial delay and
// *MaxRetryDelay the upper bound of a growing backoff — confirm.
type RetryManagerConfig struct {
	NodeID string

	// RPC retry configuration.
	RPCMaxRetries    int
	RPCRetryDelay    time.Duration
	RPCMaxRetryDelay time.Duration

	// Replication retry configuration.
	ReplicaMaxRetries    int
	ReplicaRetryDelay    time.Duration
	ReplicaMaxRetryDelay time.Duration

	// Snapshot retry configuration.
	SnapshotMaxRetries    int
	SnapshotRetryDelay    time.Duration
	SnapshotMaxRetryDelay time.Duration
}

RetryManagerConfig contains retry manager configuration

type RetryStatistics

// RetryStatistics contains retry statistics.
// It is the value returned by RetryManager.GetStatistics.
//
// NOTE(review): presumably TotalRetries is the sum of the per-operation
// counters (RPC, replication, snapshot), and SuccessfulRetries /
// FailedRetries partition it by outcome; this is not established here —
// confirm.
type RetryStatistics struct {
	RPCRetries         int64
	ReplicationRetries int64
	SnapshotRetries    int64
	TotalRetries       int64
	SuccessfulRetries  int64
	FailedRetries      int64
}

RetryStatistics contains retry statistics

type TimeoutManager

type TimeoutManager struct {
	// contains filtered or unexported fields
}

TimeoutManager manages operation timeouts

func NewTimeoutManager

func NewTimeoutManager(config TimeoutManagerConfig, logger forge.Logger) *TimeoutManager

NewTimeoutManager creates a new timeout manager

func (*TimeoutManager) EnableAdaptive

func (tm *TimeoutManager) EnableAdaptive(enable bool)

EnableAdaptive enables or disables adaptive timeouts

func (*TimeoutManager) GetStatistics

func (tm *TimeoutManager) GetStatistics() TimeoutStatistics

GetStatistics returns timeout statistics

func (*TimeoutManager) GetTimeouts

func (tm *TimeoutManager) GetTimeouts() map[string]time.Duration

GetTimeouts returns current timeout values

func (*TimeoutManager) IsAdaptiveEnabled

func (tm *TimeoutManager) IsAdaptiveEnabled() bool

IsAdaptiveEnabled returns whether adaptive timeouts are enabled

func (*TimeoutManager) ResetStatistics

func (tm *TimeoutManager) ResetStatistics()

ResetStatistics resets operation statistics

func (*TimeoutManager) SetTimeout

func (tm *TimeoutManager) SetTimeout(operation string, timeout time.Duration) error

SetTimeout sets a timeout value

func (*TimeoutManager) WithCustomTimeout

func (tm *TimeoutManager) WithCustomTimeout(
	ctx context.Context,
	timeout time.Duration,
	fn func(context.Context) error,
) error

WithCustomTimeout executes operation with custom timeout

func (*TimeoutManager) WithRPCTimeout

func (tm *TimeoutManager) WithRPCTimeout(ctx context.Context, fn func(context.Context) error) error

WithRPCTimeout executes operation with RPC timeout

func (*TimeoutManager) WithReplicationTimeout

func (tm *TimeoutManager) WithReplicationTimeout(ctx context.Context, fn func(context.Context) error) error

WithReplicationTimeout executes operation with replication timeout

func (*TimeoutManager) WithSnapshotTimeout

func (tm *TimeoutManager) WithSnapshotTimeout(ctx context.Context, fn func(context.Context) error) error

WithSnapshotTimeout executes operation with snapshot timeout

type TimeoutManagerConfig

// TimeoutManagerConfig contains timeout manager configuration.
// It is the configuration accepted by NewTimeoutManager.
type TimeoutManagerConfig struct {
	NodeID string

	// Timeout values, one per operation class.
	RPCTimeout         time.Duration
	ReplicationTimeout time.Duration
	SnapshotTimeout    time.Duration
	ElectionTimeout    time.Duration

	// Adaptive configuration.
	// NOTE(review): presumably AdaptiveEnabled is the initial state of the
	// flag toggled by TimeoutManager.EnableAdaptive, and MaxSamples bounds
	// the timing samples kept for adaptation — confirm.
	AdaptiveEnabled bool
	MaxSamples      int
}

TimeoutManagerConfig contains timeout manager configuration

type TimeoutStatistics

// TimeoutStatistics contains timeout statistics.
// It is the value returned by TimeoutManager.GetStatistics.
//
// The four *Timeout fields and AdaptiveEnabled share their names with the
// corresponding TimeoutManagerConfig fields. Averages and P99 values are
// provided for RPC and replication only.
//
// NOTE(review): presumably the *Timeout fields report the currently
// effective timeouts and *P99 is the 99th-percentile observed duration; the
// sample window is not visible here — confirm.
type TimeoutStatistics struct {
	RPCTimeout         time.Duration
	ReplicationTimeout time.Duration
	SnapshotTimeout    time.Duration
	ElectionTimeout    time.Duration
	AdaptiveEnabled    bool
	RPCAverage         time.Duration
	ReplicationAverage time.Duration
	RPCP99             time.Duration
	ReplicationP99     time.Duration
}

TimeoutStatistics contains timeout statistics

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL