resilience

package
v0.7.4 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 19, 2025 License: Apache-2.0 Imports: 12 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// BackoffPresets provides preset backoff strategies.
//
// Every preset is a pointer stored in this package-level variable, so all
// callers share the same instance; changing a preset's exported fields is
// visible to every other user of that preset.
var BackoffPresets = struct {
	// Fast is for low-latency networks: an ExponentialBackoff with
	// InitialDelay 50ms, MaxDelay 1s, Multiplier 1.5, MaxAttempts 5 and
	// Jitter enabled.
	Fast BackoffStrategy
	// Normal is for typical networks: an ExponentialBackoff with
	// InitialDelay 100ms, MaxDelay 5s, Multiplier 2.0, MaxAttempts 5 and
	// Jitter enabled.
	Normal BackoffStrategy
	// Slow is for high-latency networks: an ExponentialBackoff with
	// InitialDelay 500ms, MaxDelay 30s, Multiplier 2.0, MaxAttempts 5 and
	// Jitter enabled.
	Slow BackoffStrategy
	// Aggressive is for time-sensitive operations: a LinearBackoff with
	// InitialDelay 10ms, Increment 10ms, MaxDelay 500ms and MaxAttempts 10.
	Aggressive BackoffStrategy
	// Conservative is for rate-limited operations: a FibonacciBackoff with
	// InitialDelay 1s, MaxDelay 60s and MaxAttempts 8.
	Conservative BackoffStrategy
}{
	Fast: &ExponentialBackoff{
		InitialDelay: 50 * time.Millisecond,
		MaxDelay:     1 * time.Second,
		Multiplier:   1.5,
		MaxAttempts:  5,
		Jitter:       true,
	},
	Normal: &ExponentialBackoff{
		InitialDelay: 100 * time.Millisecond,
		MaxDelay:     5 * time.Second,
		Multiplier:   2.0,
		MaxAttempts:  5,
		Jitter:       true,
	},
	Slow: &ExponentialBackoff{
		InitialDelay: 500 * time.Millisecond,
		MaxDelay:     30 * time.Second,
		Multiplier:   2.0,
		MaxAttempts:  5,
		Jitter:       true,
	},
	Aggressive: &LinearBackoff{
		InitialDelay: 10 * time.Millisecond,
		Increment:    10 * time.Millisecond,
		MaxDelay:     500 * time.Millisecond,
		MaxAttempts:  10,
	},
	Conservative: &FibonacciBackoff{
		InitialDelay: 1 * time.Second,
		MaxDelay:     60 * time.Second,
		MaxAttempts:  8,
	},
}

BackoffPresets provides preset backoff strategies.

Functions

func IsRetryableError

func IsRetryableError(err error) bool

IsRetryableError determines if an error is retryable.

Types

type AdaptiveBackoff

// AdaptiveBackoff adapts delay based on success rate.
//
// NextDelay computes the delay and UpdateSuccessRate updates the success
// rate; both are methods on *AdaptiveBackoff.
//
// NOTE(review): the expected range of SuccessRate (e.g. 0-1 vs. percent) and
// how Multiplier enters the calculation are not visible here — confirm
// against the NextDelay implementation.
type AdaptiveBackoff struct {
	InitialDelay time.Duration
	MaxDelay     time.Duration
	MaxAttempts  int
	SuccessRate  float64
	Multiplier   float64
}

AdaptiveBackoff adapts delay based on success rate.

func (*AdaptiveBackoff) NextDelay

func (ab *AdaptiveBackoff) NextDelay(attempt int) time.Duration

NextDelay returns adaptive delay based on success rate.

func (*AdaptiveBackoff) UpdateSuccessRate

func (ab *AdaptiveBackoff) UpdateSuccessRate(rate float64)

UpdateSuccessRate updates the success rate for adaptive backoff.

type BackoffFactory

// BackoffFactory creates backoff strategies.
//
// It has no fields; its New*Backoff methods use value receivers and each
// returns a BackoffStrategy.
type BackoffFactory struct{}

BackoffFactory creates backoff strategies.

func (BackoffFactory) NewAdaptiveBackoff

func (BackoffFactory) NewAdaptiveBackoff() BackoffStrategy

NewAdaptiveBackoff creates adaptive backoff.

func (BackoffFactory) NewConstantBackoff

func (BackoffFactory) NewConstantBackoff() BackoffStrategy

NewConstantBackoff creates constant backoff with defaults.

func (BackoffFactory) NewDecorrelatedJitterBackoff

func (BackoffFactory) NewDecorrelatedJitterBackoff() BackoffStrategy

NewDecorrelatedJitterBackoff creates decorrelated jitter backoff.

func (BackoffFactory) NewExponentialBackoff

func (BackoffFactory) NewExponentialBackoff() BackoffStrategy

NewExponentialBackoff creates exponential backoff with defaults.

func (BackoffFactory) NewFibonacciBackoff

func (BackoffFactory) NewFibonacciBackoff() BackoffStrategy

NewFibonacciBackoff creates Fibonacci backoff with defaults.

func (BackoffFactory) NewLinearBackoff

func (BackoffFactory) NewLinearBackoff() BackoffStrategy

NewLinearBackoff creates linear backoff with defaults.

type BackoffStrategy

// BackoffStrategy defines a backoff strategy for retries.
//
// In this package, *AdaptiveBackoff, *ConstantBackoff,
// *DecorrelatedJitterBackoff, *ExponentialBackoff, *FibonacciBackoff and
// *LinearBackoff all declare a matching NextDelay method.
type BackoffStrategy interface {
	// NextDelay returns the next delay duration for the given attempt.
	// It returns 0 if max attempts have been reached.
	NextDelay(attempt int) time.Duration
}

BackoffStrategy defines a backoff strategy for retries.

type CircuitBreakerConfig

// CircuitBreakerConfig contains circuit breaker configuration.
//
// It is the configuration type accepted by NewCircuitBreakerManager.
type CircuitBreakerConfig struct {
	// FailureThreshold is the failure threshold before opening.
	FailureThreshold int
	// SuccessThreshold is the success threshold to close from half-open.
	SuccessThreshold int
	// Timeout is the time to wait before attempting half-open.
	Timeout time.Duration
	// MaxHalfOpenRequests is the maximum number of requests in the
	// half-open state.
	MaxHalfOpenRequests int
}

CircuitBreakerConfig contains circuit breaker configuration.

type CircuitBreakerManager

type CircuitBreakerManager struct {
	// contains filtered or unexported fields
}

CircuitBreakerManager manages circuit breakers for peer communications.

func NewCircuitBreakerManager

func NewCircuitBreakerManager(config CircuitBreakerConfig, logger forge.Logger) *CircuitBreakerManager

NewCircuitBreakerManager creates a new circuit breaker manager.

func (*CircuitBreakerManager) Execute

func (cbm *CircuitBreakerManager) Execute(
	ctx context.Context,
	peerID string,
	operation string,
	fn func() error,
) error

Execute executes an operation with circuit breaker protection.

func (*CircuitBreakerManager) GetAllPeerHealth

func (cbm *CircuitBreakerManager) GetAllPeerHealth() map[string]*PeerHealth

GetAllPeerHealth returns health for all peers.

func (*CircuitBreakerManager) GetOrCreateBreaker

func (cbm *CircuitBreakerManager) GetOrCreateBreaker(peerID string) *PeerCircuitBreaker

GetOrCreateBreaker gets or creates a circuit breaker for a peer.

func (*CircuitBreakerManager) GetPeerHealth

func (cbm *CircuitBreakerManager) GetPeerHealth(peerID string) *PeerHealth

GetPeerHealth returns health information for a peer.

func (*CircuitBreakerManager) GetState

func (cbm *CircuitBreakerManager) GetState(peerID string) CircuitState

GetState gets the circuit breaker state for a peer.

func (*CircuitBreakerManager) GetStatistics

func (cbm *CircuitBreakerManager) GetStatistics() CircuitBreakerStatistics

GetStatistics returns circuit breaker statistics.

func (*CircuitBreakerManager) GetUnhealthyPeers

func (cbm *CircuitBreakerManager) GetUnhealthyPeers() []string

GetUnhealthyPeers returns the list of peers with open circuit breakers.

func (*CircuitBreakerManager) IsOpen

func (cbm *CircuitBreakerManager) IsOpen(peerID string) bool

IsOpen checks if circuit breaker is open for a peer.

func (*CircuitBreakerManager) MonitorHealth

func (cbm *CircuitBreakerManager) MonitorHealth(ctx context.Context, interval time.Duration)

MonitorHealth monitors circuit breaker health.

func (*CircuitBreakerManager) RemoveBreaker

func (cbm *CircuitBreakerManager) RemoveBreaker(peerID string)

RemoveBreaker removes a circuit breaker for a peer.

func (*CircuitBreakerManager) Reset

func (cbm *CircuitBreakerManager) Reset(peerID string)

Reset resets a circuit breaker for a peer.

func (*CircuitBreakerManager) ResetAll

func (cbm *CircuitBreakerManager) ResetAll()

ResetAll resets all circuit breakers.

func (*CircuitBreakerManager) TripBreaker

func (cbm *CircuitBreakerManager) TripBreaker(peerID string) error

TripBreaker manually trips (opens) a circuit breaker.

type CircuitBreakerStatistics

// CircuitBreakerStatistics contains circuit breaker statistics.
//
// It is the value returned by CircuitBreakerManager.GetStatistics.
//
// NOTE(review): presumably the Open/HalfOpen/Closed counts partition
// TotalBreakers and the request counters aggregate over all peers — the
// aggregation code is not visible here; confirm.
type CircuitBreakerStatistics struct {
	TotalBreakers    int
	OpenBreakers     int
	HalfOpenBreakers int
	ClosedBreakers   int
	TotalRequests    int64
	FailedRequests   int64
	RejectedRequests int64
}

CircuitBreakerStatistics contains circuit breaker statistics.

type CircuitState

// CircuitState represents circuit breaker state.
//
// It is a string type; the values defined by this package are "closed",
// "open" and "half-open".
type CircuitState string

CircuitState represents circuit breaker state.

// Defined CircuitState values.
const (
	// CircuitStateClosed means the circuit is closed (normal operation).
	CircuitStateClosed CircuitState = "closed"
	// CircuitStateOpen means the circuit is open (failing).
	CircuitStateOpen CircuitState = "open"
	// CircuitStateHalfOpen means the circuit is half-open (testing).
	CircuitStateHalfOpen CircuitState = "half-open"
)

func (CircuitState) String

func (cs CircuitState) String() string

String returns string representation of circuit state.

type ConstantBackoff

// ConstantBackoff implements constant backoff strategy.
//
// Its NextDelay method returns a constant delay.
//
// NOTE(review): presumably Delay is that constant and MaxAttempts bounds the
// number of attempts — confirm against the NextDelay implementation.
type ConstantBackoff struct {
	Delay       time.Duration
	MaxAttempts int
}

ConstantBackoff implements constant backoff strategy.

func (*ConstantBackoff) NextDelay

func (cb *ConstantBackoff) NextDelay(attempt int) time.Duration

NextDelay returns constant delay.

type DecorrelatedJitterBackoff

type DecorrelatedJitterBackoff struct {
	BaseDelay   time.Duration
	MaxDelay    time.Duration
	MaxAttempts int
	// contains filtered or unexported fields
}

DecorrelatedJitterBackoff implements AWS's decorrelated jitter backoff.

func (*DecorrelatedJitterBackoff) NextDelay

func (djb *DecorrelatedJitterBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay with decorrelated jitter.

type ExponentialBackoff

// ExponentialBackoff implements exponential backoff strategy.
//
// The Fast, Normal and Slow entries of BackoffPresets are instances of this
// type.
//
// NOTE(review): presumably MaxDelay caps the computed delay and Jitter
// randomizes it; whether the cap applies before or after jitter is not
// visible here — confirm against the NextDelay implementation.
type ExponentialBackoff struct {
	InitialDelay time.Duration
	MaxDelay     time.Duration
	Multiplier   float64
	MaxAttempts  int
	Jitter       bool
}

ExponentialBackoff implements exponential backoff strategy.

func (*ExponentialBackoff) NextDelay

func (eb *ExponentialBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay with exponential backoff.

type FailoverConfig

// FailoverConfig contains failover configuration.
//
// It is the configuration type accepted by NewFailoverManager.
type FailoverConfig struct {
	// AutoFailoverEnabled enables automatic failover.
	AutoFailoverEnabled bool
	// HealthCheckInterval is the health check interval.
	HealthCheckInterval time.Duration
	// FailureThreshold is the failure threshold before failover.
	FailureThreshold int
	// CooldownPeriod is the cooldown period between failovers.
	CooldownPeriod time.Duration
	// UsePriority prefers peers with higher priority.
	UsePriority bool
}

FailoverConfig contains failover configuration.

type FailoverEvent

// FailoverEvent represents a failover event.
//
// FailoverManager.GetFailoverHistory returns a slice of these.
//
// NOTE(review): presumably FromPeer/ToPeer are peer IDs as passed to
// Failover, and Duration is how long the failover took — the code that
// populates the event is not visible here; confirm.
type FailoverEvent struct {
	Timestamp time.Time
	FromPeer  string
	ToPeer    string
	Reason    string
	Success   bool
	Duration  time.Duration
}

FailoverEvent represents a failover event.

type FailoverManager

type FailoverManager struct {
	// contains filtered or unexported fields
}

FailoverManager manages automatic failover between nodes.

func NewFailoverManager

func NewFailoverManager(config FailoverConfig, logger forge.Logger) *FailoverManager

NewFailoverManager creates a new failover manager.

func (*FailoverManager) AutoFailover

func (fm *FailoverManager) AutoFailover(
	ctx context.Context,
	fromPeer string,
	reason string,
	fn func(toPeer string) error,
	maxAttempts int,
) error

AutoFailover automatically fails over with retry.

func (*FailoverManager) Failover

func (fm *FailoverManager) Failover(
	ctx context.Context,
	fromPeer string,
	reason string,
	fn func(toPeer string) error,
) error

Failover performs failover from one peer to another.

func (*FailoverManager) GetAllPeerStatus

func (fm *FailoverManager) GetAllPeerStatus() map[string]*PeerStatus

GetAllPeerStatus returns status for all peers.

func (*FailoverManager) GetAvailablePeers

func (fm *FailoverManager) GetAvailablePeers() []string

GetAvailablePeers returns the list of available peers.

func (*FailoverManager) GetFailoverHistory

func (fm *FailoverManager) GetFailoverHistory(limit int) []FailoverEvent

GetFailoverHistory returns failover history.

func (*FailoverManager) GetPeerStatus

func (fm *FailoverManager) GetPeerStatus(peerID string) (*PeerStatus, error)

GetPeerStatus returns status for a peer.

func (*FailoverManager) GetStatistics

func (fm *FailoverManager) GetStatistics() FailoverStatistics

GetStatistics returns failover statistics.

func (*FailoverManager) MarkPeerAvailable

func (fm *FailoverManager) MarkPeerAvailable(peerID string)

MarkPeerAvailable marks a peer as available.

func (*FailoverManager) MarkPeerUnavailable

func (fm *FailoverManager) MarkPeerUnavailable(peerID string, reason string)

MarkPeerUnavailable marks a peer as unavailable.

func (*FailoverManager) MonitorHealth

func (fm *FailoverManager) MonitorHealth(ctx context.Context)

MonitorHealth monitors peer health and triggers auto-failover.

func (*FailoverManager) RegisterPeer

func (fm *FailoverManager) RegisterPeer(peerID string, priority int)

RegisterPeer registers a peer for failover.

func (*FailoverManager) ResetPeerMetrics

func (fm *FailoverManager) ResetPeerMetrics(peerID string)

ResetPeerMetrics resets metrics for a peer.

func (*FailoverManager) SelectPeer

func (fm *FailoverManager) SelectPeer(excludePeers []string) (string, error)

SelectPeer selects the best available peer.

func (*FailoverManager) UnregisterPeer

func (fm *FailoverManager) UnregisterPeer(peerID string)

UnregisterPeer unregisters a peer.

func (*FailoverManager) UpdatePeerHealth

func (fm *FailoverManager) UpdatePeerHealth(peerID string, responseTime time.Duration, success bool)

UpdatePeerHealth updates peer health metrics.

type FailoverStatistics

// FailoverStatistics contains failover statistics.
//
// It is the value returned by FailoverManager.GetStatistics.
//
// NOTE(review): how AverageFailoverTime is computed (e.g. over all
// failovers or only successful ones) is not visible here — confirm.
type FailoverStatistics struct {
	TotalFailovers      int64
	SuccessfulFailovers int64
	FailedFailovers     int64
	AverageFailoverTime time.Duration
}

FailoverStatistics contains failover statistics.

type FibonacciBackoff

// FibonacciBackoff implements Fibonacci backoff strategy.
//
// Its NextDelay method returns the next delay using the Fibonacci sequence.
// The Conservative entry of BackoffPresets is an instance of this type.
//
// NOTE(review): presumably InitialDelay is the unit scaled by the Fibonacci
// number and MaxDelay caps the result — confirm against NextDelay.
type FibonacciBackoff struct {
	InitialDelay time.Duration
	MaxDelay     time.Duration
	MaxAttempts  int
}

FibonacciBackoff implements Fibonacci backoff strategy.

func (*FibonacciBackoff) NextDelay

func (fb *FibonacciBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay using Fibonacci sequence.

type LinearBackoff

// LinearBackoff implements linear backoff strategy.
//
// The Aggressive entry of BackoffPresets is an instance of this type.
//
// NOTE(review): presumably the delay starts at InitialDelay, grows by
// Increment per attempt and is capped at MaxDelay — confirm against the
// NextDelay implementation.
type LinearBackoff struct {
	InitialDelay time.Duration
	Increment    time.Duration
	MaxDelay     time.Duration
	MaxAttempts  int
}

LinearBackoff implements linear backoff strategy.

func (*LinearBackoff) NextDelay

func (lb *LinearBackoff) NextDelay(attempt int) time.Duration

NextDelay returns the next delay with linear backoff.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager provides resilience features for consensus operations.

func NewManager

func NewManager(config ManagerConfig, forgeLogger forge.Logger, metrics shared.Metrics) *Manager

NewManager creates a new resilience manager.

func (*Manager) ExecuteCommit

func (m *Manager) ExecuteCommit(ctx context.Context, fn func() error) error

ExecuteCommit executes a commit operation with retry logic.

func (*Manager) ExecuteRPC

func (m *Manager) ExecuteRPC(ctx context.Context, fn func() error) error

ExecuteRPC executes an RPC with retry logic.

func (*Manager) ExecuteRPCWithCircuitBreaker

func (m *Manager) ExecuteRPCWithCircuitBreaker(ctx context.Context, fn func() error) error

ExecuteRPCWithCircuitBreaker executes an RPC with circuit breaker and retry.

func (*Manager) ExecuteReplication

func (m *Manager) ExecuteReplication(ctx context.Context, fn func() error) error

ExecuteReplication executes a replication operation with circuit breaker.

func (*Manager) ExecuteSnapshot

func (m *Manager) ExecuteSnapshot(ctx context.Context, fn func() error) error

ExecuteSnapshot executes a snapshot operation with retry logic.

func (*Manager) ExecuteStorage

func (m *Manager) ExecuteStorage(ctx context.Context, fn func() error) error

ExecuteStorage executes a storage operation with circuit breaker.

func (*Manager) ExecuteWithTimeout

func (m *Manager) ExecuteWithTimeout(ctx context.Context, timeout time.Duration, fn func(context.Context) error) error

ExecuteWithTimeout executes a function with timeout.

func (*Manager) GetStats

func (m *Manager) GetStats() map[string]any

GetStats returns resilience statistics.

func (*Manager) IsRetryableError

func (m *Manager) IsRetryableError(err error) bool

IsRetryableError checks if an error is retryable.

func (*Manager) Reset

func (m *Manager) Reset()

Reset resets all resilience components.

type ManagerConfig

// ManagerConfig contains resilience manager configuration.
//
// It is the configuration type accepted by NewManager.
//
// NOTE(review): FailureThreshold is a float64 here but an int in
// CircuitBreakerConfig and FailoverConfig, so it presumably has a different
// meaning (e.g. a ratio rather than a count) — confirm against the code that
// consumes it, and how it relates to MaxFailures.
type ManagerConfig struct {
	// Retry configuration
	MaxRPCAttempts      int
	MaxCommitAttempts   int
	MaxSnapshotAttempts int
	RetryInitialDelay   time.Duration
	RetryMaxDelay       time.Duration

	// Circuit breaker configuration
	MaxFailures         int
	FailureThreshold    float64
	RecoveryTimeout     time.Duration
	HalfOpenMaxRequests int
}

ManagerConfig contains resilience manager configuration.

type OperationStats

type OperationStats struct {
	// contains filtered or unexported fields
}

OperationStats tracks operation timing statistics.

type PeerCircuitBreaker

type PeerCircuitBreaker struct {
	PeerID string
	// contains filtered or unexported fields
}

PeerCircuitBreaker wraps a circuit breaker with peer-specific state.

type PeerHealth

// PeerHealth contains peer health information.
//
// CircuitBreakerManager.GetPeerHealth returns a pointer to one of these and
// GetAllPeerHealth returns a map of them keyed by string.
//
// NOTE(review): presumably the map key is the peer ID, Healthy is derived
// from State, and SuccessRate from TotalRequests/FailedRequests; the
// derivation and the range of SuccessRate are not visible here — confirm.
type PeerHealth struct {
	PeerID         string
	State          CircuitState
	Healthy        bool
	FailureCount   int
	SuccessCount   int
	TotalRequests  int64
	FailedRequests int64
	SuccessRate    float64
	LastFailure    time.Time
	LastSuccess    time.Time
}

PeerHealth contains peer health information.

type PeerStatus

// PeerStatus represents peer status for failover.
//
// FailoverManager.GetPeerStatus and GetAllPeerStatus return pointers to
// values of this type.
//
// NOTE(review): presumably Priority is the value given to RegisterPeer and
// ResponseTime/HealthScore are maintained by UpdatePeerHealth; the scale of
// HealthScore is not visible here — confirm.
type PeerStatus struct {
	PeerID        string
	Available     bool
	Priority      int
	LastSeen      time.Time
	LastFailover  time.Time
	FailoverCount int
	ResponseTime  time.Duration
	HealthScore   float64
}

PeerStatus represents peer status for failover.

type RetryEvent

// RetryEvent represents a retry event.
//
// NOTE(review): no exported function in the documentation shown here takes
// or returns a RetryEvent, so how and where events are produced is not
// visible — confirm. Presumably Attempt is the attempt number and Error is
// nil when Success is true.
type RetryEvent struct {
	Operation string
	Attempt   int
	Success   bool
	Error     error
	Duration  time.Duration
	Timestamp time.Time
}

RetryEvent represents a retry event.

type RetryManager

type RetryManager struct {
	// contains filtered or unexported fields
}

RetryManager manages retry logic for consensus operations.

func NewRetryManager

func NewRetryManager(config RetryManagerConfig, logger forge.Logger) *RetryManager

NewRetryManager creates a new retry manager.

func (*RetryManager) Close

func (rm *RetryManager) Close()

Close closes the retry manager.

func (*RetryManager) GetStatistics

func (rm *RetryManager) GetStatistics() RetryStatistics

GetStatistics returns retry statistics.

func (*RetryManager) RetryRPC

func (rm *RetryManager) RetryRPC(ctx context.Context, operation string, fn func() error) error

RetryRPC retries an RPC operation.

func (*RetryManager) RetryReplication

func (rm *RetryManager) RetryReplication(ctx context.Context, nodeID string, fn func() error) error

RetryReplication retries a replication operation.

func (*RetryManager) RetrySnapshot

func (rm *RetryManager) RetrySnapshot(ctx context.Context, operation string, fn func() error) error

RetrySnapshot retries a snapshot operation.

func (*RetryManager) RetryWithBackoff

func (rm *RetryManager) RetryWithBackoff(
	ctx context.Context,
	operation string,
	fn func() error,
	backoff BackoffStrategy,
) error

RetryWithBackoff retries with custom backoff.

type RetryManagerConfig

// RetryManagerConfig contains retry manager configuration.
//
// It is the configuration type accepted by NewRetryManager. The fields are
// grouped per operation kind (RPC, replication, snapshot); each group has a
// maximum retry count, a retry delay and a maximum retry delay.
//
// NOTE(review): presumably each group configures the matching RetryRPC,
// RetryReplication and RetrySnapshot method — confirm.
type RetryManagerConfig struct {
	NodeID string

	// RPC retry configuration
	RPCMaxRetries    int
	RPCRetryDelay    time.Duration
	RPCMaxRetryDelay time.Duration

	// Replication retry configuration
	ReplicaMaxRetries    int
	ReplicaRetryDelay    time.Duration
	ReplicaMaxRetryDelay time.Duration

	// Snapshot retry configuration
	SnapshotMaxRetries    int
	SnapshotRetryDelay    time.Duration
	SnapshotMaxRetryDelay time.Duration
}

RetryManagerConfig contains retry manager configuration.

type RetryStatistics

// RetryStatistics contains retry statistics.
//
// It is the value returned by RetryManager.GetStatistics.
//
// NOTE(review): presumably TotalRetries is the sum of the per-operation
// counters — the counting code is not visible here; confirm.
type RetryStatistics struct {
	RPCRetries         int64
	ReplicationRetries int64
	SnapshotRetries    int64
	TotalRetries       int64
	SuccessfulRetries  int64
	FailedRetries      int64
}

RetryStatistics contains retry statistics.

type TimeoutManager

type TimeoutManager struct {
	// contains filtered or unexported fields
}

TimeoutManager manages operation timeouts.

func NewTimeoutManager

func NewTimeoutManager(config TimeoutManagerConfig, logger forge.Logger) *TimeoutManager

NewTimeoutManager creates a new timeout manager.

func (*TimeoutManager) EnableAdaptive

func (tm *TimeoutManager) EnableAdaptive(enable bool)

EnableAdaptive enables or disables adaptive timeouts.

func (*TimeoutManager) GetStatistics

func (tm *TimeoutManager) GetStatistics() TimeoutStatistics

GetStatistics returns timeout statistics.

func (*TimeoutManager) GetTimeouts

func (tm *TimeoutManager) GetTimeouts() map[string]time.Duration

GetTimeouts returns current timeout values.

func (*TimeoutManager) IsAdaptiveEnabled

func (tm *TimeoutManager) IsAdaptiveEnabled() bool

IsAdaptiveEnabled returns whether adaptive timeouts are enabled.

func (*TimeoutManager) ResetStatistics

func (tm *TimeoutManager) ResetStatistics()

ResetStatistics resets operation statistics.

func (*TimeoutManager) SetTimeout

func (tm *TimeoutManager) SetTimeout(operation string, timeout time.Duration) error

SetTimeout sets a timeout value.

func (*TimeoutManager) WithCustomTimeout

func (tm *TimeoutManager) WithCustomTimeout(
	ctx context.Context,
	timeout time.Duration,
	fn func(context.Context) error,
) error

WithCustomTimeout executes operation with custom timeout.

func (*TimeoutManager) WithRPCTimeout

func (tm *TimeoutManager) WithRPCTimeout(ctx context.Context, fn func(context.Context) error) error

WithRPCTimeout executes operation with RPC timeout.

func (*TimeoutManager) WithReplicationTimeout

func (tm *TimeoutManager) WithReplicationTimeout(ctx context.Context, fn func(context.Context) error) error

WithReplicationTimeout executes operation with replication timeout.

func (*TimeoutManager) WithSnapshotTimeout

func (tm *TimeoutManager) WithSnapshotTimeout(ctx context.Context, fn func(context.Context) error) error

WithSnapshotTimeout executes operation with snapshot timeout.

type TimeoutManagerConfig

// TimeoutManagerConfig contains timeout manager configuration.
//
// It is the configuration type accepted by NewTimeoutManager.
//
// NOTE(review): TimeoutManager exposes WithRPCTimeout,
// WithReplicationTimeout and WithSnapshotTimeout but no election
// counterpart, so where ElectionTimeout is consumed is not visible here.
// Presumably MaxSamples bounds the samples kept for adaptive timeouts —
// confirm.
type TimeoutManagerConfig struct {
	NodeID string

	// Timeout values
	RPCTimeout         time.Duration
	ReplicationTimeout time.Duration
	SnapshotTimeout    time.Duration
	ElectionTimeout    time.Duration

	// Adaptive configuration
	AdaptiveEnabled bool
	MaxSamples      int
}

TimeoutManagerConfig contains timeout manager configuration.

type TimeoutStatistics

// TimeoutStatistics contains timeout statistics.
//
// It is the value returned by TimeoutManager.GetStatistics. Average and P99
// fields exist only for RPC and replication; there are none for snapshot or
// election.
//
// NOTE(review): presumably the *Timeout fields mirror the currently
// configured timeouts and the Average/P99 fields are computed from recorded
// operation durations — confirm.
type TimeoutStatistics struct {
	RPCTimeout         time.Duration
	ReplicationTimeout time.Duration
	SnapshotTimeout    time.Duration
	ElectionTimeout    time.Duration
	AdaptiveEnabled    bool
	RPCAverage         time.Duration
	ReplicationAverage time.Duration
	RPCP99             time.Duration
	ReplicationP99     time.Duration
}

TimeoutStatistics contains timeout statistics.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL