Documentation
¶
Index ¶
- Constants
- Variables
- func DetectConfigFromConfigFile() bool
- func DetectConfigFromEnv() bool
- func HardwareInfoPatroni() func(ctx context.Context, state *agent.MetricsState) error
- func IsPatroniManagedParameter(name string) bool
- type Config
- type FailoverDetectedError
- type PatroniAdapter
- func (adapter *PatroniAdapter) ApplyConfig(proposedConfig *agent.ProposedConfigResponse) error
- func (adapter *PatroniAdapter) CheckForFailover(ctx context.Context) error
- func (adapter *PatroniAdapter) Collectors() []agent.MetricCollector
- func (adapter *PatroniAdapter) GetActiveConfig() (agent.ConfigArraySchema, error)
- func (adapter *PatroniAdapter) GetSystemInfo() ([]metrics.FlatValue, error)
- func (adapter *PatroniAdapter) Guardrails() *guardrails.Signal
- type PatroniClusterResponse
- type PatroniClusterStatus
- type PatroniMember
- type PatroniPatchRequest
- type State
- func (s *State) CancelOperations()
- func (s *State) ClearFailoverTime()
- func (s *State) CreateOperationsContext()
- func (s *State) GetLastFailoverTime() time.Time
- func (s *State) GetLastKnownPrimary() string
- func (s *State) GetOperationsContext() context.Context
- func (s *State) IsInRestartWindow() bool
- func (s *State) IsRecoveryContextCreated() bool
- func (s *State) SetInRestartWindow(inRestart bool)
- func (s *State) SetLastFailoverTime(t time.Time)
- func (s *State) SetLastKnownPrimary(primaryNode string)
- func (s *State) SetRecoveryContextCreated(created bool)
- func (s *State) TimeSinceLastFailover() time.Duration
- func (s *State) TimeSinceLastGuardrailCheck() time.Duration
- func (s *State) UpdateLastAppliedConfig()
- func (s *State) UpdateLastGuardrailCheck()
Constants ¶
// Failover-related constants for the Patroni adapter.
const (
	// NoPrimaryDetected is used as a placeholder when no primary node can be identified.
	NoPrimaryDetected = "NO PRIMARY DETECTED"

	// FailoverStabilizationPeriod is the minimum time to wait after failover detection
	// before allowing operations to resume, even if the cluster appears healthy.
	// This ensures:
	//   - Patroni leader election is complete
	//   - PostgreSQL promotion is fully finished (WAL application, timeline change)
	//   - DCS state updates have propagated
	//
	// Even after Patroni reports "running", PostgreSQL may still be completing promotion.
	// Testing with active load may require increasing this value beyond 30s.
	FailoverStabilizationPeriod = 30 * time.Second

	// FailoverGracePeriod is a five-minute duration used in failover handling.
	// NOTE(review): its exact role is not described in this listing; presumably it
	// bounds how long after a failover the adapter stays in recovery handling.
	// Confirm against the callers before relying on this.
	FailoverGracePeriod = 5 * time.Minute
)
// Defaults and queries shared by the Patroni adapter.
// The ALL_CAPS identifiers are exported API and keep their existing spelling.
const (
	// DEFAULT_API_PORT is the default API port number, 8008.
	// NOTE(review): presumably the Patroni REST API port — confirm against usage.
	DEFAULT_API_PORT = 8008

	// DEFAULT_CONFIG_KEY is the default configuration key, "patroni".
	// NOTE(review): presumably the section name looked up in the agent config — confirm.
	DEFAULT_CONFIG_KEY = "patroni"

	// PGInRecoveryQuery is the SQL statement that calls pg_is_in_recovery().
	PGInRecoveryQuery = "SELECT pg_is_in_recovery()"
)
Variables ¶
// PatroniManagedParameters is the set of PostgreSQL parameter names that this
// package treats as managed by Patroni. A name belongs to the set when it maps
// to true; names that are absent yield false on lookup.
var PatroniManagedParameters = map[string]bool{
	"cluster_name":              true,
	"hot_standby":               true,
	"in_hot_standby":            true,
	"max_connections":           true,
	"max_replication_slots":     true,
	"max_wal_senders":           true,
	"primary_conninfo":          true,
	"primary_slot_name":         true,
	"recovery_min_apply_delay":  true,
	"synchronous_standby_names": true,
	"transaction_read_only":     true,
	"wal_level":                 true,
}
Functions ¶
func DetectConfigFromConfigFile ¶
func DetectConfigFromConfigFile() bool
func DetectConfigFromEnv ¶
func DetectConfigFromEnv() bool
func HardwareInfoPatroni ¶
func HardwareInfoPatroni() func(ctx context.Context, state *agent.MetricsState) error
Types ¶
type Config ¶
// Config holds the settings for a Patroni adapter.
type Config struct {
	// NodeName is required: one agent monitors one specific Patroni node.
	NodeName string `mapstructure:"node_name" validate:"required"`

	// PatroniAPIURL is required: the Patroni REST API URL for this specific
	// node (e.g., http://10.0.1.5:8008).
	PatroniAPIURL string `mapstructure:"patroni_api_url" validate:"required"`

	// ClusterName is optional: the name of the Patroni cluster this node
	// belongs to (informational).
	ClusterName string `mapstructure:"cluster_name"`
}
Config represents the configuration for a Patroni adapter. Important: each agent instance monitors ONE specific Patroni node, not the entire cluster. Even though Patroni operates as an HA cluster, the agent connects to individual nodes.
func ConfigFromViper ¶
type FailoverDetectedError ¶
// FailoverDetectedError carries the details of a failover detected during
// tuning: the primary before and after, a message, and whether the
// stabilization period is still in effect.
type FailoverDetectedError struct {
	// OldPrimary is the primary recorded before the failover.
	OldPrimary string

	// NewPrimary is the primary recorded after the failover.
	NewPrimary string

	// Message is the accompanying text for the error.
	Message string

	// InStabilization is true if we're in the stabilization period
	// (allows baseline config).
	InStabilization bool
}
FailoverDetectedError is returned when a failover is detected during tuning. This signals that the tuning session should terminate gracefully.
func (*FailoverDetectedError) Error ¶
func (e *FailoverDetectedError) Error() string
type PatroniAdapter ¶
// PatroniAdapter is the adapter type for a single Patroni-managed node.
// It embeds agent.CommonAgent and bundles the Patroni configuration, a pgx
// connection pool, guardrail settings, an HTTP client and the adapter State.
// The listing omits unexported fields, so this is not the full field set.
type PatroniAdapter struct {
agent.CommonAgent
// PatroniConfig is the Patroni-specific configuration (see Config).
PatroniConfig Config
// PGDriver is a pgx connection pool.
// NOTE(review): presumably used for all PostgreSQL queries — confirm.
PGDriver *pgxpool.Pool
// GuardrailConfig holds the guardrail settings.
GuardrailConfig guardrails.Config
// PGVersion is the PostgreSQL version as a string.
// NOTE(review): format (e.g. "16.2" vs. major only) is not shown here — confirm.
PGVersion string
// HTTPClient is an HTTP client.
// NOTE(review): presumably used for Patroni REST API calls — confirm.
HTTPClient *http.Client
// State holds timestamps and failover-tracking data (see State).
State *State
// contains filtered or unexported fields
}
func CreatePatroniAdapter ¶
func CreatePatroniAdapter() (*PatroniAdapter, error)
func (*PatroniAdapter) ApplyConfig ¶
func (adapter *PatroniAdapter) ApplyConfig(proposedConfig *agent.ProposedConfigResponse) error
func (*PatroniAdapter) CheckForFailover ¶
func (adapter *PatroniAdapter) CheckForFailover(ctx context.Context) error
CheckForFailover checks if a failover has occurred since last check. Returns FailoverDetectedError if primary changed or cluster unhealthy, nil otherwise.
func (*PatroniAdapter) Collectors ¶
func (adapter *PatroniAdapter) Collectors() []agent.MetricCollector
func (*PatroniAdapter) GetActiveConfig ¶
func (adapter *PatroniAdapter) GetActiveConfig() (agent.ConfigArraySchema, error)
func (*PatroniAdapter) GetSystemInfo ¶
func (adapter *PatroniAdapter) GetSystemInfo() ([]metrics.FlatValue, error)
func (*PatroniAdapter) Guardrails ¶
func (adapter *PatroniAdapter) Guardrails() *guardrails.Signal
type PatroniClusterResponse ¶
// PatroniClusterResponse is the decoded body of a Patroni /cluster response.
type PatroniClusterResponse struct {
	// Members is the list decoded from the "members" JSON key.
	Members []PatroniMember `json:"members"`
}
PatroniClusterResponse represents the Patroni API response structure for the /cluster endpoint.
type PatroniClusterStatus ¶
PatroniClusterStatus holds the essential state from the Patroni API.
type PatroniMember ¶
// PatroniMember describes one member node of the Patroni cluster. Each field
// is decoded from the JSON key of the same lowercase name.
type PatroniMember struct {
	// Name is the member's name ("name").
	Name string `json:"name"`

	// Role is the member's role ("role").
	Role string `json:"role"`

	// Host is the member's host ("host").
	Host string `json:"host"`

	// State is the member's state ("state").
	State string `json:"state"`
}
PatroniMember represents a member node in the Patroni cluster.
type PatroniPatchRequest ¶
// PatroniPatchRequest is the payload shape for a PATCH request to Patroni's
// /config endpoint: a "postgresql" object holding a "parameters" map.
type PatroniPatchRequest struct {
	// PostgreSQL is serialized under the "postgresql" JSON key.
	PostgreSQL struct {
		// Parameters maps parameter names to their values and is
		// serialized under the "parameters" JSON key.
		Parameters map[string]any `json:"parameters"`
	} `json:"postgresql"`
}
PatroniPatchRequest represents the structure for PATCH request to Patroni's /config endpoint
type State ¶
// State holds the adapter's bookkeeping: guardrail/config timestamps and
// failover-tracking data. The listing omits unexported fields.
// NOTE(review): the accessor docs describe reads and writes as "safe", which
// suggests a lock among the unexported fields — prefer the accessor methods
// over direct field access until confirmed.
type State struct {
// LastGuardrailCheck is the timestamp of the last guardrail check.
LastGuardrailCheck time.Time
// LastAppliedConfig is the timestamp of the last applied config.
LastAppliedConfig time.Time
// Failover detection
// LastKnownPrimary is the primary node name from the last check
// Used to detect when primary changes (failover)
LastKnownPrimary string
// LastFailoverTime tracks when the most recent failover was detected
// Used to trigger recovery mode and health checking
LastFailoverTime time.Time
// InRestartWindow tracks if we're in an intentional PostgreSQL restart
// Used to suppress failover notifications during planned restarts
InRestartWindow bool
// contains filtered or unexported fields
}
func (*State) CancelOperations ¶
func (s *State) CancelOperations()
CancelOperations cancels all in-flight PostgreSQL operations. It is called when a failover is detected, to immediately abort queries to the old primary.
func (*State) ClearFailoverTime ¶
func (s *State) ClearFailoverTime()
ClearFailoverTime resets the failover time (used after recovery completes)
func (*State) CreateOperationsContext ¶
func (s *State) CreateOperationsContext()
CreateOperationsContext creates a new cancellable context for PostgreSQL operations. It should be called during initialization and after failover recovery completes.
func (*State) GetLastFailoverTime ¶
GetLastFailoverTime safely gets the last failover time
func (*State) GetLastKnownPrimary ¶
GetLastKnownPrimary safely reads the last known primary node
func (*State) GetOperationsContext ¶
GetOperationsContext returns the current operations context. This context should be used for all PostgreSQL queries/operations. It will be cancelled when a failover is detected, aborting in-flight operations.
func (*State) IsInRestartWindow ¶
IsInRestartWindow returns true if we're currently in an intentional restart
func (*State) IsRecoveryContextCreated ¶
IsRecoveryContextCreated checks if we've already created a recovery context
func (*State) SetInRestartWindow ¶
SetInRestartWindow marks that we're in an intentional PostgreSQL restart. This prevents failover notifications from being sent during planned restarts.
func (*State) SetLastFailoverTime ¶
SetLastFailoverTime safely records when a failover was detected
func (*State) SetLastKnownPrimary ¶
SetLastKnownPrimary safely updates the last known primary node
func (*State) SetRecoveryContextCreated ¶
SetRecoveryContextCreated marks that we've created a recovery context
func (*State) TimeSinceLastFailover ¶
TimeSinceLastFailover safely returns the duration since the last failover. It returns 0 if no failover has been recorded yet.
func (*State) TimeSinceLastGuardrailCheck ¶
TimeSinceLastGuardrailCheck safely returns time since last guardrail check
func (*State) UpdateLastAppliedConfig ¶
func (s *State) UpdateLastAppliedConfig()
UpdateLastAppliedConfig safely updates the last applied config timestamp
func (*State) UpdateLastGuardrailCheck ¶
func (s *State) UpdateLastGuardrailCheck()
UpdateLastGuardrailCheck safely updates the last guardrail check timestamp