Documentation
¶
Index ¶
- Constants
- Variables
- func InitializeRotationMetrics()
- func LoadCertificateExpiry(certPath string) (notBefore, notAfter time.Time, err error)
- func RecordRetry(certType CertificateType)
- func RecordRotation(certType CertificateType, duration time.Duration, success bool)
- func UpdateExpiryMetric(certType CertificateType, notAfter time.Time)
- type CAManager
- type CRLRefresher
- type CertificateInfo
- type CertificateRenewer
- type CertificateType
- type CertificateValidator
- type HealthValidator
- type RotationCallback
- type RotationEvent
- type RotationHistory
- type RotationScheduler
Constants ¶
const (
	// CheckInterval controls how often certificate expiry is checked.
	CheckInterval = 1 * time.Hour
)
Variables ¶
var (
	// CertificateExpirationTimestamp exports Unix timestamp when each certificate expires.
	// Labels: cert_type (nats, api-server, api-client)
	CertificateExpirationTimestamp = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "certificate_expiration_timestamp_seconds",
		Help: "Unix timestamp when certificate expires (NotAfter)",
	}, []string{"cert_type"})

	// CertificateRotationTotal counts successful and failed certificate rotations.
	// Labels: cert_type, result (success, failure)
	CertificateRotationTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "certificate_rotation_total",
		Help: "Total number of certificate rotation attempts by result",
	}, []string{"cert_type", "result"})

	// CertificateRotationDuration tracks rotation duration in seconds.
	// Labels: cert_type
	// Buckets: 0.1s to 30s (rotation should be fast, < 5s typical)
	CertificateRotationDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "certificate_rotation_duration_seconds",
		Help:    "Certificate rotation duration in seconds",
		Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30},
	}, []string{"cert_type"})

	// CertificateLastRotationTimestamp exports Unix timestamp of last rotation attempt.
	// Labels: cert_type
	CertificateLastRotationTimestamp = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "certificate_last_rotation_timestamp_seconds",
		Help: "Unix timestamp of last rotation attempt (success or failure)",
	}, []string{"cert_type"})

	// CertificateRotationRetryCount counts rotation retries (after initial failure).
	// Labels: cert_type
	CertificateRotationRetryCount = prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "certificate_rotation_retry_total",
		Help: "Total number of certificate rotation retries",
	}, []string{"cert_type"})
)
Functions ¶
func InitializeRotationMetrics ¶
func InitializeRotationMetrics()
InitializeRotationMetrics registers all rotation metrics with Prometheus. Called during controller startup via pkg/metrics/metrics.go InitializeCollector().
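func LoadCertificateExpiry ¶
func LoadCertificateExpiry(certPath string) (notBefore, notAfter time.Time, err error)
LoadCertificateExpiry reads the certificate file at certPath and returns its validity window: notBefore (start of validity) and notAfter (expiry time). Used by the rotation scheduler and the history tracker.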
func RecordRetry ¶
func RecordRetry(certType CertificateType)
RecordRetry increments the retry counter for a certificate type. Called when rotation fails and is retried.
func RecordRotation ¶
func RecordRotation(certType CertificateType, duration time.Duration, success bool)
RecordRotation records a rotation attempt with duration and success/failure. certType identifies which certificate was rotated. duration is how long the rotation took. success indicates whether rotation succeeded (true) or failed (false).
func UpdateExpiryMetric ¶
func UpdateExpiryMetric(certType CertificateType, notAfter time.Time)
UpdateExpiryMetric sets the certificate expiration timestamp gauge. certType identifies which certificate (NATS, API Server, API Client). notAfter is the certificate's NotAfter field (expiry time).
Types ¶
type CAManager ¶
type CAManager interface {
// HandleCertificateRequest validates and signs CSRs from a node.
// Returns map of CertificateType to PEM-encoded signed certificate.
// isFollowerRequest indicates if this is a follower CSR for bootstrap tracking.
HandleCertificateRequest(nodeID string, csrMap map[ca.CertificateType][]byte, isFollowerRequest bool) (map[ca.CertificateType][]byte, error)
}
CAManager interface defines certificate signing operations for testability. Implemented by pkg/ca.Manager in production, mocked in tests.
type CRLRefresher ¶
CRLRefresher defines the interface for CRL refresh operations. It is declared here to avoid an import cycle with the crl package.
type CertificateInfo ¶
type CertificateInfo struct {
CertType CertificateType
NotBefore time.Time
NotAfter time.Time
}
CertificateInfo holds expiry information for a certificate
func (*CertificateInfo) ShouldRotate ¶
func (ci *CertificateInfo) ShouldRotate() bool
ShouldRotate reports whether the certificate has passed 67% of its lifetime, the point at which rotation should trigger. For a 90-day certificate: threshold = NotBefore + (90 days × 0.67) = NotBefore + 60.3 days. Returns true when the current time exceeds this threshold.
type CertificateRenewer ¶
type CertificateRenewer struct {
// contains filtered or unexported fields
}
CertificateRenewer orchestrates certificate renewal with atomic swap and rollback. Generates new CSR matching existing certificate parameters, requests signing from CA manager (via Raft RPC to leader), validates health, and rolls back on failure.
func NewCertificateRenewer ¶
func NewCertificateRenewer(certDir string, validator CertificateValidator, caManager CAManager, nodeID string, logger logr.Logger) *CertificateRenewer
NewCertificateRenewer creates a renewer for certificate rotation. certDir is the base directory containing certificates. validator performs TLS handshake health checks before swap. caManager handles CSR signing (typically via Raft RPC to leader). nodeID identifies this node in signing requests.
func (*CertificateRenewer) RenewCertificate ¶
func (cr *CertificateRenewer) RenewCertificate(certType CertificateType, tlsAddr string) error
RenewCertificate generates a new CSR, requests signing, writes the new certificate in place of the old one, and validates health, rolling back if validation fails. Rollback is scoped per certificate: if validation fails, only the old certificate for that type is restored.
Steps:
- Load existing certificate to extract CommonName and SANs
- Generate new CSR using same CommonName/SANs
- Request signing from CA manager (via Raft RPC to leader)
- Backup current certificate (.prev files)
- Write new certificate atomically
- Validate health via TLS handshake
- If validation fails: rollback (restore .prev), return error
- If validation succeeds: remove .prev backups, return nil
Does NOT restart servers - relies on GetCertificate callback (API) and file watcher (NATS).
type CertificateType ¶
type CertificateType int
CertificateType identifies which certificate needs rotation
const (
	// CertTypeNATS is the NATS server certificate
	CertTypeNATS CertificateType = iota
	// CertTypeAPIServer is the API server HTTPS certificate
	CertTypeAPIServer
	// CertTypeAPIClient is the API client certificate
	CertTypeAPIClient
)
func (CertificateType) String ¶
func (t CertificateType) String() string
String returns human-readable certificate type name
type CertificateValidator ¶
type CertificateValidator interface {
ValidateCertificate(certFile, keyFile, tlsAddr string) error
}
CertificateValidator interface defines health validation for testability. Implemented by HealthValidator in production, mocked in tests.
type HealthValidator ¶
type HealthValidator struct {
// contains filtered or unexported fields
}
HealthValidator performs TLS handshake health checks before certificate swap. It validates that new certificates can successfully complete TLS handshakes.
func NewHealthValidator ¶
func NewHealthValidator(timeout time.Duration, logger logr.Logger) *HealthValidator
NewHealthValidator creates a validator with the specified timeout. If no timeout is provided (timeout <= 0), a default of 5 seconds is used.
func (*HealthValidator) ValidateCertificate ¶
func (hv *HealthValidator) ValidateCertificate(certFile, keyFile, tlsAddr string) error
ValidateCertificate tests TLS handshake to tlsAddr using provided certificate. This validates the certificate can successfully complete a TLS connection before swapping it into production use.
Steps:
- Load certificate using tls.LoadX509KeyPair
- Create TLS config with loaded cert, TLS 1.2+, AEAD cipher suites
- Test TLS handshake to tlsAddr (typically "127.0.0.1:8443")
- Close connection immediately after successful handshake
Returns error if any step fails (load, dial, handshake).
type RotationCallback ¶
type RotationCallback func(certType CertificateType) error
RotationCallback is invoked when a certificate reaches rotation threshold. Implementations should regenerate and distribute new certificates.
type RotationEvent ¶
type RotationEvent struct {
Timestamp time.Time `json:"timestamp"` // When rotation occurred
NodeID string `json:"nodeID"` // Node identifier
CertType string `json:"certType"` // Certificate type (NATS, API Server, API Client)
Success bool `json:"success"` // Whether rotation succeeded
Error string `json:"error"` // Error message if failed (empty on success)
OldExpiry time.Time `json:"oldExpiry"` // Previous certificate expiry
NewExpiry time.Time `json:"newExpiry"` // New certificate expiry (zero if failed)
DurationMs int64 `json:"durationMs"` // Rotation duration in milliseconds
}
RotationEvent records a certificate rotation attempt (success or failure). Stored in JetStream KV for audit trail and debugging.
type RotationHistory ¶
type RotationHistory struct {
// contains filtered or unexported fields
}
RotationHistory stores rotation events in JetStream KV for audit trail. Events are keyed by node, certificate type, and timestamp for queryability.
func NewRotationHistory ¶
func NewRotationHistory(kv jetstream.KeyValue, logger logr.Logger) *RotationHistory
NewRotationHistory creates a rotation history tracker. kv is the JetStream KeyValue bucket for storing events.
func (*RotationHistory) GetRotationHistory ¶
func (rh *RotationHistory) GetRotationHistory(ctx context.Context, nodeID string, certType CertificateType) ([]RotationEvent, error)
GetRotationHistory retrieves all rotation events for a specific node and certificate type. Returns events in chronological order (oldest first).
func (*RotationHistory) RecordRotation ¶
func (rh *RotationHistory) RecordRotation(ctx context.Context, nodeID string, certType CertificateType, success bool, err error, oldExpiry, newExpiry time.Time, duration time.Duration) error
RecordRotation stores a rotation event in JetStream KV.
Key format: neuwerk.rotation.{nodeID}.{certType}.{timestamp_unix}
Example: neuwerk.rotation.node-1.nats.1737400000
This format enables querying by node, certificate type, or time range.
type RotationScheduler ¶
type RotationScheduler struct {
// contains filtered or unexported fields
}
RotationScheduler runs background checks for certificate expiry. When any certificate reaches 67% of its lifetime, triggers rotation via callback.
func NewRotationScheduler ¶
func NewRotationScheduler(certDir string, onRotate RotationCallback, crlRefresher CRLRefresher, logger logr.Logger) *RotationScheduler
NewRotationScheduler creates a scheduler for periodic certificate expiry checks. certDir is the directory containing certificates (nats-server.crt, api-server.crt, api-client.crt). onRotate is called when any certificate reaches rotation threshold. crlRefresher is optional - if provided, enables hourly CRL refresh.
func (*RotationScheduler) Start ¶
func (rs *RotationScheduler) Start(ctx context.Context)
Start launches the background goroutine for periodic expiry checks. Checks occur every hour. Gracefully shuts down when context is cancelled.