Documentation
¶
Overview ¶
Package healthcheck helps you implement Kubernetes liveness and readiness checks for your application. It supports synchronous and asynchronous (background) checks. It can optionally report each check's status as a set of Prometheus gauge metrics for cluster-wide monitoring and alerting.
It also includes a small library of generic checks for DNS, TCP, and HTTP reachability as well as Goroutine usage.
Example ¶
// Create a Handler that we can use to register liveness and readiness checks.
health := NewHandler()
// Add a readiness check to make sure an upstream dependency resolves in DNS.
// If this fails we don't want to receive requests, but we shouldn't be
// restarted or rescheduled.
upstreamHost := "upstream.example.com"
health.AddReadinessCheck(
"upstream-dep-dns",
DNSResolveCheck(upstreamHost, 50*time.Millisecond))
// Add a liveness check to detect Goroutine leaks. If this fails we want
// to be restarted/rescheduled.
health.AddLivenessCheck("goroutine-threshold", GoroutineCountCheck(100))
// Serve http://0.0.0.0:8080/live and http://0.0.0.0:8080/ready endpoints.
// go http.ListenAndServe("0.0.0.0:8080", health)
// Make a request to the readiness endpoint and print the response.
fmt.Print(dumpRequest(health, "GET", "/ready"))
Output: HTTP/1.1 503 Service Unavailable Connection: close Content-Type: application/json; charset=utf-8 {}
Example (Advanced) ¶
// Create a Handler that we can use to register liveness and readiness checks.
health := NewHandler()
// Make sure we can connect to an upstream dependency over TCP in less than
// 50ms. Run this check asynchronously in the background every 10 seconds
// instead of every time the /ready or /live endpoints are hit.
//
// Async is useful whenever a check is expensive (especially if it causes
// load on upstream services).
upstreamAddr := "upstream.example.com:5432"
health.AddReadinessCheck(
"upstream-dep-tcp",
Async(TCPDialCheck(upstreamAddr, 50*time.Millisecond), 10*time.Second))
// Add a readiness check against the health of an upstream HTTP dependency
upstreamURL := "http://upstream-svc.example.com:8080/healthy"
health.AddReadinessCheck(
"upstream-dep-http",
HTTPGetCheck(upstreamURL, 500*time.Millisecond))
// Implement a custom check with a 50 millisecond timeout.
health.AddLivenessCheck("custom-check-with-timeout", Timeout(func() error {
// Simulate some work that could take a long time
time.Sleep(time.Millisecond * 100)
return nil
}, 50*time.Millisecond))
// Expose the readiness endpoints on a custom path /healthz mixed into
// our main application mux.
mux := http.NewServeMux()
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte("Hello, world!"))
})
mux.HandleFunc("/healthz", health.ReadyEndpoint)
// Sleep for just a moment to make sure our Async handler had a chance to run
time.Sleep(500 * time.Millisecond)
// Make a sample request to the /healthz endpoint and print the response.
fmt.Println(dumpRequest(mux, "GET", "/healthz"))
Output: HTTP/1.1 503 Service Unavailable Connection: close Content-Type: application/json; charset=utf-8 {}
Example (Database) ¶
// Connect to a database/sql database
var database *sql.DB
database = connectToDatabase()
// Create a Handler that we can use to register liveness and readiness checks.
health := NewHandler()
// Add a readiness check so we don't receive requests unless we can reach
// the database with a ping in <1 second.
health.AddReadinessCheck("database", DatabasePingCheck(database, 1*time.Second))
// Serve http://0.0.0.0:8080/live and http://0.0.0.0:8080/ready endpoints.
// go http.ListenAndServe("0.0.0.0:8080", health)
// Make a request to the readiness endpoint and print the response.
fmt.Print(dumpRequest(health, "GET", "/ready?full=1"))
Output: HTTP/1.1 200 OK Connection: close Content-Type: application/json; charset=utf-8 { "database": "OK" }
Example (Metrics) ¶
// Create a new Prometheus registry (you'd likely already have one of these).
registry := prometheus.NewRegistry()
// Create a metrics-exposing Handler for the Prometheus registry
// The healthcheck related metrics will be prefixed with the provided namespace
health := NewMetricsHandler(registry, "example")
// Add a simple readiness check that always fails.
health.AddReadinessCheck("failing-check", func() error {
return fmt.Errorf("example failure")
})
// Add a liveness check that always succeeds
health.AddLivenessCheck("successful-check", func() error {
return nil
})
// Create an "admin" listener on 0.0.0.0:9402
adminMux := http.NewServeMux()
// go http.ListenAndServe("0.0.0.0:9402", adminMux)
// Expose prometheus metrics on /metrics
adminMux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
// Expose a liveness check on /live
adminMux.HandleFunc("/live", health.LiveEndpoint)
// Expose a readiness check on /ready
adminMux.HandleFunc("/ready", health.ReadyEndpoint)
// Make a request to the metrics endpoint and print the response.
fmt.Println(dumpRequest(adminMux, "GET", "/metrics"))
Output: HTTP/1.1 200 OK Content-Length: 245 Content-Type: text/plain; version=0.0.4; charset=utf-8 # HELP example_healthcheck_status Current check status (0 indicates success, 1 indicates failure) # TYPE example_healthcheck_status gauge example_healthcheck_status{check="failing-check"} 1 example_healthcheck_status{check="successful-check"} 0
Index ¶
- Variables
- type Check
- func Async(check Check, interval time.Duration) Check
- func AsyncWithContext(ctx context.Context, check Check, interval time.Duration) Check
- func DNSResolveCheck(host string, timeout time.Duration) Check
- func DatabasePingCheck(database *sql.DB, timeout time.Duration) Check
- func GCMaxPauseCheck(threshold time.Duration) Check
- func GoroutineCountCheck(threshold int) Check
- func HTTPGetCheck(url string, timeout time.Duration) Check
- func TCPDialCheck(addr string, timeout time.Duration) Check
- func Timeout(check Check, timeout time.Duration) Check
- type Handler
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var ErrNoData = errors.New("no data yet")
ErrNoData is returned if the first call of an Async() wrapped Check has not yet returned.
Functions ¶
This section is empty.
Types ¶
type Check ¶
type Check func() error
Check is a health/readiness check.
func Async ¶
Async converts a Check into an asynchronous check that runs in a background goroutine at a fixed interval. The check is called at a fixed rate, not with a fixed delay between invocations. If your check takes longer than the interval to execute, the next execution will happen immediately.
Note: if you need to clean up the background goroutine, use AsyncWithContext().
func AsyncWithContext ¶
AsyncWithContext converts a Check into an asynchronous check that runs in a background goroutine at a fixed interval. The check is called at a fixed rate, not with a fixed delay between invocations. If your check takes longer than the interval to execute, the next execution will happen immediately.
Note: if you don't need to cancel execution (because this runs forever), use Async() instead.
func DNSResolveCheck ¶
DNSResolveCheck returns a Check that makes sure the provided host can resolve to at least one IP address within the specified timeout.
func DatabasePingCheck ¶
DatabasePingCheck returns a Check that validates connectivity to a database/sql.DB using Ping().
func GCMaxPauseCheck ¶
GCMaxPauseCheck returns a Check that fails if any recent Go garbage collection pause exceeds the provided threshold.
func GoroutineCountCheck ¶
GoroutineCountCheck returns a Check that fails if too many goroutines are running (which could indicate a resource leak).
func HTTPGetCheck ¶
HTTPGetCheck returns a Check that performs an HTTP GET request against the specified URL. The check fails if the response times out or returns a non-200 status code.
func TCPDialCheck ¶
TCPDialCheck returns a Check that checks TCP connectivity to the provided endpoint.
type Handler ¶
type Handler interface {
// The Handler is an http.Handler, so it can be exposed directly and handle
// /live and /ready endpoints.
http.Handler
// AddLivenessCheck adds a check that indicates that this instance of the
// application should be destroyed or restarted. A failed liveness check
// indicates that this instance is unhealthy, not some upstream dependency.
// Every liveness check is also included as a readiness check.
AddLivenessCheck(name string, check Check)
// AddReadinessCheck adds a check that indicates that this instance of the
// application is currently unable to serve requests because of an upstream
// or some transient failure. If a readiness check fails, this instance
// should no longer receive requests, but should not be restarted or
// destroyed.
AddReadinessCheck(name string, check Check)
// LiveEndpoint is the HTTP handler for just the /live endpoint, which is
// useful if you need to attach it into your own HTTP handler tree.
LiveEndpoint(http.ResponseWriter, *http.Request)
// ReadyEndpoint is the HTTP handler for just the /ready endpoint, which is
// useful if you need to attach it into your own HTTP handler tree.
ReadyEndpoint(http.ResponseWriter, *http.Request)
}
Handler is an http.Handler with additional methods that register health and readiness checks. It handles the "/live" and "/ready" HTTP endpoints.
func NewMetricsHandler ¶
func NewMetricsHandler(registry prometheus.Registerer, namespace string) Handler
NewMetricsHandler returns a healthcheck Handler that also exposes metrics into the provided Prometheus registry.