agent

package
v0.5.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 21, 2025 License: Apache-2.0 Imports: 19 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AgentLooper

// AgentLooper is the set of operations an agent exposes: heartbeats, metric
// and system-info collection and reporting, configuration retrieval and
// application, guardrail signalling, error reporting, and logger access.
type AgentLooper interface {
	// SendHeartbeat sends a heartbeat to the DBtune server.
	SendHeartbeat() error

	// GetMetrics returns the metrics for the agent.
	// The metrics should have a format of:
	// {
	//   "no_cpu": { "type": "int", "value": 4 },
	//   "available_memory": { "type": "bytes", "value": 1024 },
	// }
	// The current implementation of GetMetrics follows a concurrent collection
	// approach, where the collectors are executed in parallel and the errors are
	// collected in a channel. The channel is then closed and the results are
	// returned. It uses the errgroup package to delegate the concurrent execution.
	GetMetrics() ([]metrics.FlatValue, error)
	// SendMetrics accepts a slice of the same type GetMetrics returns.
	// NOTE(review): presumably it reports those metrics to the DBtune server — confirm.
	SendMetrics([]metrics.FlatValue) error

	// GetSystemInfo returns the system info of the PostgreSQL server.
	// Example of system info:
	// {
	//   "no_cpu": { "type": "int", "value": 4 },
	//   "total_memory": { "type": "bytes", "value": 1024 },
	// }
	// Importantly, you should never return a partial view of the SystemInfo: if
	// one step fails, you should abort and not return any metrics, just an error.
	// This is because if only part of the SystemInfo can be observed, DBtune will
	// detect this as the system information having changed and may abort an
	// in-progress tuning session.
	GetSystemInfo() ([]metrics.FlatValue, error)
	// SendSystemInfo accepts a slice of the same type GetSystemInfo returns.
	// NOTE(review): presumably it reports the system info to the DBtune server — confirm.
	SendSystemInfo([]metrics.FlatValue) error

	// GetActiveConfig returns a ConfigArraySchema or an error.
	// NOTE(review): presumably the configuration currently active on the database — confirm.
	GetActiveConfig() (ConfigArraySchema, error)
	// SendActiveConfig accepts a ConfigArraySchema, the type GetActiveConfig returns.
	// NOTE(review): presumably it reports that configuration to the DBtune server — confirm.
	SendActiveConfig(ConfigArraySchema) error
	// GetProposedConfig returns a ProposedConfigResponse, the type ApplyConfig accepts.
	// NOTE(review): whether a nil response with a nil error is possible is not visible here.
	GetProposedConfig() (*ProposedConfigResponse, error)

	// ApplyConfig applies the configuration to the PostgreSQL server.
	// The configuration is applied with the appropriate method, either with a
	// restart or a reload operation.
	ApplyConfig(knobs *ProposedConfigResponse) error

	// Guardrails is responsible for triggering a signal to the DBtune server
	// that something is heading towards a failure.
	// An example failure could be memory above a certain threshold (90%)
	// or a rate of disk growth that is more than usual and not acceptable.
	// Returns nil if no guardrail is triggered, otherwise returns the type of guardrail
	// and the metric that is monitored.
	Guardrails() *guardrails.Signal
	// SendGuardrailSignal sends a signal to the DBtune server that something is heading towards a failure.
	// The signal will be sent at most once every 15 seconds.
	SendGuardrailSignal(signal guardrails.Signal) error

	// SendError sends an error report to the DBtune server.
	SendError(payload ErrorPayload) error

	// Logger returns the logger for the agent.
	Logger() *log.Logger
}

type AgentPayload

// AgentPayload carries the agent's version and start time. Both fields are
// strings and serialize to the JSON keys "agent_version" and "agent_start_time".
// NOTE(review): the string format of the start time is not visible here.
type AgentPayload struct {
	AgentVersion   string `json:"agent_version"`
	AgentStartTime string `json:"agent_start_time"`
}

type BufferStat added in v0.3.5

// BufferStat is a timestamped sample of two block counters.
// NOTE(review): BlksHit and BlksRead presumably mirror PostgreSQL's blks_hit
// and blks_read statistics — confirm against the collector that fills them.
type BufferStat struct {
	BlksHit   int64
	BlksRead  int64
	Timestamp time.Time
}

type Caches

// Caches groups the values the agent keeps between metric collection rounds so
// that later rounds can be compared against earlier ones.
type Caches struct {
	// QueryRuntimeList is a list of all the queries in pg_stat_statements.
	// The list is used to calculate the runtime of the queries.
	// Example data:
	// {
	// 	example_query_id: {
	// 		"query_id": example_query_id,
	// 		"total_exec_time": 1000,
	// 		"calls": 10,
	// 	},
	// }
	QueryRuntimeList map[string]utils.CachedPGStatStatement

	// XactCommit is the number of transactions committed.
	// This is used to calculate the TPS between two heartbeats.
	XactCommit XactStat

	// IOCountersStat holds the cached read and write counts.
	// NOTE(review): the source of these counters is not visible here.
	IOCountersStat IOCounterStat

	// PGDatabase stores metrics from pg_stat_database.
	// NOTE(review): PGBGWriter, PGWAL and PGCheckPointer presumably cache the
	// corresponding pg_stat_bgwriter, pg_stat_wal and pg_stat_checkpointer
	// views — confirm against the collectors.
	PGDatabase     PGDatabase
	PGBGWriter     PGBGWriter
	PGWAL          PGWAL
	PGCheckPointer PGCheckPointer

	// PGUserTables is keyed by string.
	// NOTE(review): presumably per-table statistics keyed by table name — confirm.
	PGUserTables map[string]utils.PGUserTables

	// HardwareCache is a hardware-specific cache for guardrails, for example:
	// {
	// 	"total_memory": 1024,
	// 	"disk_size": 1024,
	// }
	HardwareCache map[string]interface{}
}

Caches is a struct that holds the agent's caches, which are updated between each metric collection beat. Currently, this is fixed for all adapters. TODO: Make this dynamic for each adapter. This could use GJSON and SJSON to update the cache as a string, but locking would then be a problem when reading and writing without custom methods on the state.

type CommonAgent

// CommonAgent bundles the state shared by agent implementations: the embedded
// server URLs, an HTTP client, the metrics state, collection timeouts and the
// agent version.
type CommonAgent struct {
	// Embedded, so the fields and methods of dbtune.ServerURLs are promoted
	// onto CommonAgent.
	dbtune.ServerURLs

	// APIClient is a retryablehttp client.
	// NOTE(review): presumably used for all calls to the DBtune server — confirm.
	APIClient *retryablehttp.Client
	// Time the agent started
	StartTime    string
	// MetricsState holds the collectors, the cache and the metrics of the
	// current collection round.
	MetricsState MetricsState
	// Timeout configuration
	CollectionTimeout time.Duration // Total timeout for all collectors
	IndividualTimeout time.Duration // Timeout for each individual collector
	// Version information
	Version string
	// contains filtered or unexported fields
}

func CreateCommonAgent

func CreateCommonAgent() *CommonAgent

func CreateCommonAgentWithVersion added in v0.3.5

func CreateCommonAgentWithVersion(version string) *CommonAgent

func (*CommonAgent) GetMetrics

func (a *CommonAgent) GetMetrics() ([]metrics.FlatValue, error)

GetMetrics provides a default implementation that handles errors gracefully and sends partial metrics rather than failing. Adapters are discouraged from overriding it.

func (*CommonAgent) GetProposedConfig

func (a *CommonAgent) GetProposedConfig() (*ProposedConfigResponse, error)

func (*CommonAgent) InitCollectors added in v0.3.0

func (a *CommonAgent) InitCollectors(collectors []MetricCollector)

InitCollectors should be called after the common agent is created, to attach the collectors. You can skip this step if you create the common agent with the collectors already attached.

func (*CommonAgent) Logger

func (a *CommonAgent) Logger() *log.Logger

func (*CommonAgent) SendActiveConfig

func (a *CommonAgent) SendActiveConfig(config ConfigArraySchema) error

func (*CommonAgent) SendError added in v0.4.0

func (a *CommonAgent) SendError(payload ErrorPayload) error

func (*CommonAgent) SendGuardrailSignal

func (a *CommonAgent) SendGuardrailSignal(signal guardrails.Signal) error

SendGuardrailSignal sends a guardrail signal to the DBtune server that something is heading towards a failure.

func (*CommonAgent) SendHeartbeat

func (a *CommonAgent) SendHeartbeat() error

SendHeartbeat sends a heartbeat to the DBtune server to indicate that the agent is running. This method does not need to be overridden by any adapter.

func (*CommonAgent) SendMetrics

func (a *CommonAgent) SendMetrics(ms []metrics.FlatValue) error

func (*CommonAgent) SendSystemInfo

func (a *CommonAgent) SendSystemInfo(systemInfo []metrics.FlatValue) error

type ConfigArraySchema

// ConfigArraySchema is a slice of arbitrary values used as the payload type of
// GetActiveConfig and SendActiveConfig.
// NOTE(review): the expected element shape is not visible here (possibly
// PGConfigRow-like entries) — confirm.
type ConfigArraySchema []interface{}

type ErrorPayload added in v0.4.0

// ErrorPayload is the argument type of SendError. All fields are strings and
// serialize to the JSON keys "error_message", "error_type" and "timestamp".
// NOTE(review): the accepted error types and the timestamp format are not
// visible here.
type ErrorPayload struct {
	ErrorMessage string `json:"error_message"`
	ErrorType    string `json:"error_type"`
	Timestamp    string `json:"timestamp"`
}

type IOCounterStat

// IOCounterStat holds a pair of unsigned read and write counts.
// NOTE(review): presumably cumulative I/O operation counts that are diffed
// between collection rounds — confirm.
type IOCounterStat struct {
	ReadCount  uint64
	WriteCount uint64
}

type MetricCollector

// MetricCollector pairs a string key with a collection function.
type MetricCollector struct {
	// Key identifies the collector.
	// NOTE(review): presumably one of the MetricKey values — confirm.
	Key       string
	// Collector receives a context and a pointer to the shared MetricsState,
	// and returns an error on failure.
	Collector func(ctx context.Context, state *MetricsState) error
}

type MetricKey

// MetricKey is a string-typed identifier for a metric.
type MetricKey string

// Known metric keys.
const (
	// QueryRuntime has the string value "query_runtime".
	QueryRuntime      MetricKey = "query_runtime"
	// ActiveConnections has the string value "active_connections".
	ActiveConnections MetricKey = "active_connections"
)

type MetricsState

// MetricsState is the state handed to every MetricCollector: the registered
// collectors, the cross-round cache, and the metrics gathered in the current
// round.
type MetricsState struct {
	// Collectors is the list of metric collectors attached to this state.
	Collectors []MetricCollector
	// Caching layer, for metrics that are derived based on multiple heartbeats
	Cache Caches
	// Every round of metric collections this array will be filled with the metrics
	// that are collected, and then emptied
	Metrics []metrics.FlatValue
	// Mutex is a pointer, so it must be set before it is used.
	// NOTE(review): presumably guards Metrics (and possibly Cache) while the
	// collectors run — confirm.
	Mutex   *sync.Mutex
}

func (*MetricsState) AddMetric

func (state *MetricsState) AddMetric(metric metrics.FlatValue)

AddMetric appends a metric in a thread-safe way

type PGBGWriter added in v0.4.0

// PGBGWriter is a timestamped sample of three int64 counters.
// NOTE(review): the field names match the buffers_clean, maxwritten_clean and
// buffers_alloc columns of PostgreSQL's pg_stat_bgwriter view — confirm.
type PGBGWriter struct {
	BuffersClean    int64
	MaxWrittenClean int64
	BuffersAlloc    int64
	Timestamp       time.Time
}

type PGCheckPointer added in v0.4.0

// PGCheckPointer is a timestamped sample of checkpoint counters and timings.
// NOTE(review): the field names match columns of PostgreSQL's
// pg_stat_checkpointer view (num_timed, num_requested, write_time, sync_time,
// buffers_written) — confirm. The units of WriteTime and SyncTime are not
// visible here.
type PGCheckPointer struct {
	NumTimed       int64
	NumRequested   int64
	WriteTime      float64
	SyncTime       float64
	BuffersWritten int64
	Timestamp      time.Time
}

type PGConfigRow

// PGConfigRow describes one configuration setting. It serializes to the JSON
// keys "name", "setting", "unit", "vartype" and "context".
// NOTE(review): the keys match columns of PostgreSQL's pg_settings view — confirm.
type PGConfigRow struct {
	Name    string      `json:"name"`
	// Setting and Unit are untyped, so their dynamic type depends on the
	// decoded data.
	Setting interface{} `json:"setting"`
	Unit    interface{} `json:"unit"`
	Vartype string      `json:"vartype"`
	Context string      `json:"context"`
}

TODO: extract PostgreSQL specific types + methods to utils/separate place

func (PGConfigRow) GetSettingValue

func (p PGConfigRow) GetSettingValue() (string, error)

GetSettingValue returns the setting value in its appropriate type and format. This is needed for cases like Aurora RDS when modifying parameters.

type PGDatabase added in v0.4.0

// PGDatabase is a timestamped sample of database-level counters.
// NOTE(review): the field names match columns of PostgreSQL's pg_stat_database
// view (blks_hit, blks_read, tup_*, temp_files, temp_bytes, deadlocks,
// idle_in_transaction_time) — confirm. The unit of IdleInTransactionTime is
// not visible here.
type PGDatabase struct {
	BlksHit               int64
	BlksRead              int64
	TuplesReturned        int64
	TuplesFetched         int64
	TuplesInserted        int64
	TuplesUpdated         int64
	TuplesDeleted         int64
	TempFiles             int64
	TempBytes             int64
	Deadlocks             int64
	IdleInTransactionTime float64
	Timestamp             time.Time
}

type PGWAL added in v0.4.0

// PGWAL is a timestamped sample of write-ahead-log counters and timings.
// NOTE(review): the field names match columns of PostgreSQL's pg_stat_wal view
// (wal_records, wal_fpi, wal_bytes, wal_buffers_full, wal_write, wal_sync,
// wal_write_time, wal_sync_time) — confirm. The units of the time fields are
// not visible here.
type PGWAL struct {
	WALRecords     int64
	WALFpi         int64
	WALBytes       int64
	WALBuffersFull int64
	WALWrite       int64
	WALSync        int64
	WALWriteTime   float64
	WALSyncTime    float64
	Timestamp      time.Time
}

type ProposedConfigResponse

// ProposedConfigResponse is the type returned by GetProposedConfig and
// accepted by ApplyConfig. It serializes to the JSON keys "config",
// "knobs_overrides" and "knob_application".
type ProposedConfigResponse struct {
	// Config is the list of configuration rows in the proposal.
	Config          []PGConfigRow `json:"config"`
	// NOTE(review): the meaning of the override entries is not visible here.
	KnobsOverrides  []string      `json:"knobs_overrides"`
	// NOTE(review): presumably selects how the configuration is applied
	// (restart or reload) — confirm the accepted values.
	KnobApplication string        `json:"knob_application"`
}

type XactStat

// XactStat is a timestamped transaction count. It is the type of
// Caches.XactCommit, which records the number of committed transactions so
// that TPS can be calculated between two heartbeats.
type XactStat struct {
	Count     int64
	Timestamp time.Time
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL