agent

package
v0.1.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 6, 2025 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AgentLooper

// AgentLooper is the set of operations an agent exposes: heartbeats, metric and
// system-info collection and delivery, configuration retrieval and application,
// guardrail checks, and access to the logger.
type AgentLooper interface {
	// SendHeartbeat sends a heartbeat to the DBtune server
	SendHeartbeat() error

	// GetMetrics returns the metrics for the agent
	// The metrics should have a format of:
	// {
	//   "no_cpu": { "type": "int", "value": 4 },
	//   "available_memory": { "type": "bytes", "value": 1024 },
	// }
	// The current implementation of GetMetrics is following a concurrent collection
	// approach, where the collectors are executed in parallel and the errors are
	// collected in a channel. The channel is then closed and the results are
	// returned. Uses the errgroup package to delegate the concurrent execution.
	GetMetrics() ([]utils.FlatValue, error)
	// SendMetrics delivers the given metrics.
	// NOTE(review): the destination is presumably the DBtune server — confirm
	// against the implementation.
	SendMetrics([]utils.FlatValue) error

	// GetSystemInfo returns the system info of the PostgreSQL server
	// Example of system info:
	// {
	//   "no_cpu": { "type": "int", "value": 4 },
	//   "total_memory": { "type": "bytes", "value": 1024 },
	// }
	GetSystemInfo() ([]utils.FlatValue, error)
	// SendSystemInfo delivers the given system info.
	// NOTE(review): the destination is presumably the DBtune server — confirm
	// against the implementation.
	SendSystemInfo([]utils.FlatValue) error

	// GetActiveConfig returns the active configuration as a ConfigArraySchema.
	// NOTE(review): presumably the configuration currently in effect on the
	// database — confirm against the implementation.
	GetActiveConfig() (ConfigArraySchema, error)
	// SendActiveConfig delivers the given active configuration.
	SendActiveConfig(ConfigArraySchema) error
	// GetProposedConfig returns a proposed configuration, or an error.
	// NOTE(review): presumably fetched from the DBtune server — confirm.
	GetProposedConfig() (*ProposedConfigResponse, error)

	// ApplyConfig applies the configuration to the PostgreSQL server
	// The configuration is applied with the appropriate method, either with a
	// restart or a reload operation
	ApplyConfig(knobs *ProposedConfigResponse) error

	// Guardrails is responsible for triggering a signal to the DBtune server
	// that something is heading towards a failure.
	// An example failure could be memory above a certain threshold (90%)
	// or a rate of disk growth that is more than usual and not acceptable.
	// Returns nil if no guardrail is triggered, otherwise returns the type of guardrail
	Guardrails() *GuardrailType
	// SendGuardrailSignal sends a signal to the DBtune server that something is heading towards a failure.
	// The signal will be sent at most once every 15 seconds.
	SendGuardrailSignal(level GuardrailType) error

	// Logger returns the logger for the agent
	Logger() *log.Logger
}

type AgentPayload

// AgentPayload holds the agent's version and start time. Each field is
// serialized to JSON under the key given in its struct tag.
// NOTE(review): where this payload is sent is not visible here — presumably
// with the heartbeat; confirm against the caller.
type AgentPayload struct {
	AgentVersion   string `json:"agent_version"`
	AgentStartTime string `json:"agent_start_time"`
}

type Caches

// Caches groups the values the agent keeps between metric collection rounds:
// per-query runtimes, the committed-transaction counter, I/O counters and
// hardware facts used by guardrails.
type Caches struct {
	// QueryRuntimeList is a list of all the queries in pg_stat_statements
	// The list is used to calculate the runtime of the queries
	// Example data:
	// {
	// 	example_query_id: {
	// 		"query_id": example_query_id,
	// 		"time": 1000
	// 	},
	// }
	QueryRuntimeList map[string]utils.CachedPGStatStatement

	// XactCommit is the number of transactions committed
	// This is used to calculate the TPS between two heartbeats
	XactCommit XactStat

	// IOCountersStat holds read and write counts (see IOCounterStat).
	// NOTE(review): presumably the previous sample, kept to compute I/O
	// deltas between collections — confirm against the collectors.
	IOCountersStat IOCounterStat

	// Hardware specific cache for guardrails
	// {
	// 	"total_memory": 1024,
	// 	"disk_size": 1024,
	// }
	HardwareCache map[string]interface{}
}

Caches is a struct that holds the agent's caches, which are updated between metric collection beats. Currently, the set of caches is fixed for all adapters. TODO: Make this dynamic for each adapter. This could use GJSON and SJSON to update the cache as a string, but locking would then be a problem when reading and writing without custom methods on the state.

type CommonAgent

// CommonAgent bundles the server URLs, HTTP client, start time, metrics state
// and collection timeouts used by an agent.
// NOTE(review): judging by its methods it looks intended as the shared base
// that adapters embed — confirm against the adapters.
type CommonAgent struct {
	// Embedded set of server URLs from the utils package.
	utils.ServerURLs

	// APIClient is a retrying HTTP client.
	// NOTE(review): presumably used for all calls to the DBtune server — confirm.
	APIClient *retryablehttp.Client
	// Time the agent started (kept as a string; the format is not shown here),
	// followed by the collectors, cache and per-round metrics.
	StartTime    string
	MetricsState MetricsState
	// Timeout configuration
	CollectionTimeout time.Duration // Total timeout for all collectors
	IndividualTimeout time.Duration // Timeout for each individual collector
	// contains filtered or unexported fields
}

func CreateCommonAgent

func CreateCommonAgent() *CommonAgent

func (*CommonAgent) GetMetrics

func (a *CommonAgent) GetMetrics() ([]utils.FlatValue, error)

GetMetrics provides a default implementation that handles errors gracefully and sends partial metrics rather than failing. Adapters are discouraged from overriding it.

func (*CommonAgent) GetProposedConfig

func (a *CommonAgent) GetProposedConfig() (*ProposedConfigResponse, error)

func (*CommonAgent) Guardrails

func (a *CommonAgent) Guardrails() *GuardrailType

func (*CommonAgent) Logger

func (a *CommonAgent) Logger() *log.Logger

func (*CommonAgent) SendActiveConfig

func (a *CommonAgent) SendActiveConfig(config ConfigArraySchema) error

func (*CommonAgent) SendGuardrailSignal

func (a *CommonAgent) SendGuardrailSignal(level GuardrailType) error

SendGuardrailSignal sends a guardrail signal to the DBtune server that something is heading towards a failure.

func (*CommonAgent) SendHeartbeat

func (a *CommonAgent) SendHeartbeat() error

SendHeartbeat sends a heartbeat to the DBtune server to indicate that the agent is running. This method does not need to be overridden by any adapter.

func (*CommonAgent) SendMetrics

func (a *CommonAgent) SendMetrics(metrics []utils.FlatValue) error

func (*CommonAgent) SendSystemInfo

func (a *CommonAgent) SendSystemInfo(systemInfo []utils.FlatValue) error

type ConfigArraySchema

// ConfigArraySchema is an untyped list of configuration entries. It is the type
// returned by GetActiveConfig and accepted by SendActiveConfig.
// NOTE(review): the element shape is not fixed by the type — presumably
// PGConfigRow-like values; confirm against the producers.
type ConfigArraySchema []interface{}

type GuardrailSignal

// GuardrailSignal wraps a guardrail type for JSON serialization.
type GuardrailSignal struct {
	// GuardrailType is serialized under the JSON key "level", not under a key
	// derived from the field name.
	GuardrailType GuardrailType `json:"level"`
}

type GuardrailType

// GuardrailType is the string-typed severity of a guardrail. The defined
// values are Critical and NonCritical.
type GuardrailType string
const (
	// Critical is a guardrail that is critical to the operation of the database
	// and should be reverted immediately. This also means that the DBtune server
	// will revert to the baseline configuration to stabilise the system before recommending
	// a new configuration.
	Critical GuardrailType = "critical"
	// NonCritical is a guardrail that is not critical
	// to the operation of the database, but a new configuration
	// is recommended to be applied.
	NonCritical GuardrailType = "non_critical"
)

type IOCounterStat

// IOCounterStat holds a pair of unsigned read and write counters.
// NOTE(review): presumably cumulative disk I/O operation counts — confirm
// against the collector that fills them.
type IOCounterStat struct {
	ReadCount  uint64
	WriteCount uint64
}

type MetricCollector

// MetricCollector describes a single metric collector: an identifying key, a
// metric type, and the function that performs the collection.
type MetricCollector struct {
	// Key and MetricType are plain strings identifying the collector and the
	// kind of value it produces.
	// NOTE(review): Key presumably matches a MetricKey value — confirm.
	Key        string
	MetricType string
	// Collector receives a context and the shared metrics state and returns an
	// error on failure.
	// NOTE(review): results are presumably recorded via state.AddMetric — confirm.
	Collector  func(ctx context.Context, state *MetricsState) error
}

type MetricKey

// MetricKey is a string-typed identifier for a metric. It is the key type
// accepted by MetricsState.RemoveKey.
type MetricKey string
const (
	// NOTE(review): these names presumably correspond to MetricCollector.Key
	// values — confirm against the collector registration.
	QueryRuntime      MetricKey = "query_runtime"
	ActiveConnections MetricKey = "active_connections"
)

type MetricsState

// MetricsState holds the registered collectors, the cache carried across
// collection rounds, the metrics gathered in the current round, and a mutex.
type MetricsState struct {
	// Collectors is the list of metric collectors.
	Collectors []MetricCollector
	// Caching layer, for metrics that are derived based on multiple heartbeats
	Cache Caches
	// Every round of metric collections this array will be filled with the metrics
	// that are collected, and then emptied
	// NOTE(review): Mutex presumably guards Metrics (and possibly Cache) during
	// concurrent collection — confirm against AddMetric. It is a pointer, so
	// copies of MetricsState share the same lock.
	Metrics []utils.FlatValue
	Mutex   *sync.Mutex
}

func (*MetricsState) AddMetric

func (state *MetricsState) AddMetric(metric utils.FlatValue)

AddMetric appends a metric in a thread-safe way

func (*MetricsState) RemoveKey

func (state *MetricsState) RemoveKey(key MetricKey) error

type PGConfigRow

// PGConfigRow is one configuration entry: its name, setting, unit, value type
// and context, serialized to JSON under the lowercase keys in the struct tags.
// NOTE(review): the field names look like columns of PostgreSQL's pg_settings
// view — confirm against the query that populates them.
type PGConfigRow struct {
	Name    string      `json:"name"`
	// Setting and Unit are untyped; their concrete types depend on the decoder
	// or producer and are not fixed here.
	Setting interface{} `json:"setting"`
	Unit    interface{} `json:"unit"`
	Vartype string      `json:"vartype"`
	Context string      `json:"context"`
}

TODO: extract PostgreSQL specific types + methods to utils/separate place

func (PGConfigRow) GetSettingValue

func (p PGConfigRow) GetSettingValue() (string, error)

GetSettingValue returns the setting value in its appropriate type and format. This is needed for cases like Aurora RDS when modifying parameters.

type ProposedConfigResponse

// ProposedConfigResponse is the JSON shape of a proposed configuration: the
// configuration rows, a list of knob overrides, and a knob application value.
// It is returned by GetProposedConfig and consumed by ApplyConfig.
type ProposedConfigResponse struct {
	// Config is the list of proposed configuration rows.
	Config          []PGConfigRow `json:"config"`
	// NOTE(review): KnobsOverrides is presumably a list of knob names and
	// KnobApplication presumably selects restart vs. reload — confirm both
	// against the ApplyConfig implementations.
	KnobsOverrides  []string      `json:"knobs_overrides"`
	KnobApplication string        `json:"knob_application"`
}

type XactStat

// XactStat pairs a counter value with a timestamp.
// NOTE(review): presumably the committed-transaction count and the time it was
// sampled, so a rate can be derived from two samples — confirm against the
// collector.
type XactStat struct {
	Count     int64
	Timestamp time.Time
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL