Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AllocationResources ¶
// AllocationResources represents the allocation resource utilization.
type AllocationResources struct {
	// Integer quantities; the unit of each is given by its field-name suffix.
	MemoryMB, CPUMHz, DiskMB int

	// Percentage figures for the same three resources.
	MemoryPercent, CPUPercent, DiskPercent float64
}
AllocationResources represents the allocation resource utilization.
type ClusterAllocation ¶
// ClusterAllocation is the central object used to track cluster status and
// the data required to make scaling decisions.
type ClusterAllocation struct {
// NodeCount is the number of worker nodes in a ready and non-draining state across
// the cluster.
NodeCount int
// ScalingMetric indicates the most-utilized allocation resource across the cluster.
// The most-utilized resource is prioritized when making scaling decisions like
// identifying the least-allocated worker node.
ScalingMetric string
// MaxAllowedUtilization represents the max allowed cluster utilization after
// considering node fault-tolerance and task group scaling overhead.
MaxAllowedUtilization int
// TotalCapacity is the total allocation capacity across the cluster.
TotalCapacity AllocationResources
// UsedCapacity is the consumed allocation capacity across the cluster.
UsedCapacity AllocationResources
// TaskAllocation represents the total allocation requirements of a single instance
// (count 1) of all running jobs across the cluster. This is used to proactively
// ensure the cluster has sufficient available capacity to scale each task by +1
// if an increase in capacity is required.
TaskAllocation AllocationResources
// NodeList is a list of all worker nodes in a known good state.
NodeList []string
// NodeAllocations is a slice of node allocations.
NodeAllocations []*NodeAllocation
// ScalingDirection is the direction (in/out) of cluster scaling we require after
// performing the proper evaluation.
ScalingDirection string
// LastScalingEvent represents the last time the daemon attempted a scaling action.
LastScalingEvent time.Time
}
ClusterAllocation is the central object used to track cluster status and the data required to make scaling decisions.
type ClusterScaling ¶
// ClusterScaling is the configuration struct for the Nomad worker node
// scaling activities.
type ClusterScaling struct {
// Enabled indicates whether cluster scaling actions are permitted.
Enabled bool `mapstructure:"enabled"`
// MaxSize is the maximum number of instances the nomad node worker count is
// allowed to reach. This stops runaway increases in size due to misbehaviour
// but should be set high enough to accommodate usual workload peaks.
MaxSize int `mapstructure:"max_size"`
// MinSize is the minimum number of instances that should be present within
// the nomad node worker pool.
MinSize int `mapstructure:"min_size"`
// CoolDown is the number of seconds after a scaling activity completes before
// another can begin.
CoolDown float64 `mapstructure:"cool_down"`
// NodeFaultTolerance is the number of Nomad worker nodes the cluster can
// support losing, whilst still maintaining all existing workload.
NodeFaultTolerance int `mapstructure:"node_fault_tolerance"`
// AutoscalingGroup is the name of the ASG assigned to the Nomad worker nodes.
AutoscalingGroup string `mapstructure:"autoscaling_group"`
}
ClusterScaling is the configuration struct for the Nomad worker node scaling activities.
type Config ¶
// Config is the main configuration struct used to configure the replicator
// application.
type Config struct {
// Consul is the location of the Consul instance or cluster endpoint to query
// (may be an IP address or FQDN) with port.
Consul string `mapstructure:"consul"`
// Nomad is the location of the Nomad instance or cluster endpoint to query
// (may be an IP address or FQDN) with port.
Nomad string `mapstructure:"nomad"`
// LogLevel is the level at which the application should log from.
LogLevel string `mapstructure:"log_level"`
// Enforce is the boolean flag which dictates whether or not scaling events are
// actioned, or whether the application runs in report only mode.
Enforce bool `mapstructure:"enforce"`
// Region represents the AWS region the cluster resides in. Note that the
// configuration key is "aws_region", not "region".
Region string `mapstructure:"aws_region"`
// ClusterScaling is the configuration struct that controls the basic Nomad
// worker node scaling.
ClusterScaling *ClusterScaling `mapstructure:"cluster_scaling"`
// JobScaling is the configuration struct that controls the basic Nomad
// job scaling.
JobScaling *JobScaling `mapstructure:"job_scaling"`
// Telemetry is the configuration struct that controls the telemetry settings.
Telemetry *Telemetry `mapstructure:"telemetry"`
// SetKeys is the list of config keys that were overridden by the user.
SetKeys map[string]struct{}
// ConsulClient provides a client to interact with the Consul API.
ConsulClient ConsulClient
// NomadClient provides a client to interact with the Nomad API.
NomadClient NomadClient
}
Config is the main configuration struct used to configure the replicator application.
type ConsulClient ¶
// ConsulClient is used to provide common method signatures for interacting
// with the Consul API.
type ConsulClient interface {
// GetJobScalingPolicies provides a list of Nomad jobs with a defined scaling
// policy document at a specified Consul Key/Value Store location. Supports
// the use of an ACL token if required by the Consul cluster.
GetJobScalingPolicies(*Config, NomadClient) ([]*JobScalingPolicy, error)
}
The ConsulClient interface is used to provide common method signatures for interacting with the Consul API.
type GroupScalingPolicy ¶
// GroupScalingPolicy represents the scaling policy of an individual group
// within a single job.
type GroupScalingPolicy struct {
// GroupName is the job's Group name which this scaling policy represents.
GroupName string `json:"name"`
// Tasks holds a single TaskAllocation value (not a list) and maps to the
// "task_resources" JSON key.
Tasks TaskAllocation `json:"task_resources"`
// ScalingMetric represents the most-utilized resource within the task group.
ScalingMetric string
// Scaling points to the Scaling object for this group (a single object, not
// a list).
Scaling *Scaling
}
GroupScalingPolicy represents the scaling policy of an individual group within a single job.
type JobScaling ¶
// JobScaling is the configuration struct for the Nomad job scaling activities.
type JobScaling struct {
// Enabled indicates whether job scaling actions are permitted.
Enabled bool `mapstructure:"enabled"`
// ConsulToken is the Consul ACL token used to access KeyValues from a
// secure Consul installation.
ConsulToken string `mapstructure:"consul_token"`
// ConsulKeyLocation is the Consul key location where scaling policies are
// defined.
ConsulKeyLocation string `mapstructure:"consul_key_location"`
}
JobScaling is the configuration struct for the Nomad job scaling activities.
type JobScalingPolicies ¶
// JobScalingPolicies is a list of pointers to JobScalingPolicy objects.
type JobScalingPolicies []*JobScalingPolicy
JobScalingPolicies is a list of JobScalingPolicy objects.
type JobScalingPolicy ¶
// JobScalingPolicy is a struct which represents an individual job scaling
// policy document.
type JobScalingPolicy struct {
// JobName is the name of the Nomad job represented by the Consul Key/Value.
JobName string
// Enabled is a boolean flag which dictates whether scaling events for the job
// should be enforced and is used for testing purposes.
Enabled bool `json:"enabled"`
// GroupScalingPolicies is a list of GroupScalingPolicy objects; it maps to
// the "groups" JSON key.
GroupScalingPolicies []*GroupScalingPolicy `json:"groups"`
}
JobScalingPolicy is a struct which represents an individual job scaling policy document.
type NodeAllocation ¶
// NodeAllocation describes the resource consumption of a specific worker node.
type NodeAllocation struct {
// NodeID is the unique ID of the worker node.
NodeID string
// NodeIP is the private IP of the worker node.
NodeIP string
// UsedCapacity represents the percentage of total cluster resources consumed by
// the worker node.
UsedCapacity AllocationResources
}
NodeAllocation describes the resource consumption of a specific worker node.
type NomadClient ¶
// NomadClient exposes all API methods needed to interact with the Nomad API,
// evaluate cluster capacity and allocations and make scaling decisions.
type NomadClient interface {
// ClusterAllocationCapacity determines the total cluster capacity and current
// number of worker nodes.
ClusterAllocationCapacity(*ClusterAllocation) error
// ClusterAssignedAllocation determines the consumed capacity across the
// cluster and tracks the resource consumption of each worker node.
ClusterAssignedAllocation(*ClusterAllocation) error
// DrainNode places a worker node in drain mode to stop future allocations and
// migrate existing allocations to other worker nodes.
DrainNode(string) error
// EvaluateClusterCapacity determines if a cluster scaling action is required.
EvaluateClusterCapacity(*ClusterAllocation, *Config) (bool, error)
// EvaluateJobScaling compares the consumed resource percentages of a Job group
// against its scaling policy to determine whether a scaling event is required.
EvaluateJobScaling([]*JobScalingPolicy)
// GetAllocationStats discovers the resources consumed by a particular Nomad
// allocation.
GetAllocationStats(*nomad.Allocation, *GroupScalingPolicy)
// GetJobAllocations identifies all allocations for an active job.
GetJobAllocations([]*nomad.AllocationListStub, *GroupScalingPolicy)
// LeaderCheck determines if the node running replicator is the gossip pool
// leader.
LeaderCheck() bool
// LeastAllocatedNode determines the worker node consuming the least amount of
// the cluster's most-utilized resource.
// NOTE(review): returns two strings — presumably the node's ID and IP (cf.
// NodeAllocation); confirm against the implementation.
LeastAllocatedNode(*ClusterAllocation) (string, string)
// MostUtilizedResource calculates which resource is most-utilized across the
// cluster. The worst-case allocation resource is prioritized when making
// scaling decisions.
MostUtilizedResource(*ClusterAllocation)
// IsJobRunning checks to see whether the specified jobID has any currently
// running task groups on the cluster.
IsJobRunning(string) bool
// JobScale takes a job scaling policy.
// NOTE(review): the original comment was empty — presumably this applies the
// scaling described by the policy; confirm against the implementation.
JobScale(*JobScalingPolicy)
// TaskAllocationTotals calculates the allocations required by each running
// job and what amount of resources would be required if we increased the count
// of each job by one. This allows the cluster to proactively ensure it has
// sufficient capacity for scaling events and deal with potential node failures.
TaskAllocationTotals(*ClusterAllocation) error
}
NomadClient exposes all API methods needed to interact with the Nomad API, evaluate cluster capacity and allocations and make scaling decisions.
type Scaling ¶
// Scaling represents the scaling policy of a Nomad Job Group as well as
// details of any scaling activities which should take place during the
// current daemon run.
type Scaling struct {
// Min is the minimum number of tasks the job should have running at any one
// time.
Min int `json:"min"`
// Max is the maximum number of tasks the job should have running at any one
// time.
Max int `json:"max"`
// ScaleDirection is populated by either out/in/none depending on the evaluation
// of a scaling event happening.
ScaleDirection string
// ScaleOut is the job scaling out policy which will contain the thresholds
// which control scaling activities.
ScaleOut *scaleout `json:"scaleout"`
// ScaleIn is the job scaling in policy which will contain the thresholds
// which control scaling activities.
ScaleIn *scalein `json:"scalein"`
}
Scaling struct represents the scaling policy of a Nomad Job Group as well as details of any scaling activities which should take place during the current daemon run.
type TaskAllocation ¶
// TaskAllocation describes the resource requirements defined in the job
// specification.
type TaskAllocation struct {
// TaskName is the name given to the task within the job specification.
TaskName string
// Resources tracks the resource requirements defined in the job spec and the
// real-time utilization of those resources.
Resources AllocationResources
}
TaskAllocation describes the resource requirements defined in the job specification.
type Telemetry ¶
// Telemetry is the struct that controls the telemetry configuration. If a
// value is present then telemetry is enabled. Currently only statsd is
// supported for sending telemetry.
type Telemetry struct {
// StatsdAddress specifies the address of a statsd server to forward metrics
// to and should include the port.
StatsdAddress string `mapstructure:"statsd_address"`
}
Telemetry is the struct that controls the telemetry configuration. If a value is present then telemetry is enabled. Currently only statsd is supported for sending telemetry.