types

package

v1.0.0 Latest Latest Go to latest Published: Oct 13, 2021 License: Apache-2.0, BSD-3-Clause, MIT Imports: 19 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/tencent/caelus

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func AllResCompressible(res []string) bool
func GetDeviceNameFromMetric(metric string) (dev, devMetric, originalMetric string)
func InitHealthCheckConfigFunc(nodeMetrics *MetricsNodeConfig, predictReserved *Resource) func(string) (*HealthCheckConfig, error)
func InitPredictConfig(config *PredictConfig)
func OfflineOnYarn(config *TaskTypeConfig) bool
type ActionConfig
type AggregationsConfig
type AlarmChannel
type AlarmConfig
type CPIManagerConfig
type CaelusConfig
- func ParseJsonConfig(configFile string) (*CaelusConfig, error)
type CheckPointConfig
type ComponentConfig
type CpuIsolateConfig
type CpuQuotaConfig
type CpuSetConfig
type CustomMetric
type DetectActionConfig
type DetectConfig
type Devices
type DiskQuotaConfig
type DiskQuotaSize
type EWMAArgs
type ExpressionArgs
type HealthCheckConfig
type K8sConfig
type LocalAlarm
type LocalPredictConfig
type MemoryNotifyConfig
type MemoryPressureNotifyConfig
type MemoryUsageNotifyConfig
type MetricKind
type MetricsCollectConfig
type MetricsContainerConfig
type MetricsNodeConfig
type MetricsPerfConfig
type MetricsPrometheus
type MetricsRdtConfig
type MetricsSource
type NodeResourceConfig
type NotifyConfig
type OfflineJobs
type OnlineConfig
type OnlineJobConfig
type OnlineMetrics
type OverCommit
type PathInfo
type PidToCgroup
type PredictConfig
type PrometheusData
type RangeResource
type RangeState
type RemoteAlarm
type Resource
type ResourceIsolateConfig
type ResourceUpdateEvent
type RoundOffResource
type RuleCheck
type RuleCheckConfig
type SharedInfo
type SilenceConfig
type TaskTypeConfig
type TimeRangeOverCommit
type VolumeType
- func (vt VolumeType) String() string
type YarnDisksConfig
type YarnNodeResourceConfig

Constants ¶

View Source

// Names used when configuring detection.
const (
	// ExpresstionAutoDetect is the literal "auto".
	// NOTE(review): the identifier misspells "Expression"; it is exported, so renaming it
	// would break callers — keep it unless a correctly spelled alias is introduced.
	ExpresstionAutoDetect = "auto"

	// DetectionExpression, DetectionEWMA and DetectionUnion are detection kind names.
	// NOTE(review): presumably matched against DetectConfig.Name — confirm against the code that builds detectors.
	DetectionExpression = "expression"
	DetectionEWMA       = "ewma"
	DetectionUnion      = "union"
)

View Source

// Predictor type names and the default node resource metric type.
const (
	// LocalPredictorType is the name of the local predictor.
	LocalPredictorType = "local"
	// VPAPredictorType is the name of the remote VPA predictor.
	VPAPredictorType = "vpa"

	// NodeResourceTypeOnlinePredict is the literal "online_predict".
	// NOTE(review): presumably a value for the "type" label of the caelus_node_resource metric
	// (see PredictConfig.PredictMetricsType) — confirm at the usage site.
	NodeResourceTypeOnlinePredict = "online_predict"
)

View Source

// Task type names, alarm types, CPU manage policies, unit conversion factors and cgroup paths.
const (
	// Task types. Note that OnlineTypeOnK8s and OfflineTypeOnk8s share the value "k8s".
	OnlineTypeOnK8s      = "k8s"
	OnlineTypeOnLocal    = "local"
	OfflineTypeOnk8s     = "k8s"
	OfflineTypeYarnOnk8s = "yarn_on_k8s"

	// Alarm types.
	AlarmTypeLocal  = "local"
	AlarmTypeRemote = "remote"

	// CPU manage policies. CpuManagePolicyBT is just for tencent OS.
	CpuManagePolicyBT       = "bt"
	CpuManagePolicySet      = "cpuset"
	CpuManagePolicyQuota    = "quota"
	CpuManagePolicyAdaptive = "adaptive"

	// MemUnit translates Mb to bytes (1024^2); MemGbUnit translates Gb to bytes (1024^3).
	MemUnit   = int64(1024 * 1024)
	MemGbUnit = int64(1024 * 1024 * 1024)
	// CpuUnit translates cores to milli cores.
	CpuUnit = int64(1000)
	// DiskUnit translates Gi to bytes (1024^3).
	DiskUnit = int64(1024 * 1024 * 1024)

	// PodAnnotationPrefix is the fixed prefix of pod annotations.
	PodAnnotationPrefix = "mixer.kubernetes.io/"

	// RootFS is the root directory in container.
	RootFS                     = "/rootfs"
	CgroupKubePods             = "/kubepods"
	CgroupOffline              = "/kubepods/offline"
	CgroupOfflineSystem        = CgroupOffline + "/system"
	SystemComponentOomScoreAdj = "500"
	CgroupYarn                 = "hadoop-yarn"
	// CgroupNonK8sOnline is the cgroup for online jobs, which are not running on k8s, we need to create the cgroup
	// and children cgroup manually.
	CgroupNonK8sOnline = "/onlinejobs"
)

View Source

const (
	// ContainerRuntimeDocker is the name of the docker container runtime.
	ContainerRuntimeDocker = "docker"
)

Variables ¶

View Source

var (
	// AvailablePredictType is the set of predictor type names: LocalPredictorType and VPAPredictorType.
	// AvailableLocalPredictType is the set containing only LocalPredictorType.
	AvailablePredictType      = sets.NewString(LocalPredictorType, VPAPredictorType)
	AvailableLocalPredictType = sets.NewString(LocalPredictorType)
)

View Source

var (
	// AvailableOnlineTaskType describes the available online task types, which may be a pod or a local process.
	AvailableOnlineTaskType = sets.NewString(OnlineTypeOnK8s, OnlineTypeOnLocal)
	// AvailableOfflineTaskType describes the available offline task types, which may be a pod or a yarn job.
	AvailableOfflineTaskType = sets.NewString(OfflineTypeOnk8s, OfflineTypeYarnOnk8s)

	// AvailableAlarmType is the set of available alarm types.
	AvailableAlarmType = sets.NewString(AlarmTypeLocal, AlarmTypeRemote)

	// AvailableCpuManagePolicy is the set of available cpu manage policies.
	AvailableCpuManagePolicy = sets.NewString(CpuManagePolicyBT, CpuManagePolicySet, CpuManagePolicyQuota,
		CpuManagePolicyAdaptive)

	// CompressibleRes is the set of resource names treated as compressible; it contains only v1.ResourceCPU.
	CompressibleRes = sets.NewString(string(v1.ResourceCPU))
)

Functions ¶

func AllResCompressible ¶

func AllResCompressible(res []string) bool

AllResCompressible checks whether the given resources are all compressible.

func GetDeviceNameFromMetric ¶

func GetDeviceNameFromMetric(metric string) (dev, devMetric, originalMetric string)

GetDeviceNameFromMetric parses the metric name and returns the device name and the device metric name.

func InitHealthCheckConfigFunc ¶

func InitHealthCheckConfigFunc(nodeMetrics *MetricsNodeConfig,
	predictReserved *Resource) func(string) (*HealthCheckConfig, error)

InitHealthCheckConfigFunc returns a function that gets the health check config.

func InitPredictConfig ¶

func InitPredictConfig(config *PredictConfig)

InitPredictConfig validates and formats the predict config.

func OfflineOnYarn ¶

func OfflineOnYarn(config *TaskTypeConfig) bool

OfflineOnYarn check if offline job is running on YARN

Types ¶

type ActionConfig ¶

// ActionConfig defines an action by name together with its arguments.
// ArgsStr holds the raw JSON found under the "args" key; Args is excluded from JSON (tag "-").
// NOTE(review): Args is presumably decoded from ArgsStr according to Name — confirm at the call site.
type ActionConfig struct {
	Name    string          `json:"name"`
	ArgsStr json.RawMessage `json:"args"`
	Args    interface{}     `json:"-"`
}

ActionConfig define action config

type AggregationsConfig ¶

// AggregationsConfig is used to configure aggregation behaviour.
type AggregationsConfig struct {
	// MemoryAggregationInterval is the length of a single interval, for
	// which the peak memory usage is computed.
	// Memory usage peaks are aggregated in multiples of this interval. In other words
	// there is one memory usage sample per interval (the maximum usage over that
	// interval).
	MemoryAggregationInterval times.Duration `json:"memory_aggregation_interval"`
	// MemoryAggregationIntervalCount is the number of consecutive MemoryAggregationIntervals
	// which make up the memory aggregation window, which in turn is the period for memory
	// usage aggregation by VPA.
	MemoryAggregationIntervalCount int64 `json:"memory_aggregation_interval_count"`
	// MemoryHistogramDecayHalfLife is the amount of time it takes a historical
	// memory usage sample to lose half of its weight. In other words, a fresh
	// usage sample is twice as 'important' as one with age equal to the half
	// life period.
	MemoryHistogramDecayHalfLife times.Duration `json:"memory_histogram_decay_half_life"`
	// CPUHistogramDecayHalfLife is the amount of time it takes a historical
	// CPU usage sample to lose half of its weight.
	CPUHistogramDecayHalfLife times.Duration `json:"cpu_histogram_decay_half_life"`
}

AggregationsConfig is used to configure aggregation behaviour.

type AlarmChannel ¶

// AlarmChannel groups the alarm channels: a local one and a remote one.
// Both fields are pointers, so either may be nil when its JSON key is absent.
type AlarmChannel struct {
	LocalAlarm  *LocalAlarm  `json:"local"`
	RemoteAlarm *RemoteAlarm `json:"remote"`
}

AlarmChannel struct is used to show alarm channel

type AlarmConfig ¶

// AlarmConfig groups the options for sending alarm messages.
// The embedded AlarmChannel carries an explicit JSON name, so encoding/json reads it from
// the nested "alarm_channel" object instead of flattening its fields into this one.
// NOTE(review): ChannelName is presumably one of the AlarmType* constants — confirm where it is validated.
type AlarmConfig struct {
	Enable                 bool           `json:"enable"`
	Cluster                string         `json:"cluster"`
	MessageBatch           int            `json:"message_batch"`
	MessageDelay           times.Duration `json:"message_delay"`
	ChannelName            string         `json:"channel_name"`
	IgnoreAlarmWhenSilence bool           `json:"ignore_alarm_when_silence"`
	AlarmChannel           `json:"alarm_channel"`
}

AlarmConfig group options to send alarm message

type CPIManagerConfig ¶

// CPIManagerConfig is the configuration for cpi detecting.
// PrometheusAddrStr is read from the "prometheus_addr" key; PrometheusAddr is excluded from JSON.
// NOTE(review): PrometheusAddr is presumably parsed from PrometheusAddrStr during initialization — confirm.
type CPIManagerConfig struct {
	// Enable turns the feature on; the zero value (false) leaves it disabled by default.
	Enable            bool           `json:"enable"`
	WindowDuration    times.Duration `json:"window_duration"`
	PrometheusAddrStr string         `json:"prometheus_addr"`
	PrometheusAddr    url.URL        `json:"-"`
	MaxJobSpecRange   times.Duration `json:"max_job_spec_range"`
}

CPIManagerConfig show the configuration for cpi detecting

type CaelusConfig ¶

// CaelusConfig is the top-level configuration for Caelus; each field maps to one JSON section.
type CaelusConfig struct {
	K8sConfig    K8sConfig          `json:"k8s_config"`
	CheckPoint   CheckPointConfig   `json:"check_point"`
	TaskType     TaskTypeConfig     `json:"task_type"`
	NodeResource NodeResourceConfig `json:"node_resource"`
	// If multiple predicts are given, the first one is used for real prediction. The rest are experiment
	// predicts: caelus only feeds samples to them and exposes predict metrics for them.
	Predicts        []PredictConfig       `json:"predicts"`
	Metrics         MetricsCollectConfig  `json:"metrics"`
	ResourceIsolate ResourceIsolateConfig `json:"resource_isolate"`
	CpiManager      CPIManagerConfig      `json:"cpi_manager"`
	Alarm           AlarmConfig           `json:"alarm"`
	Online          OnlineConfig          `json:"online"`
	DiskQuota       DiskQuotaConfig       `json:"disk_quota"`
}

CaelusConfig is the configuration for Caelus

func ParseJsonConfig ¶

func ParseJsonConfig(configFile string) (*CaelusConfig, error)

ParseJsonConfig parse json config

type CheckPointConfig ¶

// CheckPointConfig groups the settings related to check points, which save state to a local file.
// NOTE(review): CheckPointDir is presumably the directory holding check point files and
// NodeResourceKey the key under which node resource state is stored — confirm at the usage site.
type CheckPointConfig struct {
	CheckPointDir   string `json:"check_point_dir"`
	NodeResourceKey string `json:"node_resource_key"`
}

CheckPointConfig groups the settings related to check points, which save state to a local file.

type ComponentConfig ¶

// ComponentConfig is the config to specify a non-containerized component,
// given as a cgroup and a command.
type ComponentConfig struct {
	Cgroup  string `json:"cgroup"`
	Command string `json:"command"`
}

ComponentConfig is the config to specify a non-containerized component.

type CpuIsolateConfig ¶

// CpuIsolateConfig is the configuration for cpu isolation.
type CpuIsolateConfig struct {
	// AutoDetect will enable the bt feature if supported, with quota as the second choice.
	AutoDetect bool `json:"auto_detect"`
	// ManagePolicy assigns the cpu manage policy; the CpuManagePolicy* constants name the known policies.
	ManagePolicy   string         `json:"manage_policy"`
	CpuSetConfig   CpuSetConfig   `json:"cpuset_config"`
	CpuQuotaConfig CpuQuotaConfig `json:"cpu_quota_config"`
	// KubeletStatic records whether the cpu manager policy for kubelet is static; it is not read from JSON.
	KubeletStatic bool `json:"-"`
}

CpuIsolateConfig is the configuration for cpu isolation

type CpuQuotaConfig ¶

// CpuQuotaConfig describes the configuration for the cpu quota isolation policy.
type CpuQuotaConfig struct {
	// OfflineShare sets the offline job weights, just for the quota policy.
	// It is a pointer, so an absent "offline_share" key leaves it nil.
	OfflineShare *uint64 `json:"offline_share"`
}

CpuQuotaConfig describe configs for cpu quota isolation policy

type CpuSetConfig ¶

// CpuSetConfig describes the configuration for the cpuset isolation policy.
type CpuSetConfig struct {
	// EnableOnlineIsolate isolates online jobs from offline jobs.
	EnableOnlineIsolate bool `json:"enable_online_isolate"`
	// ReservedCpus is the cpu list that offline jobs will not be assigned to.
	ReservedCpus string `json:"reserved_cpus"`
}

CpuSetConfig describe configs for cpuset isolation policy

type CustomMetric ¶

// CustomMetric defines the custom metric config: the metric server address and the collect interval.
type CustomMetric struct {
	MetricServerAddr string         `json:"metric_server_addr"`
	CollectInterval  times.Duration `json:"collect_interval"`
}

CustomMetric define custom metric config

type DetectActionConfig ¶

// DetectActionConfig defines a list of detectors and a list of actions.
// NOTE(review): presumably the actions run when the detectors fire — confirm in the rule checker.
type DetectActionConfig struct {
	Detects []*DetectConfig `json:"detects"`
	Actions []*ActionConfig `json:"actions"`
}

DetectActionConfig define detectors and actions

type DetectConfig ¶

// DetectConfig defines a detector by name together with its arguments.
// ArgsStr holds the raw JSON found under the "args" key; Args is excluded from JSON (tag "-").
// NOTE(review): Args is presumably decoded from ArgsStr into ExpressionArgs or EWMAArgs
// depending on Name — confirm at the call site.
type DetectConfig struct {
	Name    string          `json:"name"`
	ArgsStr json.RawMessage `json:"args"`
	Args    interface{}     `json:"-"`
}

DetectConfig define detector config

type Devices ¶

// Devices groups network and disk devices.
type Devices struct {
	// Ifaces are the network interfaces, e.g. eth0; those that do not exist or are down will be filtered out.
	// These ifaces will be assigned to metrics.node.ifaces.
	// NOTE(review): IfacesWithProperty is read from the JSON key "ifaces_xxx", which looks like a
	// placeholder — confirm the intended key before relying on it. Ifaces itself is excluded from JSON.
	IfacesWithProperty []string `json:"ifaces_xxx"`
	Ifaces             []string `json:"-"`
	// DiskNames are the disk names, e.g. sda, vda; those that do not exist will be filtered out.
	// These disks will be assigned to metrics.node.deviceNames.
	DiskNames []string `json:"disk_names"`
}

Devices group network and disk devices

type DiskQuotaConfig ¶

// DiskQuotaConfig groups the disk quota configurations.
type DiskQuotaConfig struct {
	Enabled     bool           `json:"enabled"`
	CheckPeriod times.Duration `json:"check_period"`
	// ContainerRuntime is the container runtime name, such as docker or containerd.
	ContainerRuntime string `json:"container_runtime"`
	// VolumeSizes gives the quota size per volume type, just for offline jobs;
	// online jobs need to announce their quota in annotations.
	VolumeSizes map[VolumeType]*DiskQuotaSize `json:"volume_sizes"`
}

DiskQuotaConfig group disk quota configurations

type DiskQuotaSize ¶

// DiskQuotaSize holds a quota and an inode limit together with their used counterparts.
// Quota and Inodes are read from JSON; QuotaUsed and InodesUsed are excluded from JSON (tag "-").
// NOTE(review): the unit of Quota is not visible here (presumably bytes) — confirm.
type DiskQuotaSize struct {
	Quota      uint64 `json:"quota"`
	Inodes     uint64 `json:"inodes"`
	QuotaUsed  uint64 `json:"-"`
	InodesUsed uint64 `json:"-"`
}

DiskQuotaSize holds the quota and inode limits together with their used amounts. TODO: shall we support the soft limit feature?

type EWMAArgs ¶

// EWMAArgs groups the args used for ewma detection.
// NOTE(review): the meaning of Nr is not visible here — confirm in the ewma detector.
type EWMAArgs struct {
	Metric string `json:"metric"`
	Nr     int    `json:"nr"`
}

EWMAArgs group args used for ewma detection

type ExpressionArgs ¶

// ExpressionArgs groups the args used for expression detection.
// NOTE(review): WarningCount and WarningDuration presumably bound how many warnings within what
// time span trigger the detector — confirm in the expression detector.
type ExpressionArgs struct {
	Expression      string         `json:"expression"`
	WarningCount    int            `json:"warning_count"`
	WarningDuration times.Duration `json:"warning_duration"`
}

ExpressionArgs group args used for expression detection

type HealthCheckConfig ¶

// HealthCheckConfig is the config for checking health, such as node load or online job interference.
type HealthCheckConfig struct {
	Disable      bool         `json:"disable"`
	RuleNodes    []string     `json:"rule_nodes"`
	RuleCheck    RuleCheck    `json:"rule_check"`
	CgroupNotify NotifyConfig `json:"cgroup_notify"`
	// PredictReserved is not read from JSON; its value is assigned during initialization.
	PredictReserved *Resource `json:"-"`
}

HealthCheckConfig is the config for checking health, such as node load or online job interference

type K8sConfig ¶

// K8sConfig holds the kubernetes related config, currently only the kubelet root directory.
type K8sConfig struct {
	KubeletRootDir string `json:"kubelet_root_dir"`
}

K8sConfig show kubernetes config

type LocalAlarm ¶

// LocalAlarm describes the local alarm body.
// NOTE(review): Executor is presumably a local program invoked to deliver the alarm — confirm.
type LocalAlarm struct {
	Executor string `json:"executor"`
}

LocalAlarm struct is used to describe local alarm body

type LocalPredictConfig ¶

// LocalPredictConfig groups the options for the local predictor.
type LocalPredictConfig struct {
	// Minimum CPU recommendation for a pod
	PodMinCPUMillicores float64 `json:"pod_min_cpu_millicores"`
	// Minimum memory recommendation for a pod
	PodMinMemoryMb float64 `json:"pod_min_memory_mb"`
	// Fraction of usage added as the safety margin to the recommended request
	SafetyMarginFraction float64 `json:"safety_margin_fraction"`
	// cpu usage percentile used to recommend the cpu resource
	CPUPercentile float64 `json:"cpu_percentile"`
	// memory usage peaks percentile used to recommend the memory resource
	MemoryPeaksPercentile float64 `json:"memory_peaks_percentile"`
	// AggregationsConfig is used to configure aggregation behaviour.
	// It is embedded without a JSON name, so encoding/json flattens its fields into this object;
	// the ",inline" option itself is not one that encoding/json defines.
	AggregationsConfig `json:",inline"`
	// Enable tuning the cpu weight if cpu usage is anomalous
	EnableTuneCPUWeight bool `json:"enable_tune_cpu_weight"`
	// AnomalyDetectorMovingWindow defines how long the moving window of anomaly detector should keep
	AnomalyDetectorMovingWindow times.Duration `json:"anomaly_detector_moving_window"`
	// The weight of the anomaly sample when a cpu usage increasing anomaly is detected.
	// Base weight is 100
	IncreasingAnomalyWeightFactor int64 `json:"increasing_anomaly_weight_factor"`
	// The weight of the anomaly sample when a cpu usage decreasing anomaly is detected.
	// Base weight is 100
	DecreasingAnomalyWeightFactor int64 `json:"decreasing_anomaly_weight_factor"`
}

LocalPredictConfig group options for local predictor

type MemoryNotifyConfig ¶

// MemoryNotifyConfig describes the memory cgroup notifications: pressure-based and usage-based ones.
type MemoryNotifyConfig struct {
	Pressures []MemoryPressureNotifyConfig `json:"pressures"`
	Usages    []MemoryUsageNotifyConfig    `json:"usages"`
}

MemoryNotifyConfig describe memory cgroup notify

type MemoryPressureNotifyConfig ¶

// MemoryPressureNotifyConfig describes the memory.pressure_level notify data.
type MemoryPressureNotifyConfig struct {
	Cgroups       []string `json:"cgroups"`
	PressureLevel string   `json:"pressure_level"`
	// Duration is the time span the pressure has been kept.
	Duration times.Duration `json:"duration"`
	// Count is the number of events within the Duration time span.
	Count int `json:"count"`
}

MemoryPressureNotifyConfig describe memory.pressure_level notify data

type MemoryUsageNotifyConfig ¶

// MemoryUsageNotifyConfig describes the memory.usage_in_bytes notify data.
type MemoryUsageNotifyConfig struct {
	Cgroups []string `json:"cgroups"`
	// MarginMb is the distance between the limit and the threshold.
	MarginMb int `json:"margin_mb"`
	// Duration is when to handle the event after receiving it.
	Duration times.Duration `json:"duration"`
}

MemoryUsageNotifyConfig describe memory.usage_in_bytes notify data

type MetricKind ¶

// MetricKind represents the kind of metrics that cAdvisor exposes.
type MetricKind string

MetricKind represent the kind of metrics that cAdvisor exposes.

type MetricsCollectConfig ¶

// MetricsCollectConfig is the configuration for metrics collection, with one section
// per source: node, container, perf, rdt and prometheus.
type MetricsCollectConfig struct {
	Node       MetricsNodeConfig      `json:"node"`
	Container  MetricsContainerConfig `json:"container"`
	Perf       MetricsPerfConfig      `json:"perf"`
	Rdt        MetricsRdtConfig       `json:"rdt"`
	Prometheus MetricsPrometheus      `json:"prometheus"`
}

MetricsCollectConfig is the configuration for metrics collection

type MetricsContainerConfig ¶

// MetricsContainerConfig is the configuration for container metrics collection.
// NOTE(review): MaxHousekeepingInterval presumably maps to the cAdvisor housekeeping setting — confirm.
type MetricsContainerConfig struct {
	Resources               []string       `json:"resources"`
	Cgroups                 []string       `json:"cgroups"`
	CollectInterval         times.Duration `json:"collect_interval"`
	MaxHousekeepingInterval times.Duration `json:"max_housekeeping_interval"`
}

MetricsContainerConfig is the configuration for container metrics collection

type MetricsNodeConfig ¶

// MetricsNodeConfig is the configuration for node metrics collection.
// OfflineType is excluded from JSON. Devices is embedded without a JSON name, so encoding/json
// flattens its fields into this object; the ",inline" option itself is not one that encoding/json defines.
type MetricsNodeConfig struct {
	CollectInterval times.Duration `json:"collect_interval"`
	SystemProcesses []string       `json:"system_processes"`
	OfflineType     string         `json:"-"`
	Devices         `json:",inline"`
}

MetricsNodeConfig is the configuration for node metrics collection

type MetricsPerfConfig ¶

// MetricsPerfConfig is the configuration for perf metrics collection.
// NOTE(review): CollectInterval is presumably the time between collections and CollectDuration
// the length of each collection — confirm in the perf collector.
type MetricsPerfConfig struct {
	Disable         bool           `json:"disable"`
	CollectInterval times.Duration `json:"collect_interval"`
	CollectDuration times.Duration `json:"collect_duration"`
	IgnoredCgroups  []string       `json:"ignored_cgroups"`
}

MetricsPerfConfig is the configuration for perf metrics collection

type MetricsPrometheus ¶

// MetricsPrometheus describes how to collect prometheus metrics.
type MetricsPrometheus struct {
	CollectInterval times.Duration `json:"collect_interval"`
	// DisableShow controls whether these metrics are shown with the prefix "caelus_".
	// NOTE(review): presumably true suppresses showing them — confirm at the usage site.
	DisableShow bool              `json:"disable_show"`
	Items       []*PrometheusData `json:"items"`
}

MetricsPrometheus describe how to collect prometheus metrics

type MetricsRdtConfig ¶

// MetricsRdtConfig is the configuration for RDT metrics collection.
// NOTE(review): how CollectInterval, CollectDuration and ExecuteInterval relate to runs of
// RdtCommand is not visible here — confirm in the RDT collector.
type MetricsRdtConfig struct {
	Disable         bool           `json:"disable"`
	RdtCommand      string         `json:"rdt_command"`
	CollectInterval times.Duration `json:"collect_interval"`
	CollectDuration times.Duration `json:"collect_duration"`
	ExecuteInterval times.Duration `json:"execute_interval"`
}

MetricsRdtConfig is the configuration for RDT metrics collection

type MetricsSource ¶

// MetricsSource defines the metrics source of online services, either a command or a url.
type MetricsSource struct {
	CheckInterval times.Duration `json:"check_interval"`
	// MetricsCommand is a command to get the job's current metrics value. It must return data in this format:
	// {"code":0,"msg":"success","data":[{"job_name":"","metric_name":"","key1":xx,"key2":xx,...}]}
	MetricsCommand []string `json:"metrics_command"`
	// CmdNeedChroot tells whether chroot is needed when executing the metrics command.
	// It is a pointer, so an absent "cmd_need_chroot" key leaves it nil.
	CmdNeedChroot *bool `json:"cmd_need_chroot"`
	// MetricsURL is a url to get the job's metrics value. It must return data in this format:
	// <slo>,<metrics>
	MetricsURL string `json:"metrics_url"`
}

MetricsSource define metrics source of online services

type NodeResourceConfig ¶

// NodeResourceConfig groups the configuration for node resources.
// OfflineType is excluded from JSON.
type NodeResourceConfig struct {
	Disable        bool           `json:"disable"`
	UpdateInterval times.Duration `json:"update_interval"`
	OfflineType    string         `json:"-"`
	// DisableKillIfNormal does not kill pods when no resource is in conflicting status.
	// NOTE(review): OnlyKillIfIncompressibleRes presumably restricts killing to conflicts on
	// resources outside CompressibleRes — confirm at the usage site.
	DisableKillIfNormal         bool                   `json:"disable_kill_if_normal"`
	OnlyKillIfIncompressibleRes bool                   `json:"only_kill_if_incompressible_res"`
	YarnConfig                  YarnNodeResourceConfig `json:"yarn_config"`
	Silence                     SilenceConfig          `json:"silence"`
}

NodeResourceConfig group configuration for node

type NotifyConfig ¶

// NotifyConfig configures monitoring resources by kernel notify.
// MemoryCgroup is a pointer, so an absent "memory_cgroup" key leaves it nil.
type NotifyConfig struct {
	MemoryCgroup *MemoryNotifyConfig `json:"memory_cgroup"`
}

NotifyConfig monitor resource by kernel notify

type OfflineJobs ¶

// OfflineJobs describes offline job features, such as resources and state.
// The fields carry no JSON tags.
// NOTE(review): the concrete type stored in Metadata is not visible here — confirm at the producers.
type OfflineJobs struct {
	Metadata interface{}
	Request  v1.ResourceList
	Used     v1.ResourceList
	State    string
}

OfflineJobs describe offline job features, such as resource and state

type OnlineConfig ¶

// OnlineConfig is the online job configuration.
type OnlineConfig struct {
	Enable       bool              `json:"enable"`
	PidToCgroup  PidToCgroup       `json:"pid_to_cgroup"`
	Jobs         []OnlineJobConfig `json:"jobs"`
	CustomMetric CustomMetric      `json:"custom_metric"`
}

OnlineConfig show online job configuration

type OnlineJobConfig ¶

// OnlineJobConfig is the configuration of an online job.
type OnlineJobConfig struct {
	Name string `json:"name"`
	// Command is the job's command expression
	Command string          `json:"command"`
	Metrics []OnlineMetrics `json:"metrics"`
}

OnlineJobConfig is the configuration of a online job

type OnlineMetrics ¶

// OnlineMetrics defines the metric config of online services: a metric name and its source.
type OnlineMetrics struct {
	Name   string        `json:"name"`
	Source MetricsSource `json:"source"`
}

OnlineMetrics define metric config of online services

type OverCommit ¶

// OverCommit sets the overcommit percent for a resource.
// NOTE(review): Periods presumably override OverCommitPercent inside their time ranges — confirm.
type OverCommit struct {
	Enable            bool                  `json:"enable"`
	OverCommitPercent float64               `json:"over_commit_percent"`
	Periods           []TimeRangeOverCommit `json:"periods"`
}

OverCommit set overcommit percent for resource

type PathInfo ¶

// PathInfo groups a path and its quota options.
type PathInfo struct {
	Path string
	Size *DiskQuotaSize
	// If a share limit is set, SharedInfo contains the project id name;
	// if not, SharedInfo is nil.
	SharedInfo *SharedInfo
}

PathInfo group path and quota options

type PidToCgroup ¶

// PidToCgroup defines the online config of pid checking.
// Note that PidCheckInterval is read from the JSON key "pids_check_interval" (plural "pids").
type PidToCgroup struct {
	// PidCheckInterval could be zero
	PidCheckInterval    times.Duration `json:"pids_check_interval"`
	CgroupCheckInterval times.Duration `json:"cgroup_check_interval"`
	BatchNum            int            `json:"batch_num"`
}

PidToCgroup define online config of pid check

type PredictConfig ¶

// PredictConfig groups the options for a predictor.
type PredictConfig struct {
	Disable       bool           `json:"disable"`
	CheckInterval times.Duration `json:"check_interval"`
	// PredictType names the predictor. AvailablePredictType contains LocalPredictorType ("local")
	// and VPAPredictorType ("vpa").
	// NOTE(review): "localv2" has also been listed as a valid value but has no matching constant
	// here — confirm whether it is still accepted.
	PredictType       string   `json:"predict_type"`
	PredictServerAddr string   `json:"predict_server_addr"`
	ReserveResource   Resource `json:"reserve_resource"`
	// PrintInterval is the time interval to print the detailed predict log for debugging
	PrintInterval times.Duration `json:"print_interval"`
	// LocalPredictConfig is the configuration for local predictor.
	// It is embedded without a JSON name, so encoding/json flattens its fields into this object.
	LocalPredictConfig `json:",inline"`
	// The type value of online predict metrics caelus_node_resource{type=""}
	// It's used by experiment predict
	PredictMetricsType string `json:"predict_metrics_type"`
}

PredictConfig group options for predictor

type PrometheusData ¶

// PrometheusData describes which metrics to collect or not collect from an address.
// CollectMap and NoCollectMap are excluded from JSON.
// NOTE(review): they are presumably set forms of Collect and NoCollect built after parsing — confirm.
type PrometheusData struct {
	Address      string      `json:"address"`
	Collect      []string    `json:"collect"`
	NoCollect    []string    `json:"no_collect"`
	CollectMap   sets.String `json:"-"`
	NoCollectMap sets.String `json:"-"`
}

PrometheusData describe which metrics to collect or not collect

type RangeResource ¶

// RangeResource holds one RangeState for cpu and one for memory. It is used to decide whether
// a resource change is worth applying: small changes need no node resource update.
type RangeResource struct {
	CPUMilli RangeState `json:"cpu_milli"`
	MemMB    RangeState `json:"mem_mb"`
}

RangeResource is used to check whether a resource change is worth applying: there is no need to update the node resource when the changed quantity is small.

type RangeState ¶

// RangeState describes the range used to drop small resource changes.
type RangeState struct {
	// Min is the minimum range quantity
	Min float64 `json:"min"`
	// Max is the maximum range quantity
	Max float64 `json:"max"`
	// Ratio is used to calculate the change range quantity
	Ratio float64 `json:"ratio"`
}

RangeState describe range resource to drop little changing

type RemoteAlarm ¶

// RemoteAlarm describes the remote alarm body.
// Unlike most keys in this package, its JSON keys are camelCase ("remoteWebhook", "weWorkWebhook").
type RemoteAlarm struct {
	RemoteWebhook string `json:"remoteWebhook"`
	WeWorkWebhook string `json:"weWorkWebhook"`
}

RemoteAlarm struct is used to describe remote alarm body

type Resource ¶

// Resource is the cpu and memory configuration.
// CpuPercentStr and MemPercentStr are read from the "cpu_percent" and "mem_percent" keys;
// CpuPercent and MemPercent are excluded from JSON.
// NOTE(review): the numeric percents are presumably parsed from the string fields — confirm.
// The pointer fields stay nil when unset.
type Resource struct {
	CpuMilli      *float64 `json:"cpu_milli"`
	MemMB         *float64 `json:"mem_mb"`
	CpuPercentStr string   `json:"cpu_percent"`
	CpuPercent    *float64 `json:"-"`
	MemPercentStr string   `json:"mem_percent"`
	MemPercent    *float64 `json:"-"`
}

Resource is the cpu and memory configuration

type ResourceIsolateConfig ¶

// ResourceIsolateConfig is the offline job quota limit configuration for resources.
// DiskNames, EniIface, Iface, OnlineType and OfflineType are excluded from JSON.
type ResourceIsolateConfig struct {
	Disable         bool            `json:"disable"`
	ResourceDisable map[string]bool `json:"resource_disable"`
	UpdatePeriod    times.Duration  `json:"update_period"`
	// DiskNames are the disks that need an io weight set
	DiskNames []string `json:"-"`
	// EniIface is the eni iface for eni network pods
	EniIface string `json:"-"`
	// Iface is the normal iface for host network and global route network pods
	Iface              string            `json:"-"`
	CpuConfig          CpuIsolateConfig  `json:"cpu_config"`
	OnlineType         string            `json:"-"`
	OfflineType        string            `json:"-"`
	ExternalComponents []ComponentConfig `json:"external_components"`
}

ResourceIsolateConfig is the offline job quota limit configuration for resources

type ResourceUpdateEvent ¶

// ResourceUpdateEvent defines the event raised when offline resources need to be updated.
// It carries the conflicting resources and a reason string.
type ResourceUpdateEvent struct {
	ConflictRes []string
	Reason      string
}

ResourceUpdateEvent define the event when need to update offline resources

type RoundOffResource ¶

// RoundOffResource is used to round off resource quantities for cpu and memory.
// NOTE(review): each field is presumably the unit the quantity is rounded to a multiple of — confirm.
type RoundOffResource struct {
	CPUMilli float64 `json:"cpu_milli"`
	MemMB    float64 `json:"mem_mb"`
}

RoundOffResource is used to format resource quantities by rounding them off. For example, if the original memory is 1027Mi, we get 1024Mi after rounding off, which makes the memory exactly 2 times 512Mi.

type RuleCheck ¶

// RuleCheck groups all rules: container rules, node rules and app rules.
type RuleCheck struct {
	ContainerRules []*RuleCheckConfig `json:"container_rules"`
	NodeRules      []*RuleCheckConfig `json:"node_rules"`
	AppRules       []*RuleCheckConfig `json:"app_rules"`
}

RuleCheck group all rules

type RuleCheckConfig ¶

// RuleCheckConfig defines the config of one rule.
type RuleCheckConfig struct {
	Name    string   `json:"name"`
	Metrics []string `json:"metrics"`
	// CheckInterval describes the interval to trigger detection
	CheckInterval times.Duration `json:"check_interval"`
	// HandleInterval describes the interval to handle conflicts after detecting an abnormal result
	HandleInterval times.Duration `json:"handle_interval"`
	// RecoverInterval describes the interval to recover conflicts after detecting a normal result
	RecoverInterval times.Duration        `json:"recover_interval"`
	Rules           []*DetectActionConfig `json:"rules"`
	RecoverRules    []*DetectActionConfig `json:"recover_rules"`
}

RuleCheckConfig define the rule config

type SharedInfo ¶

// SharedInfo indicates whether a path has a shared quota; PathInfo.SharedInfo is nil when it has none.
type SharedInfo struct {
	PodName string
}

SharedInfo indicates whether a path has a shared quota.

type SilenceConfig ¶

// SilenceConfig describes the time periods during which running offline jobs is not allowed.
type SilenceConfig struct {
	// Periods is a list of two-element time-of-day pairs, e.g. [0:00:00, 5:00:00]
	Periods [][2]times.SecondsInDay `json:"periods"`
	// AheadOfUnSchedule disables scheduling ahead of a silence period.
	// NOTE(review): presumably the lead time before the period starts — confirm.
	AheadOfUnSchedule times.Duration `json:"ahead_of_unSchedule"`
}

SilenceConfig describes the time periods during which running offline jobs is not allowed.

type TaskTypeConfig ¶

// TaskTypeConfig holds the online and offline task types, e.g. offline is yarn on k8s.
// AvailableOnlineTaskType and AvailableOfflineTaskType list the known values.
type TaskTypeConfig struct {
	OnlineType  string `json:"online_type"`
	OfflineType string `json:"offline_type"`
}

TaskTypeConfig show the online and offline task type, such as offline is yarn on k8s.

type TimeRangeOverCommit ¶

// TimeRangeOverCommit sets the overcommit percent for a resource in a specific time range.
// Range is a two-element array of time-of-day values.
type TimeRangeOverCommit struct {
	Range             [2]times.SecondsInDay `json:"range"`
	OverCommitPercent float64               `json:"over_commit_percent"`
}

TimeRangeOverCommit set overcommit percent for resource in specific time range

type VolumeType ¶

// VolumeType names a kind of volume; it is the key type of DiskQuotaConfig.VolumeSizes.
type VolumeType string

// The known volume types, and AvailableVolumeTypes, the set of their string forms.
var (
	VolumeTypeRootFs     VolumeType = "rootFs"
	VolumeTypeEmptyDir   VolumeType = "emptyDir"
	VolumeTypeHostPath   VolumeType = "hostPath"
	AvailableVolumeTypes            = sets.NewString(
		VolumeTypeRootFs.String(),
		VolumeTypeEmptyDir.String(),
		VolumeTypeHostPath.String())
)

func (VolumeType) String ¶

func (vt VolumeType) String() string

String output volume type to string

type YarnDisksConfig ¶

// YarnDisksConfig groups the disks config.
type YarnDisksConfig struct {
	// RatioToCore translates disk space to core numbers
	RatioToCore      int64 `json:"ratio_to_core"`
	MultiDiskDisable bool  `json:"multi_disk_disable"`
	// DiskMinCapacityGb drops disks with little disk space
	DiskMinCapacityGb int64          `json:"disk_min_capacity_gb"`
	SpaceCheckEnabled bool           `json:"space_check_enabled"`
	SpaceCheckPeriod  times.Duration `json:"space_check_period"`
	// SpaceCheckReservedGb is used for checking disk space: cleaning starts if the free disk space is less
	// than SpaceCheckReservedGb
	SpaceCheckReservedGb      int64   `json:"space_check_reserved_gb"`
	SpaceCheckReservedPercent float64 `json:"space_check_reserved_percent"`
	SpaceCleanDisable         bool    `json:"space_clean_disable"`
	// If SpaceCleanJustData is enabled, it will just restart the nodemanager pod to release /data space, and
	// will not care about other disk partitions
	SpaceCleanJustData bool `json:"space_clean_just_data"`
	// OfflineExitedCleanDelay is used to clean the nodemanager local or log path when an offline pod
	// has exited for a long time
	OfflineExitedCleanDelay times.Duration `json:"offline_exited_clean_delay"`
}

YarnDisksConfig group disks config

type YarnNodeResourceConfig ¶

// YarnNodeResourceConfig holds the yarn related configuration.
type YarnNodeResourceConfig struct {
	// CapacityIncInterval is used to keep nodemanager capacity increases from happening too frequently
	CapacityIncInterval times.Duration    `json:"capacity_inc_interval"`
	NMServer            string            `json:"nm_server"`
	NMReserve           Resource          `json:"nm_reserve"`
	ResourceRoundOff    RoundOffResource  `json:"resource_roundoff"`
	ResourceRange       RangeResource     `json:"resource_range"`
	ScheduleServerPort  string            `json:"schedule_server_port"`
	PortAutoDetect      bool              `json:"port_auto_detect"`
	Properties          map[string]string `json:"properties"`
	Disks               YarnDisksConfig   `json:"disks"`
	ShimServer          string            `json:"shim_server"`
	CpuOverCommit       OverCommit        `json:"cpu_over_commit"`
}

YarnNodeResourceConfig is used to show yarn related configuration

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL