stats

package
v1.4.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 5, 2025 License: MIT Imports: 31 Imported by: 2

Documentation

Overview

Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.

  • Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.

Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.

  • Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.

Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.

  • Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.

Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.

  • Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.

Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.

  • Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.

Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.

  • Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved.

Index

Constants

View Source
// Metric kinds (the `statsValue` kinds enum).
// A metric is registered under one of these kind strings
// (see the `kind` argument of RegExtMetric).
const (
	KindCounter = "counter"
	KindTotal   = "total"
	KindSize    = "size"

	KindSpecial = "special" // uptime

	// NOTE: the gauge kind is also used for NodeAlerts (cos.NodeStateFlags).
	KindGauge              = "gauge"  // disk I/O
	KindComputedThroughput = "compbw" // disk read/write throughput

	KindLatency    = "latency" // computed internally over 'periodic.stats_time' (milliseconds)
	KindThroughput = "bw"      // ditto (MB/s)
)

enum: `statsValue` kinds

View Source
// Variable label names (see Extra.VarLabs and the `vlabs` argument of IncWith).
const (
	VlabBucket    = "bucket"
	VlabXkind     = "xkind"
	VlabMountpath = "mountpath"
)

variable labels

View Source
// KindCounter: basic counters and their error counterparts.
// Every basic counter below has a matching error counter whose name is
// errPrefix + <counter name> (e.g., GetCount => ErrGetCount).
const (
	// NOTE semantics:
	// - counts all warm GETs
	// - counts all cold GETs (when remote GET is followed by storing new object (or, new object version) locally)
	// - does NOT count internal GetObjReader calls (e.g., by copy or transform jobs)
	// - see also:
	//   - ais/backend/common
	//   - rgetstats
	GetCount = "get.n"

	PutCount    = "put.n" // ditto; PUT(object) count = all PUTs, including remote
	HeadCount   = "head.n"
	AppendCount = "append.n"
	DeleteCount = "del.n"
	RenameCount = "ren.n"
	ListCount   = "lst.n" // list-objects

	GetBlobCount = "getblob.n"

	// error counters
	// see also: `Inc`, `regCommon`, `ioErrNames`
	ErrGetCount    = errPrefix + GetCount
	ErrPutCount    = errPrefix + PutCount
	ErrHeadCount   = errPrefix + HeadCount
	ErrAppendCount = errPrefix + AppendCount
	ErrDeleteCount = errPrefix + DeleteCount
	ErrRenameCount = errPrefix + RenameCount
	ErrListCount   = errPrefix + ListCount

	ErrGetBlobCount = errPrefix + GetBlobCount

	// error counters that have no basic (non-error) counterpart in this block
	ErrKaliveCount    = errPrefix + "kalive.n"
	ErrHTTPWriteCount = errPrefix + "http.write.n"
)

KindCounter: every basic counter is accompanied by a corresponding error counter named errPrefix + <counter name>; e.g., "get.n" => "err.get.n", "put.n" => "err.put.n", etc.

View Source
// KindLatency metrics declared at this level.
// (Per the package docs, most latency metrics are target-only - see target_stats.)
const (
	ListLatency      = "lst.ns"
	KeepAliveLatency = "kalive.ns"
)

KindLatency: most latency metrics are target-only (see target_stats). Latency stats keep a sample count (numSamples) that is used to compute the average latency.

View Source
// Metrics of the special and gauge kinds.
const (
	// KindSpecial
	Uptime = "up.ns.time"

	// KindGauge, cos.NodeStateFlags enum
	// (the metric name is defined in package `cos` and re-exported here)
	NodeAlerts = cos.NodeAlerts // "state.flags"
)
View Source
// 1. datapath (counters, sizes, latencies) and common errors
const (
	// KindThroughput
	// NOTE(review): documented here as bytes per second, while the KindThroughput
	// kind itself is annotated "(MB/s)" - confirm which unit applies where.
	GetThroughput = "get.bps" // bytes per second
	PutThroughput = "put.bps" // ditto

	// same as above via `.cumulative`
	GetSize = "get.size"
	PutSize = "put.size"

	// common latencies
	AppendLatency    = "append.ns"
	GetRedirLatency  = "get.redir.ns"
	PutRedirLatency  = "put.redir.ns"
	HeadLatencyTotal = "head.ns.total"

	// out-of-band
	VerChangeCount = "ver.change.n"
	VerChangeSize  = "ver.change.size"

	// errors (note common prefix convention)
	ErrPutCksumCount = errPrefix + "put.cksum.n"
	ErrFSHCCount     = errPrefix + "fshc.n"

	// IO errors (must have ioErrPrefix)
	IOErrGetCount    = ioErrPrefix + "get.n"
	IOErrPutCount    = ioErrPrefix + "put.n"
	IOErrDeleteCount = ioErrPrefix + "del.n"

	// KindLatency
	GetLatency      = "get.ns"
	GetLatencyTotal = "get.ns.total"

	PutLatency         = "put.ns"
	PutLatencyTotal    = "put.ns.total"     // "pure" remote PUT latency
	PutE2ELatencyTotal = "e2e.put.ns.total" // end to end (e2e) PUT latency

	// rate limit (409, 503)
	// NOTE(review): HTTP 409 is "Conflict"; the usual rate-limit status is
	// 429 "Too Many Requests" - confirm which status codes are meant.
	RatelimGetRetryCount        = "ratelim.retry.get.n"
	RatelimGetRetryLatencyTotal = "ratelim.retry.get.ns.total"
	RatelimPutRetryCount        = "ratelim.retry.put.n"
	RatelimPutRetryLatencyTotal = "ratelim.retry.put.ns.total"

	// compare w/ common `DeleteCount`
	// (the metric name is defined in package `core` and re-exported here)
	RemoteDeletedDelCount = core.RemoteDeletedDelCount
)

1. datapath (counters, sizes, latencies) and common errors

View Source
// 2. object metadata in memory
// (all four metric names are defined in package `core` and re-exported here)
const (
	LcacheCollisionCount = core.LcacheCollisionCount
	LcacheEvictedCount   = core.LcacheEvictedCount
	LcacheErrCount       = core.LcacheErrCount
	LcacheFlushColdCount = core.LcacheFlushColdCount
)

2. object metadata in memory

View Source
// 3. xactions (jobs)
// Metric-name suffix convention, as used throughout: ".n" (count), ".size" (size),
// ".ns" and ".ns.total" (latency).
const (
	// blob downloader
	GetBlobSize = "getblob.size"

	// LRU eviction
	LruEvictCount = "lru.evict.n"
	LruEvictSize  = "lru.evict.size"

	// space cleanup
	CleanupStoreCount = "cleanup.store.n"
	CleanupStoreSize  = "cleanup.store.size"

	// distributed sort (ext/dsort)
	DsortCreationReqCount    = "dsort.creation.req.n"
	DsortCreationRespCount   = "dsort.creation.resp.n"
	DsortCreationRespLatency = "dsort.creation.resp.ns"
	DsortExtractShardDskCnt  = "dsort.extract.shard.dsk.n"
	DsortExtractShardMemCnt  = "dsort.extract.shard.mem.n"
	DsortExtractShardSize    = "dsort.extract.shard.size" // uncompressed

	// ETL (ext/etl)
	ETLInlineCount         = "etl.inline.n"
	ETLInlineLatencyTotal  = "etl.inline.ns.total"
	ETLInlineSize          = "etl.inline.size"
	ETLOfflineCount        = "etl.offline.n"
	ETLOfflineLatencyTotal = "etl.offline.ns.total"
	ETLOfflineSize         = "etl.offline.size"

	// downloader (ext/dload)
	// (not to be confused with the blob downloader)
	DloadSize         = "dl.size"
	DloadLatencyTotal = "dl.ns.total"
	ErrDloadCount     = errPrefix + "dl.n"

	// get-batch (x-moss)
	GetBatchCount     = "getbatch.n"
	GetBatchObjCount  = "getbatch.obj.n"
	GetBatchFileCount = "getbatch.file.n"
	GetBatchObjSize   = "getbatch.obj.size"
	GetBatchFileSize  = "getbatch.file.size"

	// NOTE(review): the identifiers end in "Total" but the metric names end in ".ns",
	// unlike the other *LatencyTotal metrics that use ".ns.total" - confirm this is intended.
	GetBatchRxWaitTotal   = "getbatch.rxwait.ns"
	GetBatchThrottleTotal = "getbatch.throttle.ns"

	ErrGetBatchCount     = errPrefix + "getbatch.n"
	GetBatchSoftErrCount = errPrefix + "soft.getbatch.n" // note: carries errPrefix although the identifier has no Err prefix
)

3. xactions (jobs)

View Source
// Static (constant) label names; compare with the variable labels (Vlab*).
const (
	ConstlabNode = "node_id"
)

static labels

View Source
// NgrPrompt is the text "Number of goroutines".
// NOTE(review): where it is displayed or logged is not visible here.
const (
	NgrPrompt = "Number of goroutines"
)

Variables

View Source
// Predefined variable-label sets.
// Each slice lists label names; the matching Empty* map has the same names
// as keys, each mapped to an empty string.
// NOTE: these are package-level variables (shared, mutable state);
// presumably callers must treat them as read-only - TODO confirm.
var (
	// bucket only
	BckVlabs      = []string{VlabBucket}
	EmptyBckVlabs = map[string]string{VlabBucket: ""}

	// bucket and xkind
	BckXlabs      = []string{VlabBucket, VlabXkind}
	EmptyBckXlabs = map[string]string{VlabBucket: "", VlabXkind: ""}
)

Functions

func IsErrMetric added in v1.3.16

func IsErrMetric(name string) bool

func IsIOErrMetric added in v1.3.24

func IsIOErrMetric(name string) bool

func LatencyToCounter added in v1.3.24

func LatencyToCounter(latName string) string

see also base.init() in ais/backend/common

func SizeToThroughputCount added in v1.3.26

func SizeToThroughputCount(name, kind string) (string, string)

Types

type Cluster added in v1.3.16

// Cluster groups node stats: a single proxy Node (JSON "proxy") and
// a map of target Nodes (JSON "target").
// NOTE(review): the map is presumably keyed by target node ID - confirm.
type Cluster struct {
	Proxy  *Node            `json:"proxy"`
	Target map[string]*Node `json:"target"`
}

type ClusterRaw added in v1.3.16

// ClusterRaw has the same JSON keys as Cluster ("proxy", "target") but
// holds the target section as cos.JSONRawMsgs rather than map[string]*Node.
type ClusterRaw struct {
	Proxy  *Node           `json:"proxy"`
	Target cos.JSONRawMsgs `json:"target"`
}

type Extra added in v1.3.24

// Extra carries additional metric properties; it is passed (by pointer)
// to RegExtMetric when registering a metric.
type Extra struct {
	Labels  cos.StrKVs // static or (same) constant
	StrName string     // NOTE(review): purpose is not evident from this declaration
	Help    string     // presumably the metric's help/description text - TODO confirm
	VarLabs []string // variable labels: {VlabBucket, ...}
}

type Node added in v1.3.16

// Node is the per-node stats structure (REST API).
// It is what GetStats returns and what Cluster references for the proxy and each target.
// JSON keys: "snode", "tracker", "capacity".
type Node struct {
	Snode   *meta.Snode `json:"snode"`
	Tracker copyTracker `json:"tracker"`
	Tcdf    fs.Tcdf     `json:"capacity"`
}

REST API

type NodeStatus added in v1.3.16

// NodeStatus includes stats.Node (embedded) and more.
// NOTE: direct API call w/ no proxying.
type NodeStatus struct {
	RebSnap *core.Snap `json:"rebalance_snap,omitempty"`
	// assorted props
	Status         string `json:"status"`
	DeploymentType string `json:"deployment"`
	Version        string `json:"ais_version"`  // major.minor.build
	BuildTime      string `json:"build_time"`   // YYYY-MM-DD HH:MM:SS-TZ
	K8sPodName     string `json:"k8s_pod_name"` // (via ais-k8s/operator `MY_POD` env var)
	Reserved1      string `json:"reserved1,omitempty"`
	Reserved2      string `json:"reserved2,omitempty"`
	// embedded without a tag: with encoding/json-compatible marshaling, the Node
	// fields ("snode", "tracker", "capacity") are promoted into this object
	Node
	// NOTE(review): no JSON tag - the key defaults to the Go field name ("Cluster"),
	// unlike every other field here; confirm this is intended
	Cluster     cos.NodeStateInfo
	MemCPUInfo  apc.MemCPUInfo `json:"sys_info"`
	SmapVersion int64          `json:"smap_version,string"` // `,string`: encoded as a JSON string
	Reserved3   int64          `json:"reserved3,omitempty"`
	Reserved4   int64          `json:"reserved4,omitempty"`
}

(includes stats.Node and more; NOTE: direct API call w/ no proxying)

type Prunner

// Prunner is a stats runner initialized via Init(p core.Node).
// NOTE(review): presumably the proxy-side counterpart of Trunner (its Init is
// documented in terms of the stats "that proxy currently has") - confirm.
type Prunner struct {
	// contains filtered or unexported fields
}

func (*Prunner) Add

func (r *Prunner) Add(name string, val int64)

func (*Prunner) AddWith added in v1.3.26

func (r *Prunner) AddWith(nvs ...cos.NamedVal64)

(prometheus with variable labels)

func (*Prunner) ClrFlag added in v1.3.24

func (r *Prunner) ClrFlag(name string, clr cos.NodeStateFlags)

func (*Prunner) Get

func (r *Prunner) Get(name string) (val int64)

func (*Prunner) GetMetricNames added in v1.3.16

func (r *Prunner) GetMetricNames() cos.StrKVs

func (*Prunner) GetStats added in v1.3.16

func (r *Prunner) GetStats() *Node

func (*Prunner) Inc added in v1.3.16

func (r *Prunner) Inc(name string)

func (*Prunner) IncBck added in v1.3.26

func (r *Prunner) IncBck(name string, bck *cmn.Bck)

(ditto)

func (*Prunner) IncWith added in v1.3.26

func (r *Prunner) IncWith(name string, vlabs map[string]string)

(ditto; for convenience)

func (*Prunner) Init

func (r *Prunner) Init(p core.Node) *atomic.Bool

All stats that the proxy currently has are CoreStats, which are registered at startup.

func (*Prunner) Name

func (r *Prunner) Name() string

func (*Prunner) PromHandler added in v1.3.26

func (*Prunner) PromHandler() http.Handler

func (*Prunner) RegExtMetric added in v1.3.24

func (r *Prunner) RegExtMetric(snode *meta.Snode, name, kind string, extra *Extra)

func (*Prunner) ResetStats added in v1.3.16

func (r *Prunner) ResetStats(errorsOnly bool)

TODO: reset prometheus as well (assuming, there's an API)

func (*Prunner) Run

func (r *Prunner) Run() error

func (*Prunner) SetClrFlag added in v1.3.24

func (r *Prunner) SetClrFlag(name string, set, clr cos.NodeStateFlags)

func (*Prunner) SetFlag added in v1.3.24

func (r *Prunner) SetFlag(name string, set cos.NodeStateFlags)

func (*Prunner) StartedUp

func (r *Prunner) StartedUp() bool

func (*Prunner) Stop

func (r *Prunner) Stop(err error)

type Tracker

// Tracker is the stats-tracking interface: on top of the embedded
// cos.StatsUpdater it adds startup state, a Prometheus HTTP handler,
// increment helpers, stats retrieval/reset, and metric registration.
// Both *Prunner and *Trunner declare all of the methods listed here.
type Tracker interface {
	cos.StatsUpdater

	StartedUp() bool

	PromHandler() http.Handler

	// increment a metric by name; IncWith and IncBck are the
	// variable-label variants (explicit label map and bucket, respectively)
	Inc(metric string)
	IncWith(metric string, vlabs map[string]string)
	IncBck(name string, bck *cmn.Bck)

	GetStats() *Node

	// errorsOnly presumably restricts the reset to error metrics - TODO confirm
	ResetStats(errorsOnly bool)
	GetMetricNames() cos.StrKVs // (name, kind) pairs

	// for aistore modules, to add their respective metrics
	RegExtMetric(node *meta.Snode, name, kind string, extra *Extra)
}

type Trunner

// Trunner is a stats runner constructed via NewTrunner(t core.Target);
// in addition to the common metrics it registers target-specific ones (see RegMetrics).
type Trunner struct {
	// exported capacity data, JSON key "cdf"
	// (note: Node exposes its own Tcdf field under the key "capacity")
	Tcdf fs.Tcdf `json:"cdf"`
	// contains filtered or unexported fields
}

func NewTrunner added in v1.3.16

func NewTrunner(t core.Target) *Trunner

func (*Trunner) Add

func (r *Trunner) Add(name string, val int64)

func (*Trunner) AddWith added in v1.3.26

func (r *Trunner) AddWith(nvs ...cos.NamedVal64)

(prometheus with variable labels)

func (*Trunner) ClrFlag added in v1.3.24

func (r *Trunner) ClrFlag(name string, clr cos.NodeStateFlags)

func (*Trunner) Get

func (r *Trunner) Get(name string) (val int64)

func (*Trunner) GetMetricNames added in v1.3.16

func (r *Trunner) GetMetricNames() cos.StrKVs

func (*Trunner) GetStats added in v1.3.16

func (r *Trunner) GetStats() (ds *Node)

func (*Trunner) Inc added in v1.3.16

func (r *Trunner) Inc(name string)

func (*Trunner) IncBck added in v1.3.26

func (r *Trunner) IncBck(name string, bck *cmn.Bck)

(ditto)

func (*Trunner) IncWith added in v1.3.26

func (r *Trunner) IncWith(name string, vlabs map[string]string)

(ditto; for convenience)

func (*Trunner) Init

func (r *Trunner) Init() *atomic.Bool

func (*Trunner) InitCDF added in v1.3.16

func (r *Trunner) InitCDF(config *cmn.Config) error

func (*Trunner) Name

func (r *Trunner) Name() string

func (*Trunner) PromHandler added in v1.3.26

func (*Trunner) PromHandler() http.Handler

func (*Trunner) RegDiskMetrics

func (r *Trunner) RegDiskMetrics(snode *meta.Snode, disk string)

func (*Trunner) RegExtMetric added in v1.3.24

func (r *Trunner) RegExtMetric(snode *meta.Snode, name, kind string, extra *Extra)

func (*Trunner) RegMetrics

func (r *Trunner) RegMetrics(snode *meta.Snode)

target-specific metrics, in addition to common and already added via regCommon()

func (*Trunner) ResetStats added in v1.3.16

func (r *Trunner) ResetStats(errorsOnly bool)

TODO: reset prometheus as well (assuming, there's an API)

func (*Trunner) Run

func (r *Trunner) Run() error

func (*Trunner) SetClrFlag added in v1.3.24

func (r *Trunner) SetClrFlag(name string, set, clr cos.NodeStateFlags)

func (*Trunner) SetFlag added in v1.3.24

func (r *Trunner) SetFlag(name string, set cos.NodeStateFlags)

func (*Trunner) Standby

func (r *Trunner) Standby(v bool)

func (*Trunner) StartedUp

func (r *Trunner) StartedUp() bool

func (*Trunner) Stop

func (r *Trunner) Stop(err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL