Documentation
¶
Overview ¶
Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.
- Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.
Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.
- Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.
Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.
- Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.
Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.
- Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.
- Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.
Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.
- Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.
Package stats provides methods and functionality to register, track, log, and export metrics that, for the most part, include "counter" and "latency" kinds.
- Copyright (c) 2018-2026, NVIDIA CORPORATION. All rights reserved.
Index ¶
- Constants
- Variables
- func IsErrMetric(name string) bool
- func IsIOErrMetric(name string) bool
- func LatencyToCounter(latName string) string
- func SizeToThroughputCount(name, kind string) (string, string)
- type Cluster
- type ClusterRaw
- type Extra
- type Node
- type NodeStatus
- type Prunner
- func (r *Prunner) Add(name string, val int64)
- func (r *Prunner) AddWith(nvs ...cos.NamedVal64)
- func (r *Prunner) ClrFlag(name string, clr cos.NodeStateFlags)
- func (r *Prunner) Get(name string) (val int64)
- func (r *Prunner) GetMetricNames() cos.StrKVs
- func (r *Prunner) GetStats() *Node
- func (r *Prunner) Inc(name string)
- func (r *Prunner) IncBck(name string, bck *cmn.Bck)
- func (r *Prunner) IncWith(name string, vlabs map[string]string)
- func (r *Prunner) Init(p core.Node) *atomic.Bool
- func (r *Prunner) Name() string
- func (r *Prunner) Observe(name string, val float64)
- func (*Prunner) PromHandler() http.Handler
- func (r *Prunner) RegExtMetric(snode *meta.Snode, name, kind string, extra *Extra)
- func (r *Prunner) ResetStats(errorsOnly bool)
- func (r *Prunner) Run() error
- func (r *Prunner) SetClrFlag(name string, set, clr cos.NodeStateFlags)
- func (r *Prunner) SetFlag(name string, set cos.NodeStateFlags)
- func (r *Prunner) StartedUp() bool
- func (r *Prunner) Stop(err error)
- type Tracker
- type Trunner
- func (r *Trunner) Add(name string, val int64)
- func (r *Trunner) AddWith(nvs ...cos.NamedVal64)
- func (r *Trunner) ClrFlag(name string, clr cos.NodeStateFlags)
- func (r *Trunner) Get(name string) (val int64)
- func (r *Trunner) GetMetricNames() cos.StrKVs
- func (r *Trunner) GetStats() (ds *Node)
- func (r *Trunner) Inc(name string)
- func (r *Trunner) IncBck(name string, bck *cmn.Bck)
- func (r *Trunner) IncWith(name string, vlabs map[string]string)
- func (r *Trunner) Init() *atomic.Bool
- func (r *Trunner) InitCDF(config *cmn.Config) error
- func (r *Trunner) Name() string
- func (r *Trunner) Observe(name string, val float64)
- func (*Trunner) PromHandler() http.Handler
- func (r *Trunner) RegDiskMetrics(snode *meta.Snode, disk string)
- func (r *Trunner) RegExtMetric(snode *meta.Snode, name, kind string, extra *Extra)
- func (r *Trunner) RegMetrics(snode *meta.Snode)
- func (r *Trunner) ResetStats(errorsOnly bool)
- func (r *Trunner) Run() error
- func (r *Trunner) SetClrFlag(name string, set, clr cos.NodeStateFlags)
- func (r *Trunner) SetFlag(name string, set cos.NodeStateFlags)
- func (r *Trunner) Standby(v bool)
- func (r *Trunner) StartedUp() bool
- func (r *Trunner) Stop(err error)
Constants ¶
const ( KindCounter = "counter" KindTotal = "total" KindSize = "size" KindSpecial = "special" // uptime KindGauge = "gauge" // disk I/O KindHistogram = "histogram" // direct latency metrics (no internal aggregation) KindComputedThroughput = "compbw" // disk read/write throughput KindLatency = "latency" // computed internally over 'periodic.stats_time' (milliseconds) KindThroughput = "bw" // ditto (MB/s) )
enum: `statsValue` kinds
const ( VlabBucket = "bucket" VlabXkind = "xkind" VlabMountpath = "mountpath" )
variable labels
const ( // NOTE semantics: // - counts all warm GETs // - counts all cold GETs (when remote GET is followed by storing new object (or, new object version) locally) // - does NOT count internal GetObjReader calls (e.g., by copy or transform jobs) // - see also: // - ais/backend/common // - rgetstats GetCount = "get.n" PutCount = "put.n" // ditto PUT(object) count = (all PUTs including remote) HeadCount = "head.n" AppendCount = "append.n" DeleteCount = "del.n" RenameCount = "ren.n" ListCount = "lst.n" // list-objects GetBlobCount = "getblob.n" // error counters // see also: `Inc`, `regCommon`, `ioErrNames` ErrGetCount = errPrefix + GetCount ErrPutCount = errPrefix + PutCount ErrHeadCount = errPrefix + HeadCount ErrAppendCount = errPrefix + AppendCount ErrDeleteCount = errPrefix + DeleteCount ErrRenameCount = errPrefix + RenameCount ErrListCount = errPrefix + ListCount ErrGetBlobCount = errPrefix + GetBlobCount ErrKaliveCount = errPrefix + "kalive.n" ErrHTTPWriteCount = errPrefix + "http.write.n" )
KindCounter: every basic counter is accompanied by a corresponding error counter named (errPrefix + counter name), e.g., "get.n" => "err.get.n", "put.n" => "err.put.n", etc.
const ( ListLatency = "lst.ns" KeepAliveLatency = "kalive.ns" )
KindLatency (most latency metrics are target-only; see target_stats). Latency stats have numSamples, which is used to compute the average latency.
const ( // KindSpecial Uptime = "up.ns.time" // KindGauge, cos.NodeStateFlags enum NodeAlerts = cos.NodeAlerts // "state.flags" )
const ( AuthTotalCount = "auth.total.n" AuthSuccessCount = "auth.success.n" AuthFailCount = "auth.fail.n" AuthNoTokenCount = "auth.notoken.n" AuthInvalidTokenCount = "auth.invalidtoken.n" AuthInvalidIssCount = "auth.invalidiss.n" AuthInvalidKidCount = "auth.invalidkid.n" AuthExpiredTokenCount = "auth.expiredtoken.n" ACLTotalCount = "acl.total.n" ACLDeniedCount = "acl.denied.n" AuthIssHist = "auth.iss" AuthJWKSHist = "auth.jwks" )
Authentication and Authorization metrics
const ( // KindThroughput GetThroughput = "get.bps" // bytes per second PutThroughput = "put.bps" // ditto // same as above via `.cumulative` GetSize = "get.size" PutSize = "put.size" // common latencies AppendLatency = "append.ns" GetRedirLatency = "get.redir.ns" PutRedirLatency = "put.redir.ns" HeadLatencyTotal = "head.ns.total" // out-of-band VerChangeCount = "ver.change.n" VerChangeSize = "ver.change.size" // errors (note common prefix convention) ErrPutCksumCount = errPrefix + "put.cksum.n" ErrFSHCCount = errPrefix + "fshc.n" // IO errors (must have ioErrPrefix) IOErrGetCount = ioErrPrefix + "get.n" IOErrPutCount = ioErrPrefix + "put.n" IOErrDeleteCount = ioErrPrefix + "del.n" // KindLatency GetLatency = "get.ns" GetLatencyTotal = "get.ns.total" PutLatency = "put.ns" PutLatencyTotal = "put.ns.total" // "pure" remote PUT latency PutE2ELatencyTotal = "e2e.put.ns.total" // end to end (e2e) PUT latency // rate limit (429, 503) RatelimGetRetryCount = "ratelim.retry.get.n" RatelimGetRetryLatencyTotal = "ratelim.retry.get.ns.total" RatelimPutRetryCount = "ratelim.retry.put.n" RatelimPutRetryLatencyTotal = "ratelim.retry.put.ns.total" // compare w/ common `DeleteCount` RemoteDeletedDelCount = core.RemoteDeletedDelCount )
1. datapath (counters, sizes, latencies) and common errors
const ( LcacheCollisionCount = core.LcacheCollisionCount LcacheEvictedCount = core.LcacheEvictedCount LcacheErrCount = core.LcacheErrCount LcacheFlushColdCount = core.LcacheFlushColdCount )
2. object metadata in memory
const ( // blob downloader GetBlobSize = "getblob.size" // LRU eviction LruEvictCount = "lru.evict.n" LruEvictSize = "lru.evict.size" // space cleanup CleanupStoreCount = "cleanup.store.n" CleanupStoreSize = "cleanup.store.size" // ETL (ext/etl) ETLInlineCount = "etl.inline.n" ETLInlineLatencyTotal = "etl.inline.ns.total" ETLInlineSize = "etl.inline.size" ETLOfflineCount = "etl.offline.n" ETLOfflineLatencyTotal = "etl.offline.ns.total" ETLOfflineSize = "etl.offline.size" // downloader (ext/dload) // (not to be confused with blob downloader) DloadSize = "dl.size" DloadLatencyTotal = "dl.ns.total" ErrDloadCount = errPrefix + "dl.n" // get-batch (x-moss) GetBatchCount = "getbatch.n" GetBatchObjCount = "getbatch.obj.n" GetBatchFileCount = "getbatch.file.n" GetBatchObjSize = "getbatch.obj.size" GetBatchFileSize = "getbatch.file.size" // get-batch GFN (get-from-neighbor) recovery stats GetBatchCountGFN = "getbatch.gfn.n" // total GFN requests GetBatchRxWaitTotal = "getbatch.rxwait.ns" GetBatchThrottleTotal = "getbatch.throttle.ns" ErrGetBatchCount = errPrefix + "getbatch.n" GetBatchSoftErrCount = errPrefix + "soft.getbatch.n" GetBatchErrCountGFN = errPrefix + "gfn.getbatch.n" )
3. xactions (jobs)
const (
ConstlabNode = "node_id"
)
static labels
const (
NgrPrompt = "Number of goroutines"
)
Variables ¶
var ( BckVlabs = []string{VlabBucket} EmptyBckVlabs = map[string]string{VlabBucket: ""} BckXlabs = []string{VlabBucket, VlabXkind} EmptyBckXlabs = map[string]string{VlabBucket: "", VlabXkind: ""} )
Functions ¶
func IsErrMetric ¶ added in v1.3.16
func IsIOErrMetric ¶ added in v1.3.24
func LatencyToCounter ¶ added in v1.3.24
see also base.init() in ais/backend/common
func SizeToThroughputCount ¶ added in v1.3.26
Types ¶
type ClusterRaw ¶ added in v1.3.16
type ClusterRaw struct {
Proxy *Node `json:"proxy"`
Target cos.JSONRawMsgs `json:"target"`
}
type Node ¶ added in v1.3.16
type Node struct {
Snode *meta.Snode `json:"snode"`
Tracker copyTracker `json:"tracker"`
Tcdf fs.Tcdf `json:"capacity"`
}
REST API
type NodeStatus ¶ added in v1.3.16
type NodeStatus struct {
RebSnap *core.Snap `json:"rebalance_snap,omitempty"`
// assorted props
Status string `json:"status"`
DeploymentType string `json:"deployment"`
Version string `json:"ais_version"` // major.minor.build
BuildTime string `json:"build_time"` // YYYY-MM-DD HH:MM:SS-TZ
K8sPodName string `json:"k8s_pod_name"` // (via ais-k8s/operator `MY_POD` env var)
Reserved1 string `json:"reserved1,omitempty"`
Reserved2 string `json:"reserved2,omitempty"`
Node
Cluster cos.NodeStateInfo
MemCPUInfo apc.MemCPUInfo `json:"sys_info"`
SmapVersion int64 `json:"smap_version,string"`
Reserved3 int64 `json:"reserved3,omitempty"`
Reserved4 int64 `json:"reserved4,omitempty"`
}
(includes stats.Node and more; NOTE: this is a direct API call with no proxying)
type Prunner ¶
type Prunner struct {
// contains filtered or unexported fields
}
func (*Prunner) AddWith ¶ added in v1.3.26
func (r *Prunner) AddWith(nvs ...cos.NamedVal64)
(prometheus with variable labels)
func (*Prunner) ClrFlag ¶ added in v1.3.24
func (r *Prunner) ClrFlag(name string, clr cos.NodeStateFlags)
func (*Prunner) GetMetricNames ¶ added in v1.3.16
func (*Prunner) Init ¶
All stats that the proxy currently has are CoreStats, which are registered at startup.
func (*Prunner) PromHandler ¶ added in v1.3.26
func (*Prunner) RegExtMetric ¶ added in v1.3.24
func (*Prunner) ResetStats ¶ added in v1.3.16
func (r *Prunner) ResetStats(errorsOnly bool)
TODO: reset Prometheus as well (assuming there's an API)
func (*Prunner) SetClrFlag ¶ added in v1.3.24
func (r *Prunner) SetClrFlag(name string, set, clr cos.NodeStateFlags)
func (*Prunner) SetFlag ¶ added in v1.3.24
func (r *Prunner) SetFlag(name string, set cos.NodeStateFlags)
type Tracker ¶
type Tracker interface {
cos.StatsUpdater
StartedUp() bool
PromHandler() http.Handler
Inc(metric string)
IncWith(metric string, vlabs map[string]string)
IncBck(name string, bck *cmn.Bck)
GetStats() *Node
ResetStats(errorsOnly bool)
GetMetricNames() cos.StrKVs // (name, kind) pairs
// for aistore modules, to add their respective metrics
RegExtMetric(node *meta.Snode, name, kind string, extra *Extra)
}
type Trunner ¶
func NewTrunner ¶ added in v1.3.16
func (*Trunner) AddWith ¶ added in v1.3.26
func (r *Trunner) AddWith(nvs ...cos.NamedVal64)
(prometheus with variable labels)
func (*Trunner) ClrFlag ¶ added in v1.3.24
func (r *Trunner) ClrFlag(name string, clr cos.NodeStateFlags)
func (*Trunner) GetMetricNames ¶ added in v1.3.16
func (*Trunner) PromHandler ¶ added in v1.3.26
func (*Trunner) RegExtMetric ¶ added in v1.3.24
func (*Trunner) RegMetrics ¶
target-specific metrics, in addition to the common ones already added via regCommon()
func (*Trunner) ResetStats ¶ added in v1.3.16
func (r *Trunner) ResetStats(errorsOnly bool)
TODO: reset Prometheus as well (assuming there's an API)
func (*Trunner) SetClrFlag ¶ added in v1.3.24
func (r *Trunner) SetClrFlag(name string, set, clr cos.NodeStateFlags)
func (*Trunner) SetFlag ¶ added in v1.3.24
func (r *Trunner) SetFlag(name string, set cos.NodeStateFlags)