metrics

package
v0.6.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 3, 2026 License: Apache-2.0 Imports: 30 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// Gateway-level Prometheus metric names emitted by the request path.
	GatewayRequestTotal = "gateway_request_total"
	GatewayE2EDuration  = "gateway_e2e_duration_seconds"
	GatewayInFlight     = "gateway_in_flight_requests"

	// Count of streamed responses where first token delay > 1s
	GatewayFirstTokenDelayOver1sTotal = "gateway_first_token_delay_over_1s_total"

	// counter to track #success & #fail requests
	GatewayRequestSuccessTotal = "gateway_request_success_total"
	GatewayRequestFailTotal    = "gateway_request_fail_total"

	// counter to track #success & #fail requests for each model
	GatewayRequestModelSuccessTotal = "gateway_request_model_success_total"
	GatewayRequestModelFailTotal    = "gateway_request_model_fail_total"

	// counter to track #prompt & #completion tokens
	GatewayPromptTokenBucketTotal     = "gateway_prompt_token_bucket_total"
	GatewayCompletionTokenBucketTotal = "gateway_completion_token_bucket_total"

	// counter to track #success & #fail prefill requests
	GatewayPrefillRequestSuccessTotal = "gateway_prefill_request_success_total"
	GatewayPrefillRequestFailTotal    = "gateway_prefill_request_fail_total"

	// gauge to track #outstanding prefill & decode requests
	GatewayPrefillOutstandingRequests = "gateway_prefill_outstanding_requests"
	GatewayDecodeOutstandingRequests  = "gateway_decode_outstanding_requests"

	// counter to track #prefill & #decode pods selected by pd
	PDSelectedPrefillPodTotal = "pd_selected_prefill_pod_total"
	PDSelectedDecodePodTotal  = "pd_selected_decode_pod_total"

	// Duration bucket counters for timing breakdowns
	GatewayRoutingTimeBucketTotal    = "gateway_routing_time_bucket_total"
	GatewayPrefillTimeBucketTotal    = "gateway_prefill_time_bucket_total"
	GatewayKVTransferTimeBucketTotal = "gateway_kv_transfer_time_bucket_total"
	GatewayTTFTBucketTotal           = "gateway_ttft_bucket_total"
	GatewayTPOTBucketTotal           = "gateway_tpot_bucket_total"
	GatewayDecodeTimeBucketTotal     = "gateway_decode_time_bucket_total"
	GatewayTotalTimeBucketTotal      = "gateway_total_time_bucket_total"
)
View Source
const (
	// Engine- and pod-level metric names scraped from inference backends
	// (vLLM, SGLang, xLLM) or computed via Prometheus queries.
	NumRequestsRunning          = "num_requests_running"
	NumRequestsWaiting          = "num_requests_waiting"
	EngineSleepState            = "engine_sleep_state"
	HTTPRequestTotal            = "http_requests_total"
	NumPreemptionsTotal         = "num_preemptions_total"
	RequestSuccessTotal         = "request_success_total"
	NumPrefillPreallocQueueReqs = "num_prefill_prealloc_queue_reqs"
	NumDecodePreallocQueueReqs  = "num_decode_prealloc_queue_reqs"

	// Latency histograms (end-to-end and per-stage).
	E2ERequestLatencySeconds        = "e2e_request_latency_seconds"
	RequestQueueTimeSeconds         = "request_queue_time_seconds"
	RequestInferenceTimeSeconds     = "request_inference_time_seconds"
	PerStageReqLatencySeconds       = "per_stage_req_latency_seconds"
	HTTPRequestDurationSeconds      = "http_request_duration_seconds"
	HTTPRequestDurationHighRSeconds = "http_request_duration_highr_seconds"

	TimeToFirstTokenSeconds   = "time_to_first_token_seconds"
	RequestPrefillTimeSeconds = "request_prefill_time_seconds"
	PromptTokenTotal          = "prompt_tokens_total"
	RequestPromptTokens       = "request_prompt_tokens"

	// deprecated (time_per_output_token_seconds), use inter_token_latency_seconds instead
	TimePerOutputTokenSeconds        = "time_per_output_token_seconds"
	InterTokenLatencySeconds         = "inter_token_latency_seconds"
	RequestTimePerOutputTokenSeconds = "request_time_per_output_token_seconds"
	RequestDecodeTimeSeconds         = "request_decode_time_seconds"

	GenerationTokenTotal          = "generation_tokens_total"
	IterationTokensTotal          = "iteration_tokens_total"
	RequestGenerationTokens       = "request_generation_tokens"
	RequestMaxNumGenerationTokens = "request_max_num_generation_tokens"

	// Cache usage and prefix-cache hit/query counters.
	KVCacheUsagePerc                = "kv_cache_usage_perc"
	NixlNumFailedTransfers          = "nixl_num_failed_transfers_total"
	NixlNumFailedNotifications      = "nixl_num_failed_notifications_total"
	PrefixCacheHitTotal             = "prefix_cache_hits_total"
	PrefixCacheQueriesTotal         = "prefix_cache_queries_total"
	ExternalPrefixCacheHitsTotal    = "external_prefix_cache_hits_total"
	ExternalPrefixCacheQueriesTotal = "external_prefix_cache_queries_total"

	// NIXL KV-cache transfer metrics.
	NixlXferTimeSeconds  = "nixl_xfer_time_seconds"
	NixlPostTimeSeconds  = "nixl_post_time_seconds"
	NixlBytesTransferred = "nixl_bytes_transferred"
	NixlNumDescriptors   = "nixl_num_descriptors"

	DrainRate1m = "drain_rate_1m"

	// Derived (PromQL-backed) metric names.
	P95TTFT5m    = "p95_ttft_5m"
	P95TTFT5mPod = "p95_ttft_5m_pod"
	AvgTTFT5mPod = "avg_ttft_5m_pod"
	P95TPOT5mPod = "p95_tpot_5m_pod"
	// NOTE(review): value breaks the "<stat>_5m_pod" suffix pattern used by the
	// siblings above ("avg_tpot_pod_5m" vs "avg_tpot_5m_pod"); renaming the
	// string would break existing dashboards/queries, so it is left as-is —
	// confirm whether this is intentional.
	AvgTPOT5mPod                         = "avg_tpot_pod_5m"
	AvgPromptToksPerReq                  = "avg_prompt_toks_per_req"
	AvgGenerationToksPerReq              = "avg_generation_toks_per_req"
	GPUCacheUsagePerc                    = "gpu_cache_usage_perc"
	GPUBusyTimeRatio                     = "gpu_busy_time_ratio"
	CPUCacheUsagePerc                    = "cpu_cache_usage_perc"
	EngineUtilization                    = "engine_utilization"
	AvgE2ELatencyPod                     = "avg_e2e_latency_pod"
	AvgRequestsPerMinPod                 = "avg_requests_per_min_pod"
	AvgPromptThroughputToksPerMinPod     = "avg_prompt_throughput_toks_per_min_pod"
	AvgGenerationThroughputToksPerMinPod = "avg_generation_throughput_toks_per_min_pod"
	MaxLora                              = "max_lora"
	WaitingLoraAdapters                  = "waiting_lora_adapters"
	RunningLoraAdapters                  = "running_lora_adapters"
	VTCBucketSizeActive                  = "vtc_bucket_size_active"
	// Realtime metrics
	RealtimeNumRequestsRunning = "realtime_num_requests_running"
	RealtimeNormalizedPendings = "realtime_normalized_pendings"

	// error to read metrics from backend
	PrometheusQueryFail       = "prometheus_query_fail"
	LLMEngineMetricsQueryFail = "llm_engine_metrics_query_fail"

	// Deprecated metrics
	NumRequestsSwapped              = "num_requests_swapped"
	AvgPromptThroughputToksPerS     = "avg_prompt_throughput_toks_per_s"
	AvgGenerationThroughputToksPerS = "avg_generation_throughput_toks_per_s"
)

Variables

View Source
var (

	// SetGaugeMetricFnForTest and IncrementCounterMetricFnForTest are function
	// hooks that tests can override to intercept metric emission. They default
	// to the real implementations (defaultSetGaugeMetric /
	// defaultIncrementCounterMetric); production code calls through these
	// variables so tests can swap in fakes without touching a registry.
	SetGaugeMetricFnForTest         = defaultSetGaugeMetric
	IncrementCounterMetricFnForTest = defaultIncrementCounterMetric
)
View Source
var (
	// GatewayMetrics describes every gateway-side metric (scope, source, type,
	// and help text). Lookups such as GetMetricHelp read descriptions from
	// this map, so each metric name constant should have an entry here.
	GatewayMetrics = map[string]Metric{
		GatewayRequestTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of requests received by the gateway",
		},

		// GatewayRequestSuccessTotal / GatewayRequestFailTotal are declared in
		// the metric-name constants but previously had no metadata entry here,
		// unlike every sibling gateway metric; added so description lookups
		// succeed for them.
		GatewayRequestSuccessTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of successful requests received by the gateway",
		},
		GatewayRequestFailTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of failed requests received by the gateway",
		},

		GatewayRequestModelSuccessTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of successful requests received by the gateway for each model",
		},
		GatewayRequestModelFailTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of failed requests received by the gateway for each model",
		},

		GatewayPrefillRequestSuccessTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of successful prefill requests received by the gateway",
		},
		GatewayPrefillRequestFailTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of failed prefill requests received by the gateway",
		},

		GatewayPrefillOutstandingRequests: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			Description: "Total number of outstanding prefill requests received by the gateway",
		},
		GatewayDecodeOutstandingRequests: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			Description: "Total number of outstanding decode requests received by the gateway",
		},

		GatewayE2EDuration: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			Description: "End-to-end latency distribution of requests received by the gateway",
		},

		GatewayInFlight: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			Description: "Current number of requests in flight (i.e., being processed) by the gateway",
		},

		GatewayFirstTokenDelayOver1sTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Count of streamed responses where first token delay > 1s",
		},

		GatewayPromptTokenBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Requests counted by prompt token bucket",
		},

		GatewayCompletionTokenBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Requests counted by completion token bucket",
		},
		PDSelectedPrefillPodTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total selections of prefill pods by the PD router",
		},
		PDSelectedDecodePodTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total selections of decode pods by the PD router",
		},

		// Duration-bucket counters backing the gateway's timing breakdowns.
		GatewayRoutingTimeBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType:   MetricType{Raw: Counter},
			Description:  "Requests counted by routing time bucket",
		},
		GatewayPrefillTimeBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType:   MetricType{Raw: Counter},
			Description:  "Requests counted by prefill time bucket",
		},
		GatewayKVTransferTimeBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType:   MetricType{Raw: Counter},
			Description:  "Requests counted by KV transfer time bucket",
		},
		GatewayTTFTBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType:   MetricType{Raw: Counter},
			Description:  "Requests counted by TTFT bucket",
		},
		GatewayTPOTBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType:   MetricType{Raw: Counter},
			Description:  "Requests counted by TPOT bucket",
		},
		GatewayDecodeTimeBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType:   MetricType{Raw: Counter},
			Description:  "Requests counted by decode time bucket",
		},
		GatewayTotalTimeBucketTotal: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType:   MetricType{Raw: Counter},
			Description:  "Requests counted by total time bucket",
		},
	}
)
View Source
var (
	// Metrics defines all available metrics, including raw and query-based metrics.
	Metrics = map[string]Metric{
		NumRequestsRunning: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:num_requests_running",
				"sglang": "sglang:num_running_reqs",
			},
			Description: "Number of running requests",
		},
		NumRequestsWaiting: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:num_requests_waiting",
				"sglang": "sglang:num_queue_reqs",
			},
			Description: "Number of waiting requests",
		},
		NumRequestsSwapped: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:num_requests_swapped",
				"sglang": "sglang:num_retracted_reqs",
			},
			Description: "Number of swapped requests",
		},
		EngineSleepState: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:engine_sleep_state",
			},
			Description: "Engine sleep state; awake = 0 means engine is sleeping; awake = 1 means engine is awake; weights_offloaded = 1 means sleep level 1; discard_all = 1 means sleep level 2.",
		},
		HTTPRequestTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:http_requests_total",
			},
			Description: "Total number of requests by method, status and handler.",
		},
		NumPreemptionsTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:num_preemptions_total",
			},
			Description: "Number of preemptions",
		},
		RequestSuccessTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:num_requests_success_total",
				"sglang": "sglang:num_requests_total",
			},
			Description: "Number of successful requests",
		},
		NumPrefillPreallocQueueReqs: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"sglang": "sglang:num_prefill_prealloc_queue_reqs",
			},
			Description: "Number of prefill preallocation queue requests",
		},
		NumDecodePreallocQueueReqs: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"sglang": "sglang:num_decode_prealloc_queue_reqs",
			},
			Description: "Number of decode preallocation queue requests",
		},

		E2ERequestLatencySeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:e2e_request_latency_seconds",
				"sglang": "sglang:e2e_request_latency_seconds",
			},
			Description: "End-to-end request latency in seconds",
		},
		RequestQueueTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_queue_time_seconds",
			},
			Description: "Request queue time in seconds",
		},
		RequestInferenceTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_inference_time_seconds",
			},
			Description: "Request inference time in seconds",
		},
		PerStageReqLatencySeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"sglang": "sglang:per_stage_req_latency_seconds",
			},
			Description: "Per-stage request latency in seconds",
		},
		HTTPRequestDurationSeconds: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "http_request_duration_seconds",
			},
			Description: "Histogram of request duration in seconds",
		},
		HTTPRequestDurationHighRSeconds: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "http_request_duration_highr_seconds",
			},
			Description: "Histogram of request duration in seconds for high priority requests",
		},
		PromptTokenTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:prompt_tokens_total",
			},
			Description: "Total prompt tokens",
		},
		RequestPromptTokens: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_prompt_tokens",
			},
			Description: "Histogram of prompt tokens",
		},
		GenerationTokenTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:generation_tokens_total",
			},
			Description: "Total generation tokens",
		},
		RequestGenerationTokens: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_generation_tokens",
			},
			Description: "Histogram of generation tokens",
		},
		RequestMaxNumGenerationTokens: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_max_num_generation_tokens",
			},
			Description: "Histogram of max number of generation tokens",
		},
		IterationTokensTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:iteration_tokens_total",
			},
			Description: "Total iteration tokens",
		},
		TimeToFirstTokenSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:time_to_first_token_seconds",
				"sglang": "sglang:time_to_first_token_seconds",
			},
			Description: "Time to first token in seconds",
		},
		TimePerOutputTokenSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:time_per_output_token_seconds",
				"sglang": "sglang:inter_token_latency_seconds",
			},
			Description: "Time per output token in seconds",
		},
		InterTokenLatencySeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:inter_token_latency_seconds",
				"sglang": "sglang:inter_token_latency_seconds",
			},
			Description: "Inter-token latency in seconds",
		},
		RequestDecodeTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_decode_time_seconds",
			},
			Description: "Request decode time in seconds",
		},
		RequestPrefillTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_prefill_time_seconds",
			},
			Description: "Request prefill time in seconds",
		},
		RequestTimePerOutputTokenSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_time_per_output_token_seconds",
			},
			Description: "Time per output token in seconds",
		},
		GPUCacheUsagePerc: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:gpu_cache_usage_perc",
				"sglang": "sglang:token_usage",
				"xllm":   "kv_cache_utilization",
			},
			Description: "GPU cache usage percentage",
		},
		EngineUtilization: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"xllm": "engine_utilization",
			},
			// NOTE(review): description reads "GPU busy time ratio", which
			// matches the separate GPUBusyTimeRatio constant — confirm this
			// wording is intended for engine_utilization.
			Description: "GPU busy time ratio",
		},
		CPUCacheUsagePerc: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:cpu_cache_usage_perc",
			},
			Description: "CPU cache usage percentage",
		},
		KVCacheUsagePerc: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:kv_cache_usage_perc",
				"sglang": "sglang:token_usage",
				"xllm":   "kv_cache_utilization",
			},
			Description: "KV-cache usage. 1 means 100 percent usage.",
		},
		PrefixCacheQueriesTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:prefix_cache_queries_total",
			},
			Description: "Prefix cache queries, in terms of number of queried tokens.",
		},
		PrefixCacheHitTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:prefix_cache_hits_total",
			},
			Description: "Prefix cache hits, in terms of number of cached tokens.",
		},
		ExternalPrefixCacheQueriesTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:external_prefix_cache_queries_total",
			},
			Description: "External prefix cache queries from KV connector cross-instance cache sharing, in terms of number of queried tokens.",
		},
		ExternalPrefixCacheHitsTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:external_prefix_cache_hits_total",
			},
			Description: "External prefix cache hits from KV connector cross-instance cache sharing, in terms of number of cached tokens.",
		},

		NixlXferTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:nixl_xfer_time_seconds",
			},
			Description: "transfer duration for NIXL KV Cache transfers",
		},
		NixlPostTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:nixl_post_time_seconds",
			},
			Description: "transfer post time for NIXL KV Cache transfers",
		},
		NixlBytesTransferred: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:nixl_bytes_transferred",
			},
			Description: "number of bytes transferred per NIXL KV Cache transfer",
		},
		NixlNumDescriptors: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:nixl_num_descriptors",
			},
			Description: "number of descriptors per NIXL KV Cache transfers",
		},
		NixlNumFailedTransfers: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:nixl_num_failed_transfers",
			},
			Description: "number of failed NIXL KV Cache transfers",
		},
		NixlNumFailedNotifications: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:nixl_num_failed_notifications",
			},
			Description: "number of failed NIXL KV Cache notifications",
		},

		DrainRate1m: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},

			PromQL: `
			clamp_min(
				rate(
					sglang:num_requests_total{
						instance="${instance}",
						model_name="${model_name}",
						job="pods"
					}[1m]
				),
				0.01
			)`,
			Description: "1-minute average rate of finished requests (Drains), clamped to avoid zero",
		},

		P95TTFT5m: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{instance="${instance}", model_name="${model_name}", job="pods"}[5m])))`,
			Description: "95th ttft in last 5 mins",
		},
		P95TTFT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{instance="${instance}", job="pods"}[5m])))`,
			Description: "95th ttft in last 5 mins",
		},
		AvgTTFT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:time_to_first_token_seconds_sum{instance="${instance}", job="pods"}[5m]) / increase(vllm:time_to_first_token_seconds_count{instance="${instance}", job="pods"}[5m])`,
			Description: "Average ttft in last 5 mins",
		},
		P95TPOT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{instance="${instance}", job="pods"}[5m])))`,
			Description: "95th tpot in last 5 mins",
		},
		AvgTPOT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			// BUG FIX: the denominator previously used the histogram's _sum
			// series again (sum / sum always evaluates to 1); the average is
			// _sum / _count, matching AvgTTFT5mPod above.
			PromQL:      `increase(vllm:time_per_output_token_seconds_sum{instance="${instance}", job="pods"}[5m]) / increase(vllm:time_per_output_token_seconds_count{instance="${instance}", job="pods"}[5m])`,
			Description: "Average tpot in last 5 mins",
		},
		AvgPromptToksPerReq: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:request_prompt_tokens_sum{instance="${instance}", model_name="${model_name}", job="pods"}[1d]) / increase(vllm:request_prompt_tokens_count{instance="${instance}", model_name="${model_name}", job="pods"}[1d])`,
			Description: "Average prompt tokens per request in last day",
		},
		AvgGenerationToksPerReq: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:request_generation_tokens_sum{instance="${instance}", model_name="${model_name}", job="pods"}[1d]) / increase(vllm:request_generation_tokens_count{instance="${instance}", model_name="${model_name}", job="pods"}[1d])`,
			Description: "Average generation tokens per request in last day",
		},
		AvgE2ELatencyPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:e2e_request_latency_seconds_sum{instance="${instance}", job="pods"}[5m]) / increase(vllm:e2e_request_latency_seconds_count{instance="${instance}", job="pods"}[5m])`,
			Description: "Average End-to-end latency in last 5 mins",
		},
		AvgRequestsPerMinPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:request_success_total{instance="${instance}", job="pods"}[5m]) / 5`,
			Description: "Average requests throughput per minute in last 5 mins",
		},
		AvgPromptThroughputToksPerS: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:avg_prompt_throughput_toks_per_s",
			},
			Description: "Average prompt throughput in tokens per second",
		},
		AvgGenerationThroughputToksPerS: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:avg_generation_throughput_toks_per_s",
				"sglang": "sglang:gen_throughput",
			},
			Description: "Average generation throughput in tokens per second",
		},
		AvgPromptThroughputToksPerMinPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:prompt_tokens_total{instance="${instance}", job="pods"}[5m]) / 5`,
			Description: "Average prompt throughput in tokens per minute in last 5 mins",
		},
		AvgGenerationThroughputToksPerMinPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:generation_tokens_total{instance="${instance}", job="pods"}[5m]) / 5`,
			Description: "Average generation throughput in tokens per minute in last 5 mins",
		},
		MaxLora: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Query: QueryLabel,
			},
			LabelKey: "max_lora",
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:lora_requests_info",
			},
			Description: "Max count of Lora Adapters",
		},
		RunningLoraAdapters: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Query: QueryLabel,
			},
			LabelKey: "running_lora_adapters",
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:lora_requests_info",
			},
			Description: "Count of running Lora Adapters",
		},
		WaitingLoraAdapters: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Query: QueryLabel,
			},
			LabelKey: "waiting_lora_adapters",
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:lora_requests_info",
			},
			Description: "Count of waiting Lora Adapters",
		},
		VTCBucketSizeActive: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			Description: "Current adaptive bucket size used by VTC algorithm for token normalization",
		},
		PrometheusQueryFail: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of Prometheus query failures",
		},
		LLMEngineMetricsQueryFail: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			Description: "Total number of LLM engine metrics query failures",
		},
	}
)

Functions

func BuildQuery

func BuildQuery(queryTemplate string, queryLabels map[string]string) string

BuildQuery dynamically injects labels into a PromQL query template.

func EmitMetricToPrometheus added in v0.6.0

func EmitMetricToPrometheus(routingCtx *types.RoutingContext, pod *v1.Pod, metricName string, metricValue MetricValue, extra map[string]string)

func ExtractNumericFromPromResult added in v0.6.0

func ExtractNumericFromPromResult(r *model.Value) (float64, error)

func GetEngineType added in v0.5.0

func GetEngineType(pod v1.Pod) string

GetEngineType extracts the engine type from pod labels, defaulting to "vllm" for backward compatibility. This function is centralized to avoid duplication across packages.

func GetGaugeValueForTest

func GetGaugeValueForTest(name string, labelValues ...string) float64

func GetLabelValueForKey

func GetLabelValueForKey(metric *dto.Metric, key string) (string, error)

func GetMetricHelp

func GetMetricHelp(metricName string) string

func HttpFailureStatusCode added in v0.6.0

func HttpFailureStatusCode(ctx context.Context, err error, resp *http.Response) (string, string)

func IncrementCounterMetric

func IncrementCounterMetric(name string, help string, value float64, labelNames []string, labelValues ...string)

func InitializePrometheusAPI

func InitializePrometheusAPI(endpoint, username, password string) (prometheusv1.API, error)

InitializePrometheusAPI initializes the Prometheus API client.

func ParseMetricFromBody

func ParseMetricFromBody(body []byte, metricName string) (float64, error)

ParseMetricFromBody parses a simple metric from the Prometheus response body.

func ParseMetricsFromReader added in v0.5.0

func ParseMetricsFromReader(reader io.Reader) (map[string]*dto.MetricFamily, error)

ParseMetricsFromReader parses Prometheus metrics from an io.Reader (extracted for reuse)

func ParseMetricsURLWithContext added in v0.4.0

func ParseMetricsURLWithContext(ctx context.Context, url string) (map[string]*dto.MetricFamily, error)

func SetGaugeMetric

func SetGaugeMetric(name string, help string, value float64, labelNames []string, labelValues ...string)

func SetHistogramMetric added in v0.6.0

func SetHistogramMetric(name string, help string, value *HistogramMetricValue, labelNames []string, labelValues ...string)

func SetupCounterMetricsForTest

func SetupCounterMetricsForTest(metricName string, labelNames []string) (*prometheus.CounterVec, func())

func SetupMetricsForTest

func SetupMetricsForTest(metricName string, labelNames []string) (*prometheus.GaugeVec, func())

Types

type EngineMetricsFetcher added in v0.5.0

type EngineMetricsFetcher struct {
	// contains filtered or unexported fields
}

EngineMetricsFetcher provides a unified interface for fetching typed metrics from inference engine pods. It leverages the centralized metrics registry and type system in pkg/metrics.

func NewEngineMetricsFetcher added in v0.5.0

func NewEngineMetricsFetcher() *EngineMetricsFetcher

NewEngineMetricsFetcher creates a new engine metrics fetcher with default configuration

func NewEngineMetricsFetcherWithConfig added in v0.5.0

func NewEngineMetricsFetcherWithConfig(config EngineMetricsFetcherConfig) *EngineMetricsFetcher

NewEngineMetricsFetcherWithConfig creates a new engine metrics fetcher with custom configuration

func (*EngineMetricsFetcher) FetchAllTypedMetrics added in v0.5.0

func (ef *EngineMetricsFetcher) FetchAllTypedMetrics(ctx context.Context, endpoint, engineType, identifier string, requestedMetrics []string) (*EngineMetricsResult, error)

FetchAllTypedMetrics fetches all available typed metrics from an engine endpoint

func (*EngineMetricsFetcher) FetchTypedMetric added in v0.5.0

func (ef *EngineMetricsFetcher) FetchTypedMetric(ctx context.Context, endpoint, engineType, identifier, metricName string) (MetricValue, error)

FetchTypedMetric fetches a single typed metric from an engine endpoint. Note: if the client needs to fetch multiple metrics, it is better to use FetchAllTypedMetrics.

type EngineMetricsFetcherConfig added in v0.5.0

type EngineMetricsFetcherConfig struct {
	Timeout     time.Duration
	MaxRetries  int
	BaseDelay   time.Duration
	MaxDelay    time.Duration
	InsecureTLS bool
}

EngineMetricsFetcherConfig holds configuration for engine metrics fetching

func DefaultEngineMetricsFetcherConfig added in v0.5.0

func DefaultEngineMetricsFetcherConfig() EngineMetricsFetcherConfig

DefaultEngineMetricsFetcherConfig returns sensible defaults for engine metrics fetching

type EngineMetricsResult added in v0.5.0

type EngineMetricsResult struct {
	Identifier   string // Caller-provided identifier (e.g., pod name)
	Endpoint     string // The endpoint that was queried
	EngineType   string
	Metrics      map[string]MetricValue // Pod-scoped metrics
	ModelMetrics map[string]MetricValue // Pod+Model-scoped metrics (key format: "model/metric")
	Errors       []error                // Any errors encountered during fetching
}

EngineMetricsResult contains the result of fetching metrics from an engine endpoint

type HistogramMetricValue

type HistogramMetricValue struct {
	Sum     float64
	Count   float64
	Buckets map[string]float64 // e.g., {"0.1": 5, "0.5": 3, "1.0": 2}
	Labels  map[string]string  // Optional: Additional labels for the histogram.
}

HistogramMetricValue represents a detailed histogram metric.

func GetHistogramValue

func GetHistogramValue(metric *dto.Metric) (*HistogramMetricValue, error)

func ParseHistogramFromBody

func ParseHistogramFromBody(body []byte, metricName string) (*HistogramMetricValue, error)

ParseHistogramFromBody parses a histogram metric from the Prometheus response body.

func (*HistogramMetricValue) GetBucketValue

func (h *HistogramMetricValue) GetBucketValue(bucket string) (float64, bool)

GetBucketValue returns the count for a specific bucket.

func (*HistogramMetricValue) GetCount

func (h *HistogramMetricValue) GetCount() float64

GetCount returns the total count of values in the histogram.

func (*HistogramMetricValue) GetHistogramValue

func (h *HistogramMetricValue) GetHistogramValue() *HistogramMetricValue

func (*HistogramMetricValue) GetLabelValues added in v0.6.0

func (h *HistogramMetricValue) GetLabelValues() map[string]string

func (*HistogramMetricValue) GetMean

func (h *HistogramMetricValue) GetMean() float64

GetMean returns the mean value of the histogram (Sum / Count).

func (*HistogramMetricValue) GetPercentile

func (h *HistogramMetricValue) GetPercentile(percentile float64) (float64, error)

func (*HistogramMetricValue) GetPrometheusResult

func (h *HistogramMetricValue) GetPrometheusResult() *model.Value

func (*HistogramMetricValue) GetSimpleValue

func (h *HistogramMetricValue) GetSimpleValue() float64

func (*HistogramMetricValue) GetSum

func (h *HistogramMetricValue) GetSum() float64

GetSum returns the sum of the histogram values.

func (*HistogramMetricValue) GetValue

func (h *HistogramMetricValue) GetValue() interface{}

type LabelValueMetricValue

type LabelValueMetricValue struct {
	Value string
}

LabelValueMetricValue represents a metric value extracted from a metric label (used with QueryLabel-based metrics).

func (*LabelValueMetricValue) GetHistogramValue

func (l *LabelValueMetricValue) GetHistogramValue() *HistogramMetricValue

func (*LabelValueMetricValue) GetLabelValues added in v0.6.0

func (l *LabelValueMetricValue) GetLabelValues() map[string]string

func (*LabelValueMetricValue) GetPrometheusResult

func (l *LabelValueMetricValue) GetPrometheusResult() *model.Value

func (*LabelValueMetricValue) GetSimpleValue

func (l *LabelValueMetricValue) GetSimpleValue() float64

type Metric

type Metric struct {
	MetricSource             MetricSource
	MetricType               MetricType
	PromQL                   string            // Optional: Only applicable for PromQL-based metrics
	LabelKey                 string            // Optional: Only applicable for QueryLabel-based metrics
	EngineMetricsNameMapping map[string]string // Optional: Mapping from engine type to raw metric name.
	Description              string
	MetricScope              MetricScope
}

Metric defines a unique metric with metadata.

type MetricScope

type MetricScope string

MetricScope defines the scope of a metric (e.g., model or pod or podmodel).

const (
	ModelMetricScope    MetricScope = "Model"
	PodMetricScope      MetricScope = "Pod"
	PodModelMetricScope MetricScope = "PodModel" // model in pod
)

type MetricSource

type MetricSource string

MetricSource defines the metric source

const (
	// PrometheusEndpoint indicates metrics are queried from a remote Prometheus server.
	// This source allows querying both raw and aggregated metrics, leveraging PromQL for advanced analytics.
	PrometheusEndpoint MetricSource = "PrometheusEndpoint"
	// PodRawMetrics indicates metrics are collected directly from the metricPort of a Pod.
	PodRawMetrics MetricSource = "PodRawMetrics"
)

type MetricSubscriber

type MetricSubscriber interface {
	SubscribedMetrics() []string
}

type MetricType

type MetricType struct {
	Raw   RawMetricType // Optional: Represents the type of raw metric.
	Query QueryType     // Optional: Represents the query type for derived metrics.
}

MetricType defines the type of a metric, including raw metrics and queries.

func (MetricType) IsQuery

func (m MetricType) IsQuery() bool

func (MetricType) IsRawMetric

func (m MetricType) IsRawMetric() bool

type MetricValue

type MetricValue interface {
	GetSimpleValue() float64
	GetHistogramValue() *HistogramMetricValue
	GetPrometheusResult() *model.Value
	GetLabelValues() map[string]string
}

MetricValue is the interface for all metric values.

type PrometheusMetricValue

type PrometheusMetricValue struct {
	Result *model.Value
}

PrometheusMetricValue represents Prometheus query results.

func (*PrometheusMetricValue) GetHistogramValue

func (p *PrometheusMetricValue) GetHistogramValue() *HistogramMetricValue

func (*PrometheusMetricValue) GetLabelValues added in v0.6.0

func (s *PrometheusMetricValue) GetLabelValues() map[string]string

func (*PrometheusMetricValue) GetPrometheusResult

func (p *PrometheusMetricValue) GetPrometheusResult() *model.Value

func (*PrometheusMetricValue) GetSimpleValue

func (p *PrometheusMetricValue) GetSimpleValue() float64

type QueryType

type QueryType string

QueryType defines the type of metric query, such as PromQL.

const (
	PromQL     QueryType = "PromQL"     // PromQL represents a Prometheus query language expression.
	QueryLabel QueryType = "QueryLabel" // Query Label value from raw metrics.
)

type RawMetricType

type RawMetricType string

RawMetricType defines the type of raw metrics (e.g., collected directly from a source).

const (
	Gauge     RawMetricType = "Gauge"     // Gauge represents a snapshot value.
	Counter   RawMetricType = "Counter"   // Counter represents a cumulative value.
	Histogram RawMetricType = "Histogram" // Histogram represents a distribution of values.
)

type Server added in v0.4.0

type Server struct {
	// contains filtered or unexported fields
}

func NewServer added in v0.4.0

func NewServer(addr string) *Server

func (*Server) Start added in v0.4.0

func (s *Server) Start() error

func (*Server) Stop added in v0.4.0

func (s *Server) Stop() error

type SimpleMetricValue

type SimpleMetricValue struct {
	Value  float64
	Labels map[string]string // Optional: Additional labels for the metric.
}

SimpleMetricValue represents simple metrics (e.g., gauge or counter).

func GetCounterGaugeValue

func GetCounterGaugeValue(metric *dto.Metric, metricType dto.MetricType) (*SimpleMetricValue, error)

func (*SimpleMetricValue) GetHistogramValue

func (s *SimpleMetricValue) GetHistogramValue() *HistogramMetricValue

func (*SimpleMetricValue) GetLabelValues added in v0.6.0

func (s *SimpleMetricValue) GetLabelValues() map[string]string

func (*SimpleMetricValue) GetPrometheusResult

func (s *SimpleMetricValue) GetPrometheusResult() *model.Value

func (*SimpleMetricValue) GetSimpleValue

func (s *SimpleMetricValue) GetSimpleValue() float64

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL