metrics

package
v0.5.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 9, 2025 License: Apache-2.0 Imports: 24 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// Canonical metric keys used throughout the package. Raw metrics are
	// resolved to engine-native names via Metrics[...].EngineMetricsNameMapping;
	// query metrics are computed from the PromQL templates in Metrics.

	// Point-in-time request counts scraped from the engine pod.
	NumRequestsRunning                   = "num_requests_running"
	NumRequestsWaiting                   = "num_requests_waiting"
	NumRequestsSwapped                   = "num_requests_swapped"
	// Cumulative token totals and throughput snapshots.
	PromptTokenTotal                     = "prompt_token_total"
	GenerationTokenTotal                 = "generation_token_total"
	AvgPromptThroughputToksPerS          = "avg_prompt_throughput_toks_per_s"
	AvgGenerationThroughputToksPerS      = "avg_generation_throughput_toks_per_s"
	// Histogram latency metrics scraped from the engine pod.
	IterationTokensTotal                 = "iteration_tokens_total"
	TimeToFirstTokenSeconds              = "time_to_first_token_seconds"
	TimePerOutputTokenSeconds            = "time_per_output_token_seconds"
	E2ERequestLatencySeconds             = "e2e_request_latency_seconds"
	RequestQueueTimeSeconds              = "request_queue_time_seconds"
	RequestInferenceTimeSeconds          = "request_inference_time_seconds"
	RequestDecodeTimeSeconds             = "request_decode_time_seconds"
	RequestPrefillTimeSeconds            = "request_prefill_time_seconds"
	// PromQL-derived percentile/average metrics over 5-minute windows.
	P95TTFT5m                            = "p95_ttft_5m"
	P95TTFT5mPod                         = "p95_ttft_5m_pod"
	AvgTTFT5mPod                         = "avg_ttft_5m_pod"
	P95TPOT5mPod                         = "p95_tpot_5m_pod"
	// NOTE(review): value breaks the "*_5m_pod" suffix pattern used by the
	// sibling keys above — confirm before renaming; external consumers may
	// depend on the current string.
	AvgTPOT5mPod                         = "avg_tpot_pod_5m"
	AvgPromptToksPerReq                  = "avg_prompt_toks_per_req"
	AvgGenerationToksPerReq              = "avg_generation_toks_per_req"
	// Cache and utilization ratios.
	GPUCacheUsagePerc                    = "gpu_cache_usage_perc"
	// NOTE(review): GPUBusyTimeRatio has no entry in the Metrics map below —
	// confirm whether that is intentional.
	GPUBusyTimeRatio                     = "gpu_busy_time_ratio"
	CPUCacheUsagePerc                    = "cpu_cache_usage_perc"
	EngineUtilization                    = "engine_utilization"
	AvgE2ELatencyPod                     = "avg_e2e_latency_pod"
	AvgRequestsPerMinPod                 = "avg_requests_per_min_pod"
	AvgPromptThroughputToksPerMinPod     = "avg_prompt_throughput_toks_per_min_pod"
	AvgGenerationThroughputToksPerMinPod = "avg_generation_throughput_toks_per_min_pod"
	// LoRA adapter metrics, read from labels on vllm:lora_requests_info.
	MaxLora                              = "max_lora"
	WaitingLoraAdapters                  = "waiting_lora_adapters"
	RunningLoraAdapters                  = "running_lora_adapters"
	VTCBucketSizeActive                  = "vtc_bucket_size_active"
	// Realtime metrics
	RealtimeNumRequestsRunning = "realtime_num_requests_running"
	RealtimeNormalizedPendings = "realtime_normalized_pendings"
)

Variables

View Source
var (

	// Function variables that can be overridden for testing.
	// Production code should call SetGaugeMetric / IncrementCounterMetric;
	// tests may swap these out to intercept metric emission.
	SetGaugeMetricFnForTest         = defaultSetGaugeMetric
	IncrementCounterMetricFnForTest = defaultIncrementCounterMetric
)
View Source
var (
	// Metrics defines all available metrics, including raw and query-based metrics.
	//
	// Raw metrics (MetricSource == PodRawMetrics) are scraped directly from an
	// engine pod's metrics endpoint; EngineMetricsNameMapping translates the
	// canonical key to each engine's native metric name. Query metrics
	// (MetricSource == PrometheusEndpoint) are PromQL templates whose
	// ${instance} / ${model_name} placeholders are filled in by BuildQuery.
	Metrics = map[string]Metric{

		// NOTE(review): the running/waiting/swapped request counts are
		// declared Raw: Counter although they are point-in-time values —
		// confirm whether Gauge was intended; parsing may depend on this.
		NumRequestsRunning: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:num_requests_running",
				"sglang": "sglang:num_running_reqs",
			},
			Description: "Number of running requests",
		},
		NumRequestsWaiting: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:num_requests_waiting",
				"sglang": "sglang:num_waiting_reqs",
			},
			Description: "Number of waiting requests",
		},
		NumRequestsSwapped: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:num_requests_swapped",
			},
			Description: "Number of swapped requests",
		},

		// NOTE(review): the cumulative *_tokens_total metrics are declared
		// Raw: Gauge — confirm whether Counter was intended.
		PromptTokenTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:prompt_tokens_total",
			},
			Description: "Total prompt tokens",
		},
		GenerationTokenTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:generation_tokens_total",
			},
			Description: "Total generation tokens",
		},
		AvgPromptThroughputToksPerS: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:avg_prompt_throughput_toks_per_s",
			},
			Description: "Average prompt throughput in tokens per second",
		},
		AvgGenerationThroughputToksPerS: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:avg_generation_throughput_toks_per_s",
				"sglang": "sglang:gen_throughput",
			},
			Description: "Average generation throughput in tokens per second",
		},

		// Histogram metrics exposed directly by the engines.
		IterationTokensTotal: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:iteration_tokens_total",
			},
			Description: "Total iteration tokens",
		},
		TimeToFirstTokenSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:time_to_first_token_seconds",
				"sglang": "sglang:time_to_first_token_seconds",
			},
			Description: "Time to first token in seconds",
		},
		TimePerOutputTokenSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:time_per_output_token_seconds",
				"sglang": "sglang:inter_token_latency_seconds",
			},
			Description: "Time per output token in seconds",
		},
		E2ERequestLatencySeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:e2e_request_latency_seconds",
				"sglang": "sglang:e2e_request_latency_seconds",
			},
			Description: "End-to-end request latency in seconds",
		},
		RequestQueueTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_queue_time_seconds",
			},
			Description: "Request queue time in seconds",
		},
		RequestInferenceTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_inference_time_seconds",
			},
			Description: "Request inference time in seconds",
		},
		RequestDecodeTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_decode_time_seconds",
			},
			Description: "Request decode time in seconds",
		},
		RequestPrefillTimeSeconds: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Histogram,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:request_prefill_time_seconds",
			},
			Description: "Request prefill time in seconds",
		},

		// PromQL-derived metrics. The sum/count pattern computes an average
		// over the window; histogram_quantile computes a percentile from the
		// bucket series.
		P95TTFT5m: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{instance="${instance}", model_name="${model_name}", job="pods"}[5m])))`,
			Description: "95th ttft in last 5 mins",
		},
		P95TTFT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{instance="${instance}", job="pods"}[5m])))`,
			Description: "95th ttft in last 5 mins",
		},
		AvgTTFT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:time_to_first_token_seconds_sum{instance="${instance}", job="pods"}[5m]) / increase(vllm:time_to_first_token_seconds_count{instance="${instance}", job="pods"}[5m])`,
			Description: "Average ttft in last 5 mins",
		},
		P95TPOT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{instance="${instance}", job="pods"}[5m])))`,
			Description: "95th tpot in last 5 mins",
		},
		AvgTPOT5mPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			// Fixed: the denominator previously used the _sum series, making
			// the ratio identically 1; an average requires dividing by _count,
			// mirroring AvgTTFT5mPod above.
			PromQL:      `increase(vllm:time_per_output_token_seconds_sum{instance="${instance}", job="pods"}[5m]) / increase(vllm:time_per_output_token_seconds_count{instance="${instance}", job="pods"}[5m])`,
			Description: "Average tpot in last 5 mins",
		},
		AvgPromptToksPerReq: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:request_prompt_tokens_sum{instance="${instance}", model_name="${model_name}", job="pods"}[1d]) / increase(vllm:request_prompt_tokens_count{instance="${instance}", model_name="${model_name}", job="pods"}[1d])`,
			Description: "Average prompt tokens per request in last day",
		},
		AvgGenerationToksPerReq: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:request_generation_tokens_sum{instance="${instance}", model_name="${model_name}", job="pods"}[1d]) / increase(vllm:request_generation_tokens_count{instance="${instance}", model_name="${model_name}", job="pods"}[1d])`,
			Description: "Average generation tokens per request in last day",
		},
		// NOTE(review): the cache-usage ratios are declared Raw: Counter but
		// are instantaneous percentages — confirm whether Gauge was intended.
		GPUCacheUsagePerc: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm":   "vllm:gpu_cache_usage_perc",
				"sglang": "sglang:token_usage",
				"xllm":   "kv_cache_utilization",
			},
			Description: "GPU cache usage percentage",
		},
		EngineUtilization: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			EngineMetricsNameMapping: map[string]string{
				"xllm": "engine_utilization",
			},
			// Fixed: description previously read "GPU busy time ratio",
			// copy-pasted from the GPUBusyTimeRatio constant.
			Description: "Engine utilization",
		},
		CPUCacheUsagePerc: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Counter,
			},
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:cpu_cache_usage_perc",
			},
			Description: "CPU cache usage percentage",
		},
		AvgE2ELatencyPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:e2e_request_latency_seconds_sum{instance="${instance}", job="pods"}[5m]) / increase(vllm:e2e_request_latency_seconds_count{instance="${instance}", job="pods"}[5m])`,
			Description: "Average End-to-end latency in last 5 mins",
		},
		// The "/ 5" converts a 5-minute increase into a per-minute rate.
		AvgRequestsPerMinPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:request_success_total{instance="${instance}", job="pods"}[5m]) / 5`,
			Description: "Average requests throughput per minute in last 5 mins",
		},
		AvgPromptThroughputToksPerMinPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:prompt_tokens_total{instance="${instance}", job="pods"}[5m]) / 5`,
			Description: "Average prompt throughput in tokens per minute in last 5 mins",
		},
		AvgGenerationThroughputToksPerMinPod: {
			MetricScope:  PodMetricScope,
			MetricSource: PrometheusEndpoint,
			MetricType: MetricType{
				Query: PromQL,
			},
			PromQL:      `increase(vllm:generation_tokens_total{instance="${instance}", job="pods"}[5m]) / 5`,
			Description: "Average generation throughput in tokens per minute in last 5 mins",
		},
		// LoRA adapter metrics: values are read from labels on the raw
		// vllm:lora_requests_info metric (QueryLabel + LabelKey).
		MaxLora: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Query: QueryLabel,
			},
			LabelKey: "max_lora",
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:lora_requests_info",
			},
			Description: "Max count of Lora Adapters",
		},
		RunningLoraAdapters: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Query: QueryLabel,
			},
			LabelKey: "running_lora_adapters",
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:lora_requests_info",
			},
			Description: "Count of running Lora Adapters",
		},
		WaitingLoraAdapters: {
			MetricScope:  PodMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Query: QueryLabel,
			},
			LabelKey: "waiting_lora_adapters",
			EngineMetricsNameMapping: map[string]string{
				"vllm": "vllm:lora_requests_info",
			},
			Description: "Count of waiting Lora Adapters",
		},
		// NOTE(review): raw metric with no EngineMetricsNameMapping — confirm
		// how this entry is resolved against an engine endpoint.
		VTCBucketSizeActive: {
			MetricScope:  PodModelMetricScope,
			MetricSource: PodRawMetrics,
			MetricType: MetricType{
				Raw: Gauge,
			},
			Description: "Current adaptive bucket size used by VTC algorithm for token normalization",
		},
	}
)

Functions

func BuildQuery

func BuildQuery(queryTemplate string, queryLabels map[string]string) string

BuildQuery dynamically injects labels into a PromQL query template.

func GetCounterGaugeValue

func GetCounterGaugeValue(metric *dto.Metric, metricType dto.MetricType) (float64, error)

func GetEngineType added in v0.5.0

func GetEngineType(pod v1.Pod) string

GetEngineType extracts the engine type from pod labels and defaults to "vllm" for backward compatibility. This function is centralized to avoid duplication across packages.

func GetGaugeValueForTest

func GetGaugeValueForTest(name string, labelValues ...string) float64

func GetLabelValueForKey

func GetLabelValueForKey(metric *dto.Metric, key string) (string, error)

func GetMetricHelp

func GetMetricHelp(metricName string) string

func IncrementCounterMetric

func IncrementCounterMetric(name string, help string, value float64, labelNames []string, labelValues ...string)

func InitializePrometheusAPI

func InitializePrometheusAPI(endpoint, username, password string) (prometheusv1.API, error)

InitializePrometheusAPI initializes the Prometheus API client.

func ParseMetricFromBody

func ParseMetricFromBody(body []byte, metricName string) (float64, error)

ParseMetricFromBody parses a simple metric from the Prometheus response body.

func ParseMetricsFromReader added in v0.5.0

func ParseMetricsFromReader(reader io.Reader) (map[string]*dto.MetricFamily, error)

ParseMetricsFromReader parses Prometheus metrics from an io.Reader (extracted for reuse)

func ParseMetricsURLWithContext added in v0.4.0

func ParseMetricsURLWithContext(ctx context.Context, url string) (map[string]*dto.MetricFamily, error)

func SetGaugeMetric

func SetGaugeMetric(name string, help string, value float64, labelNames []string, labelValues ...string)

func SetupCounterMetricsForTest

func SetupCounterMetricsForTest(metricName string, labelNames []string) (*prometheus.CounterVec, func())

func SetupMetricsForTest

func SetupMetricsForTest(metricName string, labelNames []string) (*prometheus.GaugeVec, func())

Types

type EngineMetricsFetcher added in v0.5.0

// EngineMetricsFetcher provides a unified interface for fetching typed metrics
// from inference engine pods, leveraging the centralized metrics registry and
// type system in this package. Construct via NewEngineMetricsFetcher or
// NewEngineMetricsFetcherWithConfig.
type EngineMetricsFetcher struct {
	// contains filtered or unexported fields
}

EngineMetricsFetcher provides a unified interface for fetching typed metrics from inference engine pods It leverages the centralized metrics registry and type system in pkg/metrics

func NewEngineMetricsFetcher added in v0.5.0

func NewEngineMetricsFetcher() *EngineMetricsFetcher

NewEngineMetricsFetcher creates a new engine metrics fetcher with default configuration

func NewEngineMetricsFetcherWithConfig added in v0.5.0

func NewEngineMetricsFetcherWithConfig(config EngineMetricsFetcherConfig) *EngineMetricsFetcher

NewEngineMetricsFetcherWithConfig creates a new engine metrics fetcher with custom configuration

func (*EngineMetricsFetcher) FetchAllTypedMetrics added in v0.5.0

func (ef *EngineMetricsFetcher) FetchAllTypedMetrics(ctx context.Context, endpoint, engineType, identifier string, requestedMetrics []string) (*EngineMetricsResult, error)

FetchAllTypedMetrics fetches all available typed metrics from an engine endpoint

func (*EngineMetricsFetcher) FetchTypedMetric added in v0.5.0

func (ef *EngineMetricsFetcher) FetchTypedMetric(ctx context.Context, endpoint, engineType, identifier, metricName string) (MetricValue, error)

FetchTypedMetric fetches a single typed metric from an engine endpoint Note: if the client needs to fetch multiple metrics, it's better to use FetchAllTypedMetrics

type EngineMetricsFetcherConfig added in v0.5.0

// EngineMetricsFetcherConfig holds configuration for engine metrics fetching.
// Zero values are not meaningful here; use DefaultEngineMetricsFetcherConfig
// for sensible defaults.
type EngineMetricsFetcherConfig struct {
	Timeout     time.Duration // per-fetch timeout
	MaxRetries  int           // number of retry attempts after a failed fetch
	BaseDelay   time.Duration // initial delay between retries
	MaxDelay    time.Duration // upper bound on the retry delay
	InsecureTLS bool          // skip TLS certificate verification when true
}

EngineMetricsFetcherConfig holds configuration for engine metrics fetching

func DefaultEngineMetricsFetcherConfig added in v0.5.0

func DefaultEngineMetricsFetcherConfig() EngineMetricsFetcherConfig

DefaultEngineMetricsFetcherConfig returns sensible defaults for engine metrics fetching

type EngineMetricsResult added in v0.5.0

// EngineMetricsResult contains the result of fetching metrics from a single
// engine endpoint. Partial results are possible: Errors collects per-metric
// failures while Metrics/ModelMetrics hold whatever was fetched successfully.
type EngineMetricsResult struct {
	Identifier   string // Caller-provided identifier (e.g., pod name)
	Endpoint     string // The endpoint that was queried
	EngineType   string // Engine type the metrics were resolved for (e.g. "vllm")
	Metrics      map[string]MetricValue // Pod-scoped metrics
	ModelMetrics map[string]MetricValue // Pod+Model-scoped metrics (key format: "model/metric")
	Errors       []error                // Any errors encountered during fetching
}

EngineMetricsResult contains the result of fetching metrics from an engine endpoint

type HistogramMetricValue

// HistogramMetricValue represents a detailed histogram metric: the running
// sum and count plus per-bucket cumulative counts keyed by upper bound.
type HistogramMetricValue struct {
	Sum     float64            // sum of all observed values
	Count   float64            // total number of observations
	Buckets map[string]float64 // e.g., {"0.1": 5, "0.5": 3, "1.0": 2}
}

HistogramMetricValue represents a detailed histogram metric.

func GetHistogramValue

func GetHistogramValue(metric *dto.Metric) (*HistogramMetricValue, error)

func ParseHistogramFromBody

func ParseHistogramFromBody(body []byte, metricName string) (*HistogramMetricValue, error)

ParseHistogramFromBody parses a histogram metric from the Prometheus response body.

func (*HistogramMetricValue) GetBucketValue

func (h *HistogramMetricValue) GetBucketValue(bucket string) (float64, bool)

GetBucketValue returns the count for a specific bucket.

func (*HistogramMetricValue) GetCount

func (h *HistogramMetricValue) GetCount() float64

GetCount returns the total count of values in the histogram.

func (*HistogramMetricValue) GetHistogramValue

func (h *HistogramMetricValue) GetHistogramValue() *HistogramMetricValue

func (*HistogramMetricValue) GetLabelValue

func (s *HistogramMetricValue) GetLabelValue() string

func (*HistogramMetricValue) GetMean

func (h *HistogramMetricValue) GetMean() float64

GetMean returns the mean value of the histogram (Sum / Count).

func (*HistogramMetricValue) GetPercentile

func (h *HistogramMetricValue) GetPercentile(percentile float64) (float64, error)

func (*HistogramMetricValue) GetPrometheusResult

func (h *HistogramMetricValue) GetPrometheusResult() *model.Value

func (*HistogramMetricValue) GetSimpleValue

func (h *HistogramMetricValue) GetSimpleValue() float64

func (*HistogramMetricValue) GetSum

func (h *HistogramMetricValue) GetSum() float64

GetSum returns the sum of the histogram values.

func (*HistogramMetricValue) GetValue

func (h *HistogramMetricValue) GetValue() interface{}

type LabelValueMetricValue

// LabelValueMetricValue wraps a string value extracted from a label on a raw
// metric (QueryLabel metrics such as the LoRA adapter counts).
type LabelValueMetricValue struct {
	Value string // the raw label value
}

LabelValueMetricValue represents a string value extracted from a label on a raw metric.

func (*LabelValueMetricValue) GetHistogramValue

func (l *LabelValueMetricValue) GetHistogramValue() *HistogramMetricValue

func (*LabelValueMetricValue) GetLabelValue

func (l *LabelValueMetricValue) GetLabelValue() string

func (*LabelValueMetricValue) GetPrometheusResult

func (l *LabelValueMetricValue) GetPrometheusResult() *model.Value

func (*LabelValueMetricValue) GetSimpleValue

func (l *LabelValueMetricValue) GetSimpleValue() float64

type Metric

// Metric defines a unique metric with metadata. Exactly one of the optional
// fields below applies depending on MetricType: PromQL for query metrics,
// LabelKey for QueryLabel metrics, EngineMetricsNameMapping for raw metrics.
type Metric struct {
	MetricSource             MetricSource
	MetricType               MetricType
	PromQL                   string            // Optional: Only applicable for PromQL-based metrics
	LabelKey                 string            // Optional: Only applicable for QueryLabel-based metrics
	EngineMetricsNameMapping map[string]string // Optional: Mapping from engine type to raw metric name.
	Description              string
	MetricScope              MetricScope
}

Metric defines a unique metric with metadata.

type MetricScope

type MetricScope string

MetricScope defines the scope of a metric (e.g., model or pod or podmodel).

const (
	// ModelMetricScope is a metric aggregated per model across pods.
	ModelMetricScope    MetricScope = "Model"
	// PodMetricScope is a metric for a single pod regardless of model.
	PodMetricScope      MetricScope = "Pod"
	PodModelMetricScope MetricScope = "PodModel" // model in pod
)

type MetricSource

type MetricSource string

MetricSource defines the metric source

const (
	// PrometheusEndpoint indicates metrics are queried from a remote Prometheus server.
	// This source allows querying both raw and aggregated metrics, leveraging PromQL for advanced analytics.
	PrometheusEndpoint MetricSource = "PrometheusEndpoint"
	// PodRawMetrics indicates metrics are collected directly from the metricPort of a Pod.
	PodRawMetrics MetricSource = "PodRawMetrics"
)

type MetricSubscriber

// MetricSubscriber is implemented by components that declare which metric
// keys (from the Metrics registry) they want collected on their behalf.
type MetricSubscriber interface {
	SubscribedMetrics() []string
}

type MetricType

// MetricType defines the type of a metric. Exactly one field is expected to
// be set: Raw for directly scraped metrics, Query for derived metrics.
// Use IsRawMetric / IsQuery to discriminate.
type MetricType struct {
	Raw   RawMetricType // Optional: Represents the type of raw metric.
	Query QueryType     // Optional: Represents the query type for derived metrics.
}

MetricType defines the type of a metric, including raw metrics and queries.

func (MetricType) IsQuery

func (m MetricType) IsQuery() bool

func (MetricType) IsRawMetric

func (m MetricType) IsRawMetric() bool

type MetricValue

// MetricValue is the interface for all metric values. Each implementation
// (SimpleMetricValue, HistogramMetricValue, PrometheusMetricValue,
// LabelValueMetricValue) meaningfully implements one accessor; the others
// return zero values.
type MetricValue interface {
	GetSimpleValue() float64
	GetHistogramValue() *HistogramMetricValue
	GetPrometheusResult() *model.Value
	GetLabelValue() string
}

MetricValue is the interface for all metric values.

type PrometheusMetricValue

// PrometheusMetricValue represents Prometheus query results.
type PrometheusMetricValue struct {
	Result *model.Value // raw result returned by the Prometheus API
}

PrometheusMetricValue represents Prometheus query results.

func (*PrometheusMetricValue) GetHistogramValue

func (p *PrometheusMetricValue) GetHistogramValue() *HistogramMetricValue

func (*PrometheusMetricValue) GetLabelValue

func (s *PrometheusMetricValue) GetLabelValue() string

func (*PrometheusMetricValue) GetPrometheusResult

func (p *PrometheusMetricValue) GetPrometheusResult() *model.Value

func (*PrometheusMetricValue) GetSimpleValue

func (p *PrometheusMetricValue) GetSimpleValue() float64

type QueryType

type QueryType string

QueryType defines the type of metric query, such as PromQL.

const (
	PromQL     QueryType = "PromQL"     // PromQL represents a Prometheus query language expression.
	QueryLabel QueryType = "QueryLabel" // Query Label value from raw metrics.
)

type RawMetricType

type RawMetricType string

RawMetricType defines the type of raw metrics (e.g., collected directly from a source).

const (
	Gauge     RawMetricType = "Gauge"     // Gauge represents a snapshot value.
	Counter   RawMetricType = "Counter"   // Counter represents a cumulative value.
	Histogram RawMetricType = "Histogram" // Histogram represents a distribution of values.
)

type Server added in v0.4.0

// Server is a standalone metrics HTTP server created by NewServer and
// controlled via Start/Stop.
type Server struct {
	// contains filtered or unexported fields
}

func NewServer added in v0.4.0

func NewServer(addr string) *Server

func (*Server) Start added in v0.4.0

func (s *Server) Start() error

func (*Server) Stop added in v0.4.0

func (s *Server) Stop() error

type SimpleMetricValue

// SimpleMetricValue represents simple metrics (e.g., gauge or counter).
type SimpleMetricValue struct {
	Value float64 // the scalar sample value
}

SimpleMetricValue represents simple metrics (e.g., gauge or counter).

func (*SimpleMetricValue) GetHistogramValue

func (s *SimpleMetricValue) GetHistogramValue() *HistogramMetricValue

func (*SimpleMetricValue) GetLabelValue

func (s *SimpleMetricValue) GetLabelValue() string

func (*SimpleMetricValue) GetPrometheusResult

func (s *SimpleMetricValue) GetPrometheusResult() *model.Value

func (*SimpleMetricValue) GetSimpleValue

func (s *SimpleMetricValue) GetSimpleValue() float64

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL