arrow

package
v0.0.0-...-6fb550b Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 16, 2026 License: Apache-2.0 Imports: 14 Imported by: 0

Documentation

Index

Constants

View Source
const (
	TableTraces       = "otel_traces"
	TableLogs         = "otel_logs"
	TableGauge        = "otel_metrics_gauge"
	TableSum          = "otel_metrics_sum"
	TableHistogram    = "otel_metrics_histogram"
	TableExpHistogram = "otel_metrics_exp_histogram"
	TableSummary      = "otel_metrics_summary"
)

Table names for each signal/metric type.

Variables

View Source
var DefaultLogsPromoted = []string{
	"service.name",
	"log.file.path",
	"exception.type",
	"exception.message",
}

DefaultLogsPromoted is the default set of promoted attributes for logs.

View Source
var DefaultMetricsPromoted = []string{
	"service.name",
	"host.name",
}

DefaultMetricsPromoted is the default set of promoted attributes for metrics.

View Source
var DefaultTracesPromoted = []string{
	"service.name",
	"http.method",
	"http.status_code",
	"http.url",
	"http.route",
	"db.system",
	"rpc.method",
	"rpc.service",
}

DefaultTracesPromoted is the default set of promoted attributes for traces.

View Source
var TimestampType arrow.DataType = arrow.FixedWidthTypes.Timestamp_us

TimestampType is the Arrow type used for OTel timestamp columns. Microsecond UTC timestamps map to Iceberg's timestamptz, which supports time-based partition transforms (e.g. HourTransform).

Functions

func DefaultCompression

func DefaultCompression() compress.Compression

DefaultCompression returns the default Parquet compression codec.

func ExpHistogramSchema

func ExpHistogramSchema(promoted []string) *arrow.Schema

ExpHistogramSchema returns the Arrow schema for exponential histogram metrics.

func GaugeSchema

func GaugeSchema(promoted []string) *arrow.Schema

GaugeSchema returns the Arrow schema for gauge metrics.

func HistogramSchema

func HistogramSchema(promoted []string) *arrow.Schema

HistogramSchema returns the Arrow schema for histogram metrics.

func LogsSchema

func LogsSchema(promoted []string) *arrow.Schema

LogsSchema returns the Arrow schema for the logs table.

func MapToJSON

func MapToJSON(m pcommon.Map) string

MapToJSON serializes a pcommon.Map to a JSON string. Returns empty string for empty maps.

func MergePromoted

func MergePromoted(defaults, overrides []string) []string

MergePromoted returns the defaults if overrides is empty, otherwise the overrides.

func MergeRecords

func MergeRecords(alloc memory.Allocator, records []arrow.RecordBatch) (arrow.RecordBatch, error)

MergeRecords concatenates multiple Arrow records with the same schema into one. All input records must share the same schema. Caller must release the returned record.

func NanoToMicro

func NanoToMicro(nanos int64) int64

NanoToMicro converts a nanosecond Unix timestamp to microseconds.

func SumSchema

func SumSchema(promoted []string) *arrow.Schema

SumSchema returns the Arrow schema for sum (counter/cumulative) metrics.

func SummarySchema

func SummarySchema(promoted []string) *arrow.Schema

SummarySchema returns the Arrow schema for summary metrics.

func TracesSchema

func TracesSchema(promoted []string) *arrow.Schema

TracesSchema returns the Arrow schema for the traces table. Unsigned OTLP ints are mapped to signed for Iceberg compatibility.

func WriteParquet

func WriteParquet(rec arrow.RecordBatch, compression compress.Compression) ([]byte, error)

WriteParquet serializes an Arrow record to Parquet bytes using the given compression.

Types

type Granularity

type Granularity string

Granularity controls partition time resolution.

const (
	GranularityHour  Granularity = "hour"
	GranularityDay   Granularity = "day"
	GranularityMonth Granularity = "month"
)

type LogsConverter

type LogsConverter struct {
	// contains filtered or unexported fields
}

LogsConverter converts plog.Logs to Arrow records.

func NewLogsConverter

func NewLogsConverter(promoted []string) *LogsConverter

NewLogsConverter creates a converter with the given promoted attributes.

func (*LogsConverter) Convert

func (c *LogsConverter) Convert(ld plog.Logs) (arrow.RecordBatch, error)

Convert transforms plog.Logs into an Arrow record.

func (*LogsConverter) Schema

func (c *LogsConverter) Schema() *arrow.Schema

Schema returns the Arrow schema used by this converter.

type MetricRecords

type MetricRecords struct {
	Gauge        arrowlib.RecordBatch
	Sum          arrowlib.RecordBatch
	Histogram    arrowlib.RecordBatch
	ExpHistogram arrowlib.RecordBatch
	Summary      arrowlib.RecordBatch
}

MetricRecords holds optional Arrow records for each metric type. Only non-nil records should be written.

func (*MetricRecords) Release

func (mr *MetricRecords) Release()

Release releases all non-nil records.

type MetricsConverter

type MetricsConverter struct {
	// contains filtered or unexported fields
}

MetricsConverter converts pmetric.Metrics to Arrow records.

func NewMetricsConverter

func NewMetricsConverter(promoted []string) *MetricsConverter

NewMetricsConverter creates a converter with the given promoted attributes.

func (*MetricsConverter) Convert

func (c *MetricsConverter) Convert(md pmetric.Metrics) (*MetricRecords, error)

Convert transforms pmetric.Metrics into per-type Arrow records.

type PartitionKey

type PartitionKey struct {
	Year  int
	Month int
	Day   int
	Hour  int
	// contains filtered or unexported fields
}

PartitionKey represents a time-based partition boundary.

func PartitionKeyFromMicro

func PartitionKeyFromMicro(micros int64) PartitionKey

PartitionKeyFromMicro derives an hour-granularity partition key from a Unix microsecond timestamp.

func PartitionKeyFromNano

func PartitionKeyFromNano(nanos int64) PartitionKey

PartitionKeyFromNano derives an hour-granularity partition key from a Unix nanosecond timestamp.

func (PartitionKey) HivePath

func (pk PartitionKey) HivePath() string

HivePath returns the Hive-style partition path for this key.

func (PartitionKey) PartitionValues

func (pk PartitionKey) PartitionValues() map[string]string

PartitionValues returns the Hive-style key=value pairs for this partition.

type PartitionedRecord

type PartitionedRecord struct {
	Key    PartitionKey
	Record arrowlib.RecordBatch
}

PartitionedRecord holds a record and its partition key.

func SplitByPartition

func SplitByPartition(alloc memory.Allocator, rec arrowlib.RecordBatch, timestampCol string, granularity Granularity) ([]PartitionedRecord, error)

SplitByPartition splits an Arrow record by time partitions at the given granularity. The timestampCol parameter specifies which column contains the microsecond Arrow Timestamp used for partitioning. Caller is responsible for releasing the returned records.

type PromotedResult

type PromotedResult struct {
	// Values maps promoted attribute names to their string values.
	// Missing attributes have empty string values.
	Values map[string]string
	// Remainder is the JSON-encoded map of non-promoted attributes.
	// Empty string if no remaining attributes.
	Remainder string
}

PromotedResult holds the extracted promoted attribute values and the JSON remainder string for attributes that were not promoted.

func ExtractPromotedAttributes

func ExtractPromotedAttributes(attrs pcommon.Map, promoted []string) PromotedResult

ExtractPromotedAttributes splits a pcommon.Map into promoted values and a JSON remainder string. Promoted attributes are removed from the remainder.

type TracesConverter

type TracesConverter struct {
	// contains filtered or unexported fields
}

TracesConverter converts ptrace.Traces to Arrow records.

func NewTracesConverter

func NewTracesConverter(promoted []string) *TracesConverter

NewTracesConverter creates a converter with the given promoted attributes.

func (*TracesConverter) Convert

func (c *TracesConverter) Convert(td ptrace.Traces) (arrow.RecordBatch, error)

Convert transforms ptrace.Traces into an Arrow record.

func (*TracesConverter) Schema

func (c *TracesConverter) Schema() *arrow.Schema

Schema returns the Arrow schema used by this converter.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL