Documentation
¶
Index ¶
- Constants
- Variables
- func DefaultCompression() compress.Compression
- func ExpHistogramSchema(promoted []string) *arrow.Schema
- func GaugeSchema(promoted []string) *arrow.Schema
- func HistogramSchema(promoted []string) *arrow.Schema
- func LogsSchema(promoted []string) *arrow.Schema
- func MapToJSON(m pcommon.Map) string
- func MergePromoted(defaults, overrides []string) []string
- func MergeRecords(alloc memory.Allocator, records []arrow.RecordBatch) (arrow.RecordBatch, error)
- func NanoToMicro(nanos int64) int64
- func SumSchema(promoted []string) *arrow.Schema
- func SummarySchema(promoted []string) *arrow.Schema
- func TracesSchema(promoted []string) *arrow.Schema
- func WriteParquet(rec arrow.RecordBatch, compression compress.Compression) ([]byte, error)
- type Granularity
- type LogsConverter
- type MetricRecords
- type MetricsConverter
- type PartitionKey
- type PartitionedRecord
- type PromotedResult
- type TracesConverter
Constants ¶
const ( TableTraces = "otel_traces" TableLogs = "otel_logs" TableGauge = "otel_metrics_gauge" TableSum = "otel_metrics_sum" TableHistogram = "otel_metrics_histogram" TableExpHistogram = "otel_metrics_exp_histogram" TableSummary = "otel_metrics_summary" )
Table names for each signal/metric type.
Variables ¶
var DefaultLogsPromoted = []string{
"service.name",
"log.file.path",
"exception.type",
"exception.message",
}
DefaultLogsPromoted is the default set of promoted attributes for logs.
var DefaultMetricsPromoted = []string{
"service.name",
"host.name",
}
DefaultMetricsPromoted is the default set of promoted attributes for metrics.
var DefaultTracesPromoted = []string{
"service.name",
"http.method",
"http.status_code",
"http.url",
"http.route",
"db.system",
"rpc.method",
"rpc.service",
}
DefaultTracesPromoted is the default set of promoted attributes for traces.
var TimestampType arrow.DataType = arrow.FixedWidthTypes.Timestamp_us
TimestampType is the Arrow type used for OTel timestamp columns. Microsecond UTC timestamps map to Iceberg's timestamptz, which supports time-based partition transforms (e.g. HourTransform).
Functions ¶
func DefaultCompression ¶
func DefaultCompression() compress.Compression
DefaultCompression returns the default Parquet compression codec.
func ExpHistogramSchema ¶
ExpHistogramSchema returns the Arrow schema for exponential histogram metrics.
func GaugeSchema ¶
GaugeSchema returns the Arrow schema for gauge metrics.
func HistogramSchema ¶
HistogramSchema returns the Arrow schema for histogram metrics.
func LogsSchema ¶
LogsSchema returns the Arrow schema for the logs table.
func MapToJSON ¶
MapToJSON serializes a pcommon.Map to a JSON string. Returns empty string for empty maps.
func MergePromoted ¶
MergePromoted returns the defaults if overrides is empty, otherwise the overrides.
func MergeRecords ¶
func MergeRecords(alloc memory.Allocator, records []arrow.RecordBatch) (arrow.RecordBatch, error)
MergeRecords concatenates multiple Arrow records with the same schema into one. All input records must share the same schema. Caller must release the returned record.
func NanoToMicro ¶
NanoToMicro converts a nanosecond Unix timestamp to microseconds.
func SumSchema ¶
SumSchema returns the Arrow schema for sum metrics.
func SummarySchema ¶
SummarySchema returns the Arrow schema for summary metrics.
func TracesSchema ¶
TracesSchema returns the Arrow schema for the traces table. Unsigned OTLP ints are mapped to signed for Iceberg compatibility.
func WriteParquet ¶
func WriteParquet(rec arrow.RecordBatch, compression compress.Compression) ([]byte, error)
WriteParquet serializes an Arrow record to Parquet bytes using the given compression.
Types ¶
type Granularity ¶
type Granularity string
Granularity controls partition time resolution.
const ( GranularityHour Granularity = "hour" GranularityDay Granularity = "day" GranularityMonth Granularity = "month" )
type LogsConverter ¶
type LogsConverter struct {
// contains filtered or unexported fields
}
LogsConverter converts plog.Logs to Arrow records.
func NewLogsConverter ¶
func NewLogsConverter(promoted []string) *LogsConverter
NewLogsConverter creates a converter with the given promoted attributes.
func (*LogsConverter) Convert ¶
func (c *LogsConverter) Convert(ld plog.Logs) (arrow.RecordBatch, error)
Convert transforms plog.Logs into an Arrow record.
func (*LogsConverter) Schema ¶
func (c *LogsConverter) Schema() *arrow.Schema
Schema returns the Arrow schema used by this converter.
type MetricRecords ¶
type MetricRecords struct {
Gauge arrowlib.RecordBatch
Sum arrowlib.RecordBatch
Histogram arrowlib.RecordBatch
ExpHistogram arrowlib.RecordBatch
Summary arrowlib.RecordBatch
}
MetricRecords holds optional Arrow records for each metric type. Only non-nil records should be written.
func (*MetricRecords) Release ¶
func (mr *MetricRecords) Release()
Release releases all non-nil records.
type MetricsConverter ¶
type MetricsConverter struct {
// contains filtered or unexported fields
}
MetricsConverter converts pmetric.Metrics to Arrow records.
func NewMetricsConverter ¶
func NewMetricsConverter(promoted []string) *MetricsConverter
NewMetricsConverter creates a converter with the given promoted attributes.
func (*MetricsConverter) Convert ¶
func (c *MetricsConverter) Convert(md pmetric.Metrics) (*MetricRecords, error)
Convert transforms pmetric.Metrics into per-type Arrow records.
type PartitionKey ¶
type PartitionKey struct {
Year int
Month int
Day int
Hour int
// contains filtered or unexported fields
}
PartitionKey represents a time-based partition boundary.
func PartitionKeyFromMicro ¶
func PartitionKeyFromMicro(micros int64) PartitionKey
PartitionKeyFromMicro derives an hour-granularity partition key from a Unix microsecond timestamp.
func PartitionKeyFromNano ¶
func PartitionKeyFromNano(nanos int64) PartitionKey
PartitionKeyFromNano derives an hour-granularity partition key from a Unix nanosecond timestamp.
func (PartitionKey) HivePath ¶
func (pk PartitionKey) HivePath() string
HivePath returns the Hive-style partition path for this key.
func (PartitionKey) PartitionValues ¶
func (pk PartitionKey) PartitionValues() map[string]string
PartitionValues returns the Hive-style key=value pairs for this partition.
type PartitionedRecord ¶
type PartitionedRecord struct {
Key PartitionKey
Record arrowlib.RecordBatch
}
PartitionedRecord holds a record and its partition key.
func SplitByPartition ¶
func SplitByPartition(alloc memory.Allocator, rec arrowlib.RecordBatch, timestampCol string, granularity Granularity) ([]PartitionedRecord, error)
SplitByPartition splits an Arrow record by time partitions at the given granularity. The timestampCol parameter specifies which column contains the microsecond Arrow Timestamp used for partitioning. Caller is responsible for releasing the returned records.
type PromotedResult ¶
type PromotedResult struct {
// Values maps promoted attribute names to their string values.
// Missing attributes have empty string values.
Values map[string]string
// Remainder is the JSON-encoded map of non-promoted attributes.
// Empty string if no remaining attributes.
Remainder string
}
PromotedResult holds the extracted promoted attribute values and the JSON remainder string for attributes that were not promoted.
func ExtractPromotedAttributes ¶
func ExtractPromotedAttributes(attrs pcommon.Map, promoted []string) PromotedResult
ExtractPromotedAttributes splits a pcommon.Map into promoted values and a JSON remainder string. Promoted attributes are removed from the remainder.
type TracesConverter ¶
type TracesConverter struct {
// contains filtered or unexported fields
}
TracesConverter converts ptrace.Traces to Arrow records.
func NewTracesConverter ¶
func NewTracesConverter(promoted []string) *TracesConverter
NewTracesConverter creates a converter with the given promoted attributes.
func (*TracesConverter) Convert ¶
func (c *TracesConverter) Convert(td ptrace.Traces) (arrow.RecordBatch, error)
Convert transforms ptrace.Traces into an Arrow record.
func (*TracesConverter) Schema ¶
func (c *TracesConverter) Schema() *arrow.Schema
Schema returns the Arrow schema used by this converter.