Documentation
¶
Overview ¶
Package columnar provides columnar storage with an ultra-low memory footprint.
Index ¶
- type BatchIterator
- type BatchProcessor
- type BoolColumn
- type CSVToColumnar
- type Column
- type ColumnStore
- func (s *ColumnStore) AddColumn(name string, colType ColumnType) error
- func (s *ColumnStore) AppendBatch(rows []map[string]interface{}) error
- func (s *ColumnStore) AppendRow(data map[string]interface{}) error
- func (s *ColumnStore) Clear()
- func (s *ColumnStore) ColumnCount() int
- func (s *ColumnStore) ColumnNames() []string
- func (s *ColumnStore) GetColumn(name string) (Column, bool)
- func (s *ColumnStore) GetRow(index int) (map[string]interface{}, error)
- func (s *ColumnStore) MemoryPerRecord() float64
- func (s *ColumnStore) MemoryUsage() int64
- func (s *ColumnStore) NewBatchIterator(batchSize int) *BatchIterator
- func (s *ColumnStore) NewIterator() *Iterator
- func (s *ColumnStore) RowCount() int
- type ColumnType
- type ColumnarCompressionConfig
- type ColumnarPipeline
- type CompressedColumnStore
- type DirectCSVToColumnar
- type FieldSchema
- type FloatColumn
- type IntColumn
- type Iterator
- type RecordAdapter
- type Schema
- type StreamingColumnarWriter
- type StreamingDirectCSVToColumnar
- type StringColumn
- type TimestampColumn
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BatchIterator ¶
type BatchIterator struct {
// contains filtered or unexported fields
}
BatchIterator provides batch access to rows
func (*BatchIterator) NextBatch ¶
func (it *BatchIterator) NextBatch() ([]map[string]interface{}, bool)
NextBatch returns the next batch of rows
type BatchProcessor ¶
type BatchProcessor struct {
// contains filtered or unexported fields
}
BatchProcessor provides efficient batch processing over columnar data
func NewBatchProcessor ¶
func NewBatchProcessor(store *ColumnStore, batchSize int) *BatchProcessor
NewBatchProcessor creates a new batch processor
type BoolColumn ¶
type BoolColumn struct {
// contains filtered or unexported fields
}
BoolColumn stores boolean values efficiently
func (*BoolColumn) Append ¶
func (c *BoolColumn) Append(value interface{}) error
func (*BoolColumn) Clear ¶
func (c *BoolColumn) Clear()
func (*BoolColumn) Get ¶
func (c *BoolColumn) Get(i int) interface{}
func (*BoolColumn) Len ¶
func (c *BoolColumn) Len() int
func (*BoolColumn) MemoryUsage ¶
func (c *BoolColumn) MemoryUsage() int64
func (*BoolColumn) Type ¶
func (c *BoolColumn) Type() ColumnType
type CSVToColumnar ¶
type CSVToColumnar struct {
// contains filtered or unexported fields
}
CSVToColumnar provides optimized CSV to columnar conversion
func NewCSVToColumnar ¶
func NewCSVToColumnar() *CSVToColumnar
NewCSVToColumnar creates a new CSV to columnar converter
func (*CSVToColumnar) AddBatch ¶
func (c *CSVToColumnar) AddBatch(rows [][]string) error
AddBatch adds multiple CSV rows efficiently
func (*CSVToColumnar) AddRow ¶
func (c *CSVToColumnar) AddRow(row []string) error
AddRow adds a CSV row to columnar storage
func (*CSVToColumnar) GetStore ¶
func (c *CSVToColumnar) GetStore() *ColumnStore
GetStore returns the columnar store
func (*CSVToColumnar) OptimizeTypes ¶
func (c *CSVToColumnar) OptimizeTypes() error
OptimizeTypes analyzes data and converts columns to optimal types
func (*CSVToColumnar) SetHeaders ¶
func (c *CSVToColumnar) SetHeaders(headers []string)
SetHeaders sets the CSV headers
type Column ¶
type Column interface {
Type() ColumnType
Len() int
Get(i int) interface{}
Append(value interface{}) error
Clear()
MemoryUsage() int64
}
Column is the base interface for all column types
type ColumnStore ¶
type ColumnStore struct {
// contains filtered or unexported fields
}
ColumnStore provides columnar storage for records
func NewColumnStoreWithSchema ¶
func NewColumnStoreWithSchema(schema *Schema) *ColumnStore
NewColumnStoreWithSchema creates a new column store with predefined schema
func (*ColumnStore) AddColumn ¶
func (s *ColumnStore) AddColumn(name string, colType ColumnType) error
AddColumn adds a new column to the store
func (*ColumnStore) AppendBatch ¶
func (s *ColumnStore) AppendBatch(rows []map[string]interface{}) error
AppendBatch adds multiple rows efficiently
func (*ColumnStore) AppendRow ¶
func (s *ColumnStore) AppendRow(data map[string]interface{}) error
AppendRow adds a new row to the store
func (*ColumnStore) ColumnCount ¶
func (s *ColumnStore) ColumnCount() int
ColumnCount returns the number of columns
func (*ColumnStore) ColumnNames ¶
func (s *ColumnStore) ColumnNames() []string
ColumnNames returns all column names
func (*ColumnStore) GetColumn ¶
func (s *ColumnStore) GetColumn(name string) (Column, bool)
GetColumn retrieves a column by name
func (*ColumnStore) GetRow ¶
func (s *ColumnStore) GetRow(index int) (map[string]interface{}, error)
GetRow retrieves a row by index
func (*ColumnStore) MemoryPerRecord ¶
func (s *ColumnStore) MemoryPerRecord() float64
MemoryPerRecord returns average memory usage per record
func (*ColumnStore) MemoryUsage ¶
func (s *ColumnStore) MemoryUsage() int64
MemoryUsage returns total memory usage in bytes
func (*ColumnStore) NewBatchIterator ¶
func (s *ColumnStore) NewBatchIterator(batchSize int) *BatchIterator
NewBatchIterator creates a new batch iterator
func (*ColumnStore) NewIterator ¶
func (s *ColumnStore) NewIterator() *Iterator
NewIterator creates a new iterator over the store
func (*ColumnStore) RowCount ¶
func (s *ColumnStore) RowCount() int
RowCount returns the number of rows
type ColumnType ¶
type ColumnType int
ColumnType represents the data type of a column
const (
ColumnTypeString ColumnType = iota
ColumnTypeInt
ColumnTypeFloat
ColumnTypeBool
ColumnTypeTimestamp
ColumnTypeBytes
)
type ColumnarCompressionConfig ¶
type ColumnarCompressionConfig struct {
Algorithm compression.Algorithm
Level compression.Level
CompressStrings bool
CompressNumbers bool
MinColumnSize int // Minimum column size to compress
}
ColumnarCompressionConfig configures columnar compression
func DefaultColumnarCompressionConfig ¶
func DefaultColumnarCompressionConfig() ColumnarCompressionConfig
DefaultColumnarCompressionConfig returns default compression settings
type ColumnarPipeline ¶
type ColumnarPipeline struct {
// contains filtered or unexported fields
}
ColumnarPipeline provides a pipeline interface for columnar processing
func NewColumnarPipeline ¶
func NewColumnarPipeline() *ColumnarPipeline
NewColumnarPipeline creates a new columnar pipeline
func (*ColumnarPipeline) AddBatch ¶
func (p *ColumnarPipeline) AddBatch(records []*pool.Record) error
AddBatch adds multiple records efficiently
func (*ColumnarPipeline) AddRecord ¶
func (p *ColumnarPipeline) AddRecord(record *pool.Record) error
AddRecord adds a record to the columnar store
func (*ColumnarPipeline) GetStore ¶
func (p *ColumnarPipeline) GetStore() *ColumnStore
GetStore returns the underlying columnar store
func (*ColumnarPipeline) Stats ¶
func (p *ColumnarPipeline) Stats() map[string]interface{}
Stats returns memory and performance statistics
type CompressedColumnStore ¶
type CompressedColumnStore struct {
// contains filtered or unexported fields
}
CompressedColumnStore wraps a ColumnStore with compression
func NewCompressedColumnStore ¶
func NewCompressedColumnStore(algorithm compression.Algorithm) (*CompressedColumnStore, error)
NewCompressedColumnStore creates a new compressed column store
func (*CompressedColumnStore) CompressAll ¶
func (c *CompressedColumnStore) CompressAll() error
CompressAll compresses all columns
func (*CompressedColumnStore) CompressColumn ¶
func (c *CompressedColumnStore) CompressColumn(name string) error
CompressColumn compresses a single column
func (*CompressedColumnStore) GetCompressedSize ¶
func (c *CompressedColumnStore) GetCompressedSize() int64
GetCompressedSize returns the total compressed size
func (*CompressedColumnStore) GetCompressionRatio ¶
func (c *CompressedColumnStore) GetCompressionRatio() float64
GetCompressionRatio returns the compression ratio
type DirectCSVToColumnar ¶
type DirectCSVToColumnar struct {
// contains filtered or unexported fields
}
DirectCSVToColumnar provides zero-intermediate CSV to columnar conversion
func NewDirectCSVToColumnar ¶
func NewDirectCSVToColumnar() *DirectCSVToColumnar
NewDirectCSVToColumnar creates an optimized CSV to columnar converter
func (*DirectCSVToColumnar) GetRowCount ¶
func (d *DirectCSVToColumnar) GetRowCount() int
GetRowCount returns the number of rows processed
func (*DirectCSVToColumnar) GetStore ¶
func (d *DirectCSVToColumnar) GetStore() *ColumnStore
GetStore returns the columnar store
func (*DirectCSVToColumnar) OptimizeTypes ¶
func (d *DirectCSVToColumnar) OptimizeTypes() error
OptimizeTypes converts string columns to appropriate types
func (*DirectCSVToColumnar) ProcessCSV ¶
func (d *DirectCSVToColumnar) ProcessCSV(reader *csv.Reader) error
ProcessCSV reads and converts CSV directly to columnar storage
type FieldSchema ¶
type FieldSchema struct {
Name string
Type ColumnType
}
FieldSchema defines a single field in the schema
type FloatColumn ¶
type FloatColumn struct {
// contains filtered or unexported fields
}
FloatColumn stores floating point values
func (*FloatColumn) Append ¶
func (c *FloatColumn) Append(value interface{}) error
func (*FloatColumn) Clear ¶
func (c *FloatColumn) Clear()
func (*FloatColumn) Get ¶
func (c *FloatColumn) Get(i int) interface{}
func (*FloatColumn) Len ¶
func (c *FloatColumn) Len() int
func (*FloatColumn) MemoryUsage ¶
func (c *FloatColumn) MemoryUsage() int64
func (*FloatColumn) Type ¶
func (c *FloatColumn) Type() ColumnType
type IntColumn ¶
type IntColumn struct {
// contains filtered or unexported fields
}
IntColumn stores integer values efficiently
func (*IntColumn) MemoryUsage ¶
func (c *IntColumn) MemoryUsage() int64
func (*IntColumn) Type ¶
func (c *IntColumn) Type() ColumnType
type Iterator ¶
type Iterator struct {
// contains filtered or unexported fields
}
Iterator provides sequential access to rows
type RecordAdapter ¶
type RecordAdapter struct {
// contains filtered or unexported fields
}
RecordAdapter provides a pool.Record compatible interface over columnar storage
func NewRecordAdapter ¶
func NewRecordAdapter(store *ColumnStore, rowIndex int) *RecordAdapter
NewRecordAdapter creates a new adapter for a specific row
func (*RecordAdapter) Data ¶
func (r *RecordAdapter) Data() map[string]interface{}
Data returns all data fields
func (*RecordAdapter) GetData ¶
func (r *RecordAdapter) GetData(key string) (interface{}, bool)
GetData retrieves a data field
func (*RecordAdapter) Release ¶
func (r *RecordAdapter) Release()
Release returns the adapter to the pool (no-op for columnar)
func (*RecordAdapter) SetData ¶
func (r *RecordAdapter) SetData(key string, value interface{})
SetData sets a data field (note: this updates the cache, not the columnar store)
type Schema ¶
type Schema struct {
Fields []FieldSchema
}
Schema defines the structure of a columnar store
type StreamingColumnarWriter ¶
type StreamingColumnarWriter struct {
// contains filtered or unexported fields
}
StreamingColumnarWriter provides streaming write capabilities
func NewStreamingColumnarWriter ¶
func NewStreamingColumnarWriter(bufferSize int) *StreamingColumnarWriter
NewStreamingColumnarWriter creates a new streaming writer
func (*StreamingColumnarWriter) Flush ¶
func (w *StreamingColumnarWriter) Flush() error
Flush writes buffered records to columnar store
func (*StreamingColumnarWriter) GetStore ¶
func (w *StreamingColumnarWriter) GetStore() *ColumnStore
GetStore returns the columnar store
func (*StreamingColumnarWriter) Stats ¶
func (w *StreamingColumnarWriter) Stats() map[string]interface{}
Stats returns writer statistics
type StreamingDirectCSVToColumnar ¶
type StreamingDirectCSVToColumnar struct {
// contains filtered or unexported fields
}
StreamingDirectCSVToColumnar provides streaming CSV to columnar conversion
func NewStreamingDirectCSVToColumnar ¶
func NewStreamingDirectCSVToColumnar(bufferSize int) *StreamingDirectCSVToColumnar
NewStreamingDirectCSVToColumnar creates a streaming converter
func (*StreamingDirectCSVToColumnar) AddRow ¶
func (s *StreamingDirectCSVToColumnar) AddRow(row []string) error
AddRow adds a single row with buffering
func (*StreamingDirectCSVToColumnar) Flush ¶
func (s *StreamingDirectCSVToColumnar) Flush() error
Flush writes buffered rows to columnar store
func (*StreamingDirectCSVToColumnar) GetStore ¶
func (s *StreamingDirectCSVToColumnar) GetStore() *ColumnStore
GetStore returns the columnar store
func (*StreamingDirectCSVToColumnar) SetHeaders ¶
func (s *StreamingDirectCSVToColumnar) SetHeaders(headers []string)
SetHeaders initializes columns from headers
type StringColumn ¶
type StringColumn struct {
// contains filtered or unexported fields
}
StringColumn stores string values efficiently
func NewStringColumn ¶
func NewStringColumn() *StringColumn
NewStringColumn creates a new string column
func (*StringColumn) Append ¶
func (c *StringColumn) Append(value interface{}) error
func (*StringColumn) Clear ¶
func (c *StringColumn) Clear()
func (*StringColumn) Get ¶
func (c *StringColumn) Get(i int) interface{}
func (*StringColumn) Len ¶
func (c *StringColumn) Len() int
func (*StringColumn) MemoryUsage ¶
func (c *StringColumn) MemoryUsage() int64
func (*StringColumn) Type ¶
func (c *StringColumn) Type() ColumnType
type TimestampColumn ¶
type TimestampColumn struct {
// contains filtered or unexported fields
}
TimestampColumn stores timestamp values
func NewTimestampColumn ¶
func NewTimestampColumn() *TimestampColumn
NewTimestampColumn creates a new timestamp column
func (*TimestampColumn) Append ¶
func (c *TimestampColumn) Append(value interface{}) error
func (*TimestampColumn) Clear ¶
func (c *TimestampColumn) Clear()
func (*TimestampColumn) Get ¶
func (c *TimestampColumn) Get(i int) interface{}
func (*TimestampColumn) Len ¶
func (c *TimestampColumn) Len() int
func (*TimestampColumn) MemoryUsage ¶
func (c *TimestampColumn) MemoryUsage() int64
func (*TimestampColumn) Type ¶
func (c *TimestampColumn) Type() ColumnType