columnar

package
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 14, 2025 | License: MIT | Imports: 10 | Imported by: 0

Documentation

Overview

Package columnar provides columnar storage with an ultra-low memory footprint.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type BatchIterator

type BatchIterator struct {
	// contains filtered or unexported fields
}

BatchIterator provides batch access to rows

func (*BatchIterator) NextBatch

func (it *BatchIterator) NextBatch() ([]map[string]interface{}, bool)

NextBatch returns the next batch of rows

type BatchProcessor

type BatchProcessor struct {
	// contains filtered or unexported fields
}

BatchProcessor provides efficient batch processing over columnar data

func NewBatchProcessor

func NewBatchProcessor(store *ColumnStore, batchSize int) *BatchProcessor

NewBatchProcessor creates a new batch processor

func (*BatchProcessor) Process

func (bp *BatchProcessor) Process(fn func(records []*pool.Record) error) error

Process applies a function to all records in batches

type BoolColumn

type BoolColumn struct {
	// contains filtered or unexported fields
}

BoolColumn stores boolean values efficiently

func NewBoolColumn

func NewBoolColumn() *BoolColumn

NewBoolColumn creates a new boolean column

func (*BoolColumn) Append

func (c *BoolColumn) Append(value interface{}) error

func (*BoolColumn) Clear

func (c *BoolColumn) Clear()

func (*BoolColumn) Get

func (c *BoolColumn) Get(i int) interface{}

func (*BoolColumn) Len

func (c *BoolColumn) Len() int

func (*BoolColumn) MemoryUsage

func (c *BoolColumn) MemoryUsage() int64

func (*BoolColumn) Type

func (c *BoolColumn) Type() ColumnType

type CSVToColumnar

type CSVToColumnar struct {
	// contains filtered or unexported fields
}

CSVToColumnar provides optimized CSV to columnar conversion

func NewCSVToColumnar

func NewCSVToColumnar() *CSVToColumnar

NewCSVToColumnar creates a new CSV to columnar converter

func (*CSVToColumnar) AddBatch

func (c *CSVToColumnar) AddBatch(rows [][]string) error

AddBatch adds multiple CSV rows efficiently

func (*CSVToColumnar) AddRow

func (c *CSVToColumnar) AddRow(row []string) error

AddRow adds a CSV row to columnar storage

func (*CSVToColumnar) GetStore

func (c *CSVToColumnar) GetStore() *ColumnStore

GetStore returns the columnar store

func (*CSVToColumnar) OptimizeTypes

func (c *CSVToColumnar) OptimizeTypes() error

OptimizeTypes analyzes data and converts columns to optimal types

func (*CSVToColumnar) SetHeaders

func (c *CSVToColumnar) SetHeaders(headers []string)

SetHeaders sets the CSV headers

type Column

type Column interface {
	Type() ColumnType
	Len() int
	Get(i int) interface{}
	Append(value interface{}) error
	Clear()
	MemoryUsage() int64
}

Column is the base interface for all column types

type ColumnStore

type ColumnStore struct {
	// contains filtered or unexported fields
}

ColumnStore provides columnar storage for records

func NewColumnStore

func NewColumnStore() *ColumnStore

NewColumnStore creates a new column store

func NewColumnStoreWithSchema

func NewColumnStoreWithSchema(schema *Schema) *ColumnStore

NewColumnStoreWithSchema creates a new column store with predefined schema

func (*ColumnStore) AddColumn

func (s *ColumnStore) AddColumn(name string, colType ColumnType) error

AddColumn adds a new column to the store

func (*ColumnStore) AppendBatch

func (s *ColumnStore) AppendBatch(rows []map[string]interface{}) error

AppendBatch adds multiple rows efficiently

func (*ColumnStore) AppendRow

func (s *ColumnStore) AppendRow(data map[string]interface{}) error

AppendRow adds a new row to the store

func (*ColumnStore) Clear

func (s *ColumnStore) Clear()

Clear removes all data from the store

func (*ColumnStore) ColumnCount

func (s *ColumnStore) ColumnCount() int

ColumnCount returns the number of columns

func (*ColumnStore) ColumnNames

func (s *ColumnStore) ColumnNames() []string

ColumnNames returns all column names

func (*ColumnStore) GetColumn

func (s *ColumnStore) GetColumn(name string) (Column, bool)

GetColumn retrieves a column by name

func (*ColumnStore) GetRow

func (s *ColumnStore) GetRow(index int) (map[string]interface{}, error)

GetRow retrieves a row by index

func (*ColumnStore) MemoryPerRecord

func (s *ColumnStore) MemoryPerRecord() float64

MemoryPerRecord returns average memory usage per record

func (*ColumnStore) MemoryUsage

func (s *ColumnStore) MemoryUsage() int64

MemoryUsage returns total memory usage in bytes

func (*ColumnStore) NewBatchIterator

func (s *ColumnStore) NewBatchIterator(batchSize int) *BatchIterator

NewBatchIterator creates a new batch iterator

func (*ColumnStore) NewIterator

func (s *ColumnStore) NewIterator() *Iterator

NewIterator creates a new iterator over the store

func (*ColumnStore) RowCount

func (s *ColumnStore) RowCount() int

RowCount returns the number of rows

type ColumnType

type ColumnType int

ColumnType represents the data type of a column

const (
	ColumnTypeString ColumnType = iota
	ColumnTypeInt
	ColumnTypeFloat
	ColumnTypeBool
	ColumnTypeTimestamp
	ColumnTypeBytes
)

type ColumnarCompressionConfig

type ColumnarCompressionConfig struct {
	Algorithm       compression.Algorithm
	Level           compression.Level
	CompressStrings bool
	CompressNumbers bool
	MinColumnSize   int // Minimum column size to compress
}

ColumnarCompressionConfig configures columnar compression

func DefaultColumnarCompressionConfig

func DefaultColumnarCompressionConfig() ColumnarCompressionConfig

DefaultColumnarCompressionConfig returns default compression settings

type ColumnarPipeline

type ColumnarPipeline struct {
	// contains filtered or unexported fields
}

ColumnarPipeline provides a pipeline interface for columnar processing

func NewColumnarPipeline

func NewColumnarPipeline() *ColumnarPipeline

NewColumnarPipeline creates a new columnar pipeline

func (*ColumnarPipeline) AddBatch

func (p *ColumnarPipeline) AddBatch(records []*pool.Record) error

AddBatch adds multiple records efficiently

func (*ColumnarPipeline) AddRecord

func (p *ColumnarPipeline) AddRecord(record *pool.Record) error

AddRecord adds a record to the columnar store

func (*ColumnarPipeline) GetStore

func (p *ColumnarPipeline) GetStore() *ColumnStore

GetStore returns the underlying columnar store

func (*ColumnarPipeline) Stats

func (p *ColumnarPipeline) Stats() map[string]interface{}

Stats returns memory and performance statistics

type CompressedColumnStore

type CompressedColumnStore struct {
	// contains filtered or unexported fields
}

CompressedColumnStore wraps a ColumnStore with compression

func NewCompressedColumnStore

func NewCompressedColumnStore(algorithm compression.Algorithm) (*CompressedColumnStore, error)

NewCompressedColumnStore creates a new compressed column store

func (*CompressedColumnStore) CompressAll

func (c *CompressedColumnStore) CompressAll() error

CompressAll compresses all columns

func (*CompressedColumnStore) CompressColumn

func (c *CompressedColumnStore) CompressColumn(name string) error

CompressColumn compresses a single column

func (*CompressedColumnStore) GetCompressedSize

func (c *CompressedColumnStore) GetCompressedSize() int64

GetCompressedSize returns the total compressed size

func (*CompressedColumnStore) GetCompressionRatio

func (c *CompressedColumnStore) GetCompressionRatio() float64

GetCompressionRatio returns the compression ratio

type DirectCSVToColumnar

type DirectCSVToColumnar struct {
	// contains filtered or unexported fields
}

DirectCSVToColumnar provides zero-intermediate CSV to columnar conversion

func NewDirectCSVToColumnar

func NewDirectCSVToColumnar() *DirectCSVToColumnar

NewDirectCSVToColumnar creates an optimized CSV to columnar converter

func (*DirectCSVToColumnar) GetRowCount

func (d *DirectCSVToColumnar) GetRowCount() int

GetRowCount returns the number of rows processed

func (*DirectCSVToColumnar) GetStore

func (d *DirectCSVToColumnar) GetStore() *ColumnStore

GetStore returns the columnar store

func (*DirectCSVToColumnar) OptimizeTypes

func (d *DirectCSVToColumnar) OptimizeTypes() error

OptimizeTypes converts string columns to appropriate types

func (*DirectCSVToColumnar) ProcessCSV

func (d *DirectCSVToColumnar) ProcessCSV(reader *csv.Reader) error

ProcessCSV reads and converts CSV directly to columnar storage

type FieldSchema

type FieldSchema struct {
	Name string
	Type ColumnType
}

FieldSchema defines a single field in the schema

type FloatColumn

type FloatColumn struct {
	// contains filtered or unexported fields
}

FloatColumn stores floating point values

func NewFloatColumn

func NewFloatColumn() *FloatColumn

NewFloatColumn creates a new float column

func (*FloatColumn) Append

func (c *FloatColumn) Append(value interface{}) error

func (*FloatColumn) Clear

func (c *FloatColumn) Clear()

func (*FloatColumn) Get

func (c *FloatColumn) Get(i int) interface{}

func (*FloatColumn) Len

func (c *FloatColumn) Len() int

func (*FloatColumn) MemoryUsage

func (c *FloatColumn) MemoryUsage() int64

func (*FloatColumn) Type

func (c *FloatColumn) Type() ColumnType

type IntColumn

type IntColumn struct {
	// contains filtered or unexported fields
}

IntColumn stores integer values efficiently

func NewIntColumn

func NewIntColumn() *IntColumn

NewIntColumn creates a new integer column

func (*IntColumn) Append

func (c *IntColumn) Append(value interface{}) error

func (*IntColumn) Clear

func (c *IntColumn) Clear()

func (*IntColumn) Get

func (c *IntColumn) Get(i int) interface{}

func (*IntColumn) Len

func (c *IntColumn) Len() int

func (*IntColumn) MemoryUsage

func (c *IntColumn) MemoryUsage() int64

func (*IntColumn) Type

func (c *IntColumn) Type() ColumnType

type Iterator

type Iterator struct {
	// contains filtered or unexported fields
}

Iterator provides sequential access to rows

func (*Iterator) Next

func (it *Iterator) Next() bool

Next advances to the next row

func (*Iterator) Row

func (it *Iterator) Row() map[string]interface{}

Row returns the current row

type RecordAdapter

type RecordAdapter struct {
	// contains filtered or unexported fields
}

RecordAdapter provides a pool.Record compatible interface over columnar storage

func NewRecordAdapter

func NewRecordAdapter(store *ColumnStore, rowIndex int) *RecordAdapter

NewRecordAdapter creates a new adapter for a specific row

func (*RecordAdapter) Data

func (r *RecordAdapter) Data() map[string]interface{}

Data returns all data fields

func (*RecordAdapter) GetData

func (r *RecordAdapter) GetData(key string) (interface{}, bool)

GetData retrieves a data field

func (*RecordAdapter) Release

func (r *RecordAdapter) Release()

Release returns the adapter to the pool (no-op for columnar)

func (*RecordAdapter) SetData

func (r *RecordAdapter) SetData(key string, value interface{})

SetData sets a data field (note: this updates the cache, not the columnar store)

type Schema

type Schema struct {
	Fields []FieldSchema
}

Schema defines the structure of a columnar store

type StreamingColumnarWriter

type StreamingColumnarWriter struct {
	// contains filtered or unexported fields
}

StreamingColumnarWriter provides streaming write capabilities

func NewStreamingColumnarWriter

func NewStreamingColumnarWriter(bufferSize int) *StreamingColumnarWriter

NewStreamingColumnarWriter creates a new streaming writer

func (*StreamingColumnarWriter) Flush

func (w *StreamingColumnarWriter) Flush() error

Flush writes buffered records to columnar store

func (*StreamingColumnarWriter) GetStore

func (w *StreamingColumnarWriter) GetStore() *ColumnStore

GetStore returns the columnar store

func (*StreamingColumnarWriter) Stats

func (w *StreamingColumnarWriter) Stats() map[string]interface{}

Stats returns writer statistics

func (*StreamingColumnarWriter) Write

func (w *StreamingColumnarWriter) Write(record *pool.Record) error

Write adds a record to the buffer

type StreamingDirectCSVToColumnar

type StreamingDirectCSVToColumnar struct {
	// contains filtered or unexported fields
}

StreamingDirectCSVToColumnar provides streaming CSV to columnar conversion

func NewStreamingDirectCSVToColumnar

func NewStreamingDirectCSVToColumnar(bufferSize int) *StreamingDirectCSVToColumnar

NewStreamingDirectCSVToColumnar creates a streaming converter

func (*StreamingDirectCSVToColumnar) AddRow

func (s *StreamingDirectCSVToColumnar) AddRow(row []string) error

AddRow adds a single row with buffering

func (*StreamingDirectCSVToColumnar) Flush

func (s *StreamingDirectCSVToColumnar) Flush() error

Flush writes buffered rows to columnar store

func (*StreamingDirectCSVToColumnar) GetStore

func (s *StreamingDirectCSVToColumnar) GetStore() *ColumnStore

GetStore returns the columnar store

func (*StreamingDirectCSVToColumnar) SetHeaders

func (s *StreamingDirectCSVToColumnar) SetHeaders(headers []string)

SetHeaders initializes columns from headers

type StringColumn

type StringColumn struct {
	// contains filtered or unexported fields
}

StringColumn stores string values efficiently

func NewStringColumn

func NewStringColumn() *StringColumn

NewStringColumn creates a new string column

func (*StringColumn) Append

func (c *StringColumn) Append(value interface{}) error

func (*StringColumn) Clear

func (c *StringColumn) Clear()

func (*StringColumn) Get

func (c *StringColumn) Get(i int) interface{}

func (*StringColumn) Len

func (c *StringColumn) Len() int

func (*StringColumn) MemoryUsage

func (c *StringColumn) MemoryUsage() int64

func (*StringColumn) Type

func (c *StringColumn) Type() ColumnType

type TimestampColumn

type TimestampColumn struct {
	// contains filtered or unexported fields
}

TimestampColumn stores timestamp values

func NewTimestampColumn

func NewTimestampColumn() *TimestampColumn

NewTimestampColumn creates a new timestamp column

func (*TimestampColumn) Append

func (c *TimestampColumn) Append(value interface{}) error

func (*TimestampColumn) Clear

func (c *TimestampColumn) Clear()

func (*TimestampColumn) Get

func (c *TimestampColumn) Get(i int) interface{}

func (*TimestampColumn) Len

func (c *TimestampColumn) Len() int

func (*TimestampColumn) MemoryUsage

func (c *TimestampColumn) MemoryUsage() int64

func (*TimestampColumn) Type

func (c *TimestampColumn) Type() ColumnType

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL