memory

package
v0.0.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 30, 2026 License: MIT Imports: 14 Imported by: 0

Documentation

Overview

Package memory provides a lightweight Go-slice-backed compute engine for the dataset package. It implements dataset.ColumnFactory, dataset.BuilderFactory, dataset.Aggregator, and dataset.Caster.

Usage:

eng := memory.NewEngine(context.Background())
var f dataset.ColumnFactory = eng
ds, _ := f.FromColumns(
    dataset.NewSchema(dataset.FloatCol("x"), dataset.StringCol("label")),
    f.NewFloat64Column("x", []float64{1, 2, 3}),
    f.NewStringColumn("label", []string{"a", "b", "c"}),
)

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Engine

type Engine struct {
	// contains filtered or unexported fields
}

Engine is the Go-slice compute backend.

func NewEngine

func NewEngine(ctx context.Context) *Engine

NewEngine creates a memory engine with the given lifecycle context.

func (*Engine) Abs

func (e *Engine) Abs(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Acos

func (e *Engine) Acos(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) AddCols

func (e *Engine) AddCols(a, b dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) AddScalar

func (e *Engine) AddScalar(col dataset.AnyColumn, val float64) (dataset.AnyColumn, error)

func (*Engine) Asin

func (e *Engine) Asin(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Atan

func (e *Engine) Atan(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Atan2

func (e *Engine) Atan2(y, x dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) BitAnd

func (e *Engine) BitAnd(a, b dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) BitNot

func (e *Engine) BitNot(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) BitOr

func (e *Engine) BitOr(a, b dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) BitShiftLeft

func (e *Engine) BitShiftLeft(col dataset.AnyColumn, n int) (dataset.AnyColumn, error)

func (*Engine) BitShiftRight

func (e *Engine) BitShiftRight(col dataset.AnyColumn, n int) (dataset.AnyColumn, error)

func (*Engine) BitXor

func (e *Engine) BitXor(a, b dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Cast

func (e *Engine) Cast(col dataset.AnyColumn, target dataset.DType) (dataset.AnyColumn, error)

func (*Engine) Ceil

func (e *Engine) Ceil(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Combine

func (e *Engine) Combine(datasets ...dataset.Table) (dataset.Table, error)

func (*Engine) Complete

func (e *Engine) Complete(ds dataset.Table, cols ...string) (dataset.Table, error)

Complete generates all combinations of the specified columns' unique values, filling missing rows with null values.

func (*Engine) Concatenate

func (e *Engine) Concatenate(ds dataset.Table, col string, from []string, sep string) (dataset.Table, error)

Concatenate joins multiple string columns into one with a separator.

func (*Engine) Context

func (e *Engine) Context() context.Context

Context returns the engine's lifecycle context.

func (*Engine) Cos

func (e *Engine) Cos(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Count

func (e *Engine) Count(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) CumMax

func (e *Engine) CumMax(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) CumMin

func (e *Engine) CumMin(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) CumSum

func (e *Engine) CumSum(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) DenseRank

func (e *Engine) DenseRank(col dataset.AnyColumn) (dataset.AnyColumn, error)

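DenseRank returns the dense rank (1-indexed, no gaps): tied values share the same rank, and the next distinct value receives the next consecutive rank. E.g. [10,20,20,30] → [1,2,2,3].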

func (*Engine) DivCols

func (e *Engine) DivCols(a, b dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) DropNA

func (e *Engine) DropNA(ds dataset.Table, cols ...string) (dataset.Table, error)

func (*Engine) Erf

func (e *Engine) Erf(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Exp

func (e *Engine) Exp(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Fill

func (*Engine) Filter

func (e *Engine) Filter(ds dataset.Table, mask dataset.Masker) (dataset.Table, error)

func (*Engine) FilterIndices

func (e *Engine) FilterIndices(mask []bool) []int

func (*Engine) Floor

func (e *Engine) Floor(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) FromColumns

func (e *Engine) FromColumns(schema *dataset.Schema, cols ...dataset.AnyColumn) (dataset.Table, error)

func (*Engine) Join

func (e *Engine) Join(left, right dataset.Table, spec dataset.JoinSpec) (dataset.Table, error)

Join implements the Joiner interface with a hash-join algorithm. It supports Inner, Left, Right, Full, Semi, and Anti joins.

func (*Engine) Lag

func (e *Engine) Lag(col dataset.AnyColumn, n int) (dataset.AnyColumn, error)

func (*Engine) Lead

func (e *Engine) Lead(col dataset.AnyColumn, n int) (dataset.AnyColumn, error)

func (*Engine) Ln

func (*Engine) Log2

func (e *Engine) Log2(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Log10

func (e *Engine) Log10(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Mean

func (e *Engine) Mean(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Median

func (e *Engine) Median(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) MinMax

func (*Engine) MulCols

func (e *Engine) MulCols(a, b dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) MulScalar

func (e *Engine) MulScalar(col dataset.AnyColumn, val float64) (dataset.AnyColumn, error)

func (*Engine) Name

func (e *Engine) Name() string

Name returns "memory".

func (*Engine) Neg

func (e *Engine) Neg(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) NewBoolColumn

func (e *Engine) NewBoolColumn(name string, data []bool) dataset.AnyColumn

func (*Engine) NewBuilder

func (e *Engine) NewBuilder(schema *dataset.Schema) dataset.Builder

func (*Engine) NewFloat64Column

func (e *Engine) NewFloat64Column(name string, data []float64) dataset.AnyColumn

func (*Engine) NewInt64Column

func (e *Engine) NewInt64Column(name string, data []int64) dataset.AnyColumn

func (*Engine) NewStringColumn

func (e *Engine) NewStringColumn(name string, data []string) dataset.AnyColumn

func (*Engine) NewTimestampColumn

func (e *Engine) NewTimestampColumn(name string, data []int64) dataset.AnyColumn

func (*Engine) PercentRank

func (e *Engine) PercentRank(col dataset.AnyColumn) (dataset.AnyColumn, error)

PercentRank returns (rank - 1) / (n - 1) as float64, where n is the number of elements. It returns 0 for a single-element column.

func (*Engine) PivotLonger

func (e *Engine) PivotLonger(ds dataset.Table, spec dataset.PivotLongerSpec) (dataset.Table, error)

PivotLonger reshapes a wide dataset to long format. Columns listed in spec.Cols are "gathered" into two new columns: spec.NamesTo (holds original column names) and spec.ValuesTo (holds values). All other columns are repeated for each gathered column.

func (*Engine) PivotWider

func (e *Engine) PivotWider(ds dataset.Table, spec dataset.PivotWiderSpec) (dataset.Table, error)

PivotWider reshapes a long dataset to wide format. spec.NamesFrom identifies the column whose unique values become new column names. spec.ValuesFrom identifies the column whose values fill the new columns. All other columns are the "id" columns that define unique rows.

func (*Engine) Pow

func (e *Engine) Pow(col dataset.AnyColumn, exp float64) (dataset.AnyColumn, error)

func (*Engine) Rank

func (e *Engine) Rank(col dataset.AnyColumn) (dataset.AnyColumn, error)

Rank returns the competition rank (1-indexed). Tied values share the same rank, and the rank after a tie skips ahead by the number of tied values. E.g. [10,20,20,30] → [1,2,2,4].

func (*Engine) ReadCSV

func (e *Engine) ReadCSV(ctx context.Context, r io.Reader, cfg dataset.CSVConfig) (dataset.Table, error)

ReadCSV reads CSV data using go-simdcsv with schema inference.

func (*Engine) ReadParquet

func (e *Engine) ReadParquet(ctx context.Context, r io.ReaderAt, size int64, cfg dataset.ParquetConfig) (dataset.Table, error)

ReadParquet reads Parquet data using parquet-go (row-based reader).

func (*Engine) ReplaceNA

func (e *Engine) ReplaceNA(col dataset.AnyColumn, defaultVal float64) (dataset.AnyColumn, error)

func (*Engine) Round

func (e *Engine) Round(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) RowNumber

func (e *Engine) RowNumber(n int) (dataset.AnyColumn, error)

RowNumber returns a 1-indexed sequential column of length n.

func (*Engine) Select

func (e *Engine) Select(col dataset.AnyColumn, indices []int) (dataset.AnyColumn, error)

func (*Engine) Separate

func (e *Engine) Separate(ds dataset.Table, col string, into []string, sep string) (dataset.Table, error)

Separate splits a string column by a delimiter into multiple columns.

func (*Engine) Sigmoid

func (e *Engine) Sigmoid(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Sign

func (e *Engine) Sign(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Sin

func (e *Engine) Sin(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Slice

func (e *Engine) Slice(col dataset.AnyColumn, start, end int) (dataset.AnyColumn, error)

func (*Engine) SortIndices

func (e *Engine) SortIndices(col dataset.AnyColumn) ([]int, error)

func (*Engine) Sqrt

func (e *Engine) Sqrt(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Stack

func (e *Engine) Stack(datasets ...dataset.Table) (dataset.Table, error)

func (*Engine) SubCols

func (e *Engine) SubCols(a, b dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Sum

func (e *Engine) Sum(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Tan

func (e *Engine) Tan(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Tanh

func (e *Engine) Tanh(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) Variance

func (e *Engine) Variance(col dataset.AnyColumn) (dataset.AnyColumn, error)

func (*Engine) WriteCSV

func (e *Engine) WriteCSV(ctx context.Context, w io.Writer, ds dataset.Table, cfg dataset.CSVConfig) error

WriteCSV writes a Dataset as CSV using go-simdcsv.

func (*Engine) WriteParquet

func (e *Engine) WriteParquet(ctx context.Context, w io.Writer, ds dataset.Table, cfg dataset.ParquetConfig) error

WriteParquet writes a Dataset as Parquet using parquet-go.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL