gen

package
v0.0.0-...-bd113ff Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 2, 2026 License: MIT Imports: 9 Imported by: 0

Documentation

Index

Constants

View Source
const DefaultBatchSize = 65536

DefaultBatchSize is the default number of rows per Arrow RecordBatch.

Variables

This section is empty.

Functions

This section is empty.

Types

type ColumnSpec

type ColumnSpec struct {
	Name         string
	Type         arrow.DataType
	Distribution Distribution
	NullRate     float64 // 0.0 = no nulls, 0.1 = 10% nulls
}

ColumnSpec describes how to generate data for a single column.

type Constant

type Constant struct {
	Value int64
}

Constant generates the same value for every element.

func (Constant) GenerateFloat64

func (c Constant) GenerateFloat64(buf []float64, _ *rand.Rand)

func (Constant) GenerateInt64

func (c Constant) GenerateInt64(buf []int64, _ *rand.Rand)

func (Constant) GenerateString

func (c Constant) GenerateString(buf []string, _ *rand.Rand)

type Distribution

type Distribution interface {
	GenerateInt64(buf []int64, rng *rand.Rand)
	GenerateFloat64(buf []float64, rng *rand.Rand)
	GenerateString(buf []string, rng *rand.Rand)
}

Distribution defines how values are generated for a column.

type GenReader

type GenReader struct {
	// contains filtered or unexported fields
}

GenReader implements dio.Reader by lazily generating Arrow RecordBatches.

func NewReader

func NewReader(spec TableSpec, opts ...ReaderOption) *GenReader

NewReader creates a GenReader from a TableSpec.

func (*GenReader) Read

func (r *GenReader) Read(ctx context.Context, opts ...dio.ReadOption) (dio.BatchStream, error)

Read opens a stream of Arrow RecordBatches with lazy generation. Each call resets the RNG from the configured seed for deterministic re-reads.

func (*GenReader) Schema

func (r *GenReader) Schema() *arrow.Schema

Schema returns the Arrow schema for records produced by this reader.

type ReaderOption

type ReaderOption func(*readerConfig)

ReaderOption configures a GenReader.

func WithBatchSize

func WithBatchSize(size int) ReaderOption

WithBatchSize overrides the TableSpec's BatchSize.

func WithSeed

func WithSeed(seed int64) ReaderOption

WithSeed sets the random seed for data generation. Default: 42.

type Sequential

type Sequential struct {
	Start int64
	Step  int64
	// contains filtered or unexported fields
}

Sequential generates an arithmetic sequence: Start, Start+Step, Start+2*Step, ... (monotonically increasing when Step is positive). State is tracked across calls, so each call continues from where the previous one left off.

func (*Sequential) GenerateFloat64

func (s *Sequential) GenerateFloat64(buf []float64, _ *rand.Rand)

func (*Sequential) GenerateInt64

func (s *Sequential) GenerateInt64(buf []int64, _ *rand.Rand)

func (*Sequential) GenerateString

func (s *Sequential) GenerateString(buf []string, _ *rand.Rand)

type StringPool

type StringPool struct {
	Cardinality int
	MinLen      int
	MaxLen      int
	// contains filtered or unexported fields
}

StringPool pre-generates a pool of unique random strings, then picks from that pool uniformly at random.

func (*StringPool) GenerateFloat64

func (sp *StringPool) GenerateFloat64(buf []float64, rng *rand.Rand)

func (*StringPool) GenerateInt64

func (sp *StringPool) GenerateInt64(buf []int64, rng *rand.Rand)

func (*StringPool) GenerateString

func (sp *StringPool) GenerateString(buf []string, rng *rand.Rand)

type TableSpec

type TableSpec struct {
	Name      string
	Columns   []ColumnSpec
	RowCount  int
	BatchSize int // default 65536
}

TableSpec describes a full table to generate.

type Uniform

type Uniform struct {
	Min int64
	Max int64
}

Uniform generates values uniformly distributed over the half-open interval [Min, Max).

func (Uniform) GenerateFloat64

func (u Uniform) GenerateFloat64(buf []float64, rng *rand.Rand)

func (Uniform) GenerateInt64

func (u Uniform) GenerateInt64(buf []int64, rng *rand.Rand)

func (Uniform) GenerateString

func (u Uniform) GenerateString(buf []string, rng *rand.Rand)

type ZipfDist

type ZipfDist struct {
	S    float64 // Exponent (> 1 for more skew)
	V    float64 // Offset (>= 1)
	IMax uint64  // Maximum value
}

ZipfDist generates values following a Zipfian (power-law) distribution. Most values cluster near 0, with a long tail.

func (ZipfDist) GenerateFloat64

func (z ZipfDist) GenerateFloat64(buf []float64, rng *rand.Rand)

func (ZipfDist) GenerateInt64

func (z ZipfDist) GenerateInt64(buf []int64, rng *rand.Rand)

func (ZipfDist) GenerateString

func (z ZipfDist) GenerateString(buf []string, rng *rand.Rand)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL