Documentation ¶
Index ¶
Constants ¶
const DefaultBatchSize = 65536
DefaultBatchSize is the default number of rows per Arrow RecordBatch.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ColumnSpec ¶
type ColumnSpec struct {
Name string
Type arrow.DataType
Distribution Distribution
NullRate float64 // 0.0 = no nulls, 0.1 = 10% nulls
}
ColumnSpec describes how to generate data for a single column.
type Constant ¶
type Constant struct {
Value int64
}
Constant generates the same value for every element.
type Distribution ¶
type Distribution interface {
GenerateInt64(buf []int64, rng *rand.Rand)
GenerateFloat64(buf []float64, rng *rand.Rand)
GenerateString(buf []string, rng *rand.Rand)
}
Distribution defines how values are generated for a column.
type GenReader ¶
type GenReader struct {
// contains filtered or unexported fields
}
GenReader implements dio.Reader by lazily generating Arrow RecordBatches.
func NewReader ¶
func NewReader(spec TableSpec, opts ...ReaderOption) *GenReader
NewReader creates a GenReader from a TableSpec.
func (*GenReader) Read ¶
func (r *GenReader) Read(ctx context.Context, opts ...dio.ReadOption) (dio.BatchStream, error)
Read opens a stream of lazily generated Arrow RecordBatches. Each call resets the RNG from the configured seed, so repeated reads are deterministic.
type ReaderOption ¶
type ReaderOption func(*readerConfig)
ReaderOption configures a GenReader.
func WithBatchSize ¶
func WithBatchSize(size int) ReaderOption
WithBatchSize overrides the TableSpec's BatchSize.
func WithSeed ¶
func WithSeed(seed int64) ReaderOption
WithSeed sets the random seed for data generation. Default: 42.
type Sequential ¶
Sequential generates monotonically increasing values: start, start+step, start+2*step, ... State is tracked across calls so values continue from where the last call left off.
func (*Sequential) GenerateFloat64 ¶
func (s *Sequential) GenerateFloat64(buf []float64, _ *rand.Rand)
func (*Sequential) GenerateInt64 ¶
func (s *Sequential) GenerateInt64(buf []int64, _ *rand.Rand)
func (*Sequential) GenerateString ¶
func (s *Sequential) GenerateString(buf []string, _ *rand.Rand)
type StringPool ¶
type StringPool struct {
Cardinality int
MinLen int
MaxLen int
// contains filtered or unexported fields
}
StringPool pre-generates a pool of unique random strings, then picks from that pool uniformly at random.
func (*StringPool) GenerateFloat64 ¶
func (sp *StringPool) GenerateFloat64(buf []float64, rng *rand.Rand)
func (*StringPool) GenerateInt64 ¶
func (sp *StringPool) GenerateInt64(buf []int64, rng *rand.Rand)
func (*StringPool) GenerateString ¶
func (sp *StringPool) GenerateString(buf []string, rng *rand.Rand)
type TableSpec ¶
type TableSpec struct {
Name string
Columns []ColumnSpec
RowCount int
BatchSize int // default 65536
}
TableSpec describes a full table to generate.