Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func PrintStats ¶
func PrintStats(stats *TableStats, format string)
Types ¶
type AggregateStats ¶
type AggregateStats struct {
Count int64
Sum float64
Mean float64
Median float64
StdDev float64
Variance float64
Percentiles map[int]float64 // 25th, 50th, 75th, 90th, 95th, 99th
}
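AggregateStats holds the summary statistics computed for a numeric column: count, sum, mean, median, standard deviation, variance, and selected percentiles.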
type CSVReader ¶
type CSVReader struct {
Delimiter rune
}
CSVReader implements TableReader for CSV files, using probabilistic sampling.
func NewCSVReader ¶
func (*CSVReader) GetFormatName ¶
func (*CSVReader) ReadTable ¶
func (r *CSVReader) ReadTable(filePath string, config SamplingConfig) (*TableStats, error)
type ParquetReader ¶
type ParquetReader struct {
}
ParquetReader implements TableReader for Parquet files.
func NewParquetReader ¶
func NewParquetReader() *ParquetReader
func (*ParquetReader) GetFormatName ¶
func (r *ParquetReader) GetFormatName() string
func (*ParquetReader) ReadTable ¶
func (r *ParquetReader) ReadTable(filePath string, config SamplingConfig) (*TableStats, error)
type SamplingConfig ¶
type SamplingConfig struct {
SampleSize int // Number of rows to sample
RandomPositions int // Number of random positions to seek to
Confidence float64 // Confidence level for estimates
MaxFileSize int64 // Max file size to process entirely
}
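SamplingConfig controls the sampling behavior of a TableReader: the number of rows to sample, the number of random positions to seek to, the confidence level for estimates, and the maximum file size that is processed in its entirety.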
func DefaultSamplingConfig ¶
func DefaultSamplingConfig() SamplingConfig
DefaultSamplingConfig returns a SamplingConfig populated with sensible defaults.
type StatisticsGenerator ¶
type StatisticsGenerator struct {
// contains filtered or unexported fields
}
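StatisticsGenerator is the context of the strategy pattern: it generates statistics through a TableReader strategy, which can be replaced at runtime with SetReader.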
func NewStatisticsGenerator ¶
func NewStatisticsGenerator(reader TableReader, config SamplingConfig) *StatisticsGenerator
NewStatisticsGenerator creates a new StatisticsGenerator that uses the given reader strategy and sampling configuration.
func (*StatisticsGenerator) GenerateStats ¶
func (sg *StatisticsGenerator) GenerateStats(filePath string) (*TableStats, error)
GenerateStats generates statistics for the file at filePath using the current reader strategy.
func (*StatisticsGenerator) SetReader ¶
func (sg *StatisticsGenerator) SetReader(reader TableReader)
SetReader replaces the reader strategy at runtime.
type TSVReader ¶
type TSVReader struct {
*CSVReader
}
TSVReader implements TableReader for TSV files; it embeds a *CSVReader and defines its own GetFormatName.
func NewTSVReader ¶
func NewTSVReader() *TSVReader
func (*TSVReader) GetFormatName ¶
type TableReader ¶
type TableReader interface {
ReadTable(filePath string, config SamplingConfig) (*TableStats, error)
GetFormatName() string
}
TableReader defines the strategy interface for reading different table formats; CSVReader, TSVReader, and ParquetReader implement it.
type TableStats ¶
type TableStats struct {
RowCount int64
EstimatedRows int64 // Estimated total rows based on sampling
ColumnCount int
ColumnNames []string
ColumnTypes map[string]string
NullCounts map[string]int64
NullPercentage map[string]float64
MinValues map[string]interface{}
MaxValues map[string]interface{}
SampleData [][]string
Aggregates map[string]*AggregateStats // For numeric columns
SamplingConfig SamplingConfig
}
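TableStats holds the statistics collected for a table: row and column counts, column names and types, null counts and percentages, minimum and maximum values, sample rows, aggregates for numeric columns, and a SamplingConfig.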