csv

package
v0.9.9 (Latest)
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 13, 2025 | License: MIT | Imports: 8 | Imported by: 0

Documentation

Overview

Package csv provides unified CSV parsing, writing, and validation functionality for the GoPCA monorepo. It consolidates previously scattered CSV operations into a single, well-tested package, following the DRY (Don't Repeat Yourself) principle.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AnalyzeMissingValues

func AnalyzeMissingValues(data *Data) map[string]interface{}

AnalyzeMissingValues analyzes missing-value patterns in the given Data and returns the results as a map keyed by string.

func Save

func Save(w io.Writer, data *Data, opts Options) error

Save is a convenience function for writing CSV to an io.Writer

func SaveFile

func SaveFile(filename string, data *Data, opts Options) error

SaveFile is a convenience function for writing CSV data to the named file.

func SaveMatrix

func SaveMatrix(filename string, matrix types.Matrix, headers []string, rowNames []string, opts Options) error

SaveMatrix is a convenience function for writing a matrix to CSV

func ToNumericMatrix

func ToNumericMatrix(stringData [][]string, nullValues []string) (types.Matrix, [][]bool, error)

ToNumericMatrix converts string data to numeric matrix with missing value tracking

func ToStringMatrix

func ToStringMatrix(matrix types.Matrix, precision int) [][]string

ToStringMatrix converts numeric matrix to string representation

func ValidateStructure

func ValidateStructure(data *Data) error

ValidateStructure performs basic structural validation

Types

type CSVWritable

type CSVWritable interface {
	DataProvider
	WriteHeaders(w io.Writer, opts Options) error
	WriteRow(w io.Writer, index int, opts Options) error
}

CSVWritable is an interface for data that can be written to CSV

type ColumnStatistics

type ColumnStatistics struct {
	Name            string
	Index           int
	DataType        string // "numeric", "categorical", "mixed"
	NonMissing      int
	Missing         int
	MissingPercent  float64
	Mean            float64 // For numeric columns
	StdDev          float64 // For numeric columns
	Min             float64 // For numeric columns
	Max             float64 // For numeric columns
	UniqueValues    int     // For categorical columns
	HasZeroVariance bool    // Warning flag
}

ColumnStatistics contains statistics for a single column

type Data

type Data struct {
	// Core numeric data (always present for PCA)
	Matrix      types.Matrix // Numeric data matrix
	Headers     []string     // Column names
	RowNames    []string     // Row names
	MissingMask [][]bool     // Track missing values (true = missing)
	Rows        int          // Number of data rows
	Columns     int          // Number of data columns

	// Additional data types (optional)
	StringData           [][]string           // Raw string data (for GoCSV)
	CategoricalColumns   map[string][]string  // Categorical columns by name
	NumericTargetColumns map[string][]float64 // Numeric target columns
}

Data represents parsed CSV data with support for different data types

func Parse

func Parse(r io.Reader, opts Options) (*Data, error)

Parse is a convenience function for parsing CSV from a reader

func ParseFile

func ParseFile(filename string, opts Options) (*Data, error)

ParseFile is a convenience function for parsing the named CSV file.

type DataProvider

type DataProvider interface {
	GetHeaders() []string
	GetRowNames() []string
	GetDimensions() (rows, cols int)
	HasNumericData() bool
	HasStringData() bool
}

DataProvider is an interface that different data representations can implement to provide consistent access to CSV data, regardless of their internal structure.

type Options

type Options struct {
	// Parsing options
	Delimiter        rune      // Field delimiter: ',', ';', '\t'
	DecimalSeparator rune      // Decimal separator: '.', ','
	HasHeaders       bool      // First row contains column names
	HasRowNames      bool      // First column contains row names
	NullValues       []string  // Strings to treat as missing values
	ParseMode        ParseMode // How to parse the data
	TargetSuffix     string    // Suffix to identify target columns (e.g., "#target")

	// Reading options (for large files)
	SkipRows      int   // Number of rows to skip at start
	MaxRows       int   // Maximum rows to read (0 for all)
	Columns       []int // Specific columns to read (empty for all)
	StreamingMode bool  // Enable streaming for large files

	// Writing options
	FloatFormat byte // Format for float output: 'g', 'f', 'e'
	Precision   int  // Decimal precision for float output (-1 for auto)
}

Options provides unified configuration for CSV operations

func DefaultOptions

func DefaultOptions() Options

DefaultOptions returns sensible default options for CSV operations

func EuropeanOptions

func EuropeanOptions() Options

EuropeanOptions returns options for European CSV format (semicolon delimiter, comma decimal)

func TabDelimitedOptions

func TabDelimitedOptions() Options

TabDelimitedOptions returns options for tab-delimited files

type ParseMode

type ParseMode int

ParseMode defines how CSV data should be parsed

const (
	// ParseNumeric treats all data as numeric values
	ParseNumeric ParseMode = iota
	// ParseString treats all data as strings
	ParseString
	// ParseMixed automatically detects column types
	ParseMixed
	// ParseMixedWithTargets detects columns and identifies target columns
	ParseMixedWithTargets
)

type Reader

type Reader struct {
	// contains filtered or unexported fields
}

Reader provides unified CSV reading functionality

func NewReader

func NewReader(opts Options) *Reader

NewReader creates a new CSV reader with the given options

func (*Reader) Read

func (r *Reader) Read(input io.Reader) (*Data, error)

Read parses CSV data from an io.Reader

func (*Reader) ReadFile

func (r *Reader) ReadFile(filename string) (*Data, error)

ReadFile reads and parses a CSV file

type ValidationResult

type ValidationResult struct {
	Valid       bool
	Errors      []string
	Warnings    []string
	ColumnStats []ColumnStatistics
}

ValidationResult contains the results of CSV validation

func ValidateFile

func ValidateFile(filename string, opts Options) (*ValidationResult, error)

ValidateFile validates a CSV file

type Validator

type Validator struct {
	// contains filtered or unexported fields
}

Validator provides CSV validation functionality

func NewValidator

func NewValidator(opts Options) *Validator

NewValidator creates a new CSV validator with the given options

func (*Validator) Validate

func (v *Validator) Validate(data *Data) *ValidationResult

Validate performs comprehensive validation on CSV data

type Writer

type Writer struct {
	// contains filtered or unexported fields
}

Writer provides unified CSV writing functionality

func NewWriter

func NewWriter(opts Options) *Writer

NewWriter creates a new CSV writer with the given options

func (*Writer) Write

func (w *Writer) Write(output io.Writer, data *Data) error

Write writes CSV data to an io.Writer

func (*Writer) WriteFile

func (w *Writer) WriteFile(filename string, data *Data) error

WriteFile writes CSV data to a file

func (*Writer) WriteMatrix

func (w *Writer) WriteMatrix(output io.Writer, matrix types.Matrix, headers []string, rowNames []string) error

WriteMatrix writes a numeric matrix to CSV

func (*Writer) WriteMatrixFile

func (w *Writer) WriteMatrixFile(filename string, matrix types.Matrix, headers []string, rowNames []string) error

WriteMatrixFile writes a numeric matrix to a CSV file

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL