Documentation
¶
Index ¶
- Constants
- func DetectColumnTypes(r io.Reader, format CSVFormat) (numericCols []int, categoricalCols []int, headers []string, err error)
- type CSVData
- type CSVFormat
- type CSVParser
- type DiagnosticLimits
- type EigencorrelationResult
- type EllipseParams
- type FeatureData
- type JSONFloat64
- type Matrix
- type MetricsCalculator
- type MetricsConfig
- type MetricsData
- type MissingValueInfo
- type MissingValueStrategy
- type ModelComponents
- type ModelConfig
- type ModelMetadata
- type PCAConfig
- type PCAEngine
- type PCAMetadata
- type PCAMetrics
- type PCAOutputData
- type PCAResult
- type PreprocessingInfo
- type PreprocessingParams
- type PreservedColumns
- type ResultsData
- type SampleData
- type SampleMetrics
- type SamplesResults
Constants ¶
// DefaultColumnTypeDetectionSampleSize is the default number of rows to check
// when detecting column types.
// NOTE(review): presumably the sample size used by DetectColumnTypes — confirm
// against the implementation, which is not shown on this page.
const DefaultColumnTypeDetectionSampleSize = 10
DefaultColumnTypeDetectionSampleSize defines the default number of rows to check when detecting column types
Variables ¶
This section is empty.
Functions ¶
Types ¶
type CSVData ¶
// CSVData represents parsed CSV data together with its metadata: the numeric
// matrix, optional header and row names, a missing-value mask, and the
// data dimensions.
type CSVData struct {
Matrix Matrix // The numerical data
Headers []string // Column names (if present)
RowNames []string // Row names (if present)
// NOTE(review): presumably indexed [row][column] to line up with Matrix — confirm.
MissingMask [][]bool // Track NaN locations (true = missing)
Rows int // Number of data rows
Columns int // Number of data columns
}
CSVData represents parsed CSV data with metadata
func ParseCSVMixed ¶
ParseCSVMixed parses a CSV file that may contain both numeric and categorical columns
func ParseCSVMixedWithTargets ¶
func ParseCSVMixedWithTargets(r io.Reader, format CSVFormat, targetColumns []string) (*CSVData, map[string][]string, map[string][]float64, error)
ParseCSVMixedWithTargets parses CSV data with support for numeric target columns. Target columns are numeric columns that should be available for visualization but not included in PCA. Columns with a "#target" suffix (with or without a space) are automatically detected as target columns.
func (*CSVData) GetMissingValueInfo ¶
func (d *CSVData) GetMissingValueInfo(selectedColumns []int) *MissingValueInfo
GetMissingValueInfo returns information about missing values in selected columns
type CSVFormat ¶
// CSVFormat defines the format and parsing options for CSV files.
// DefaultCSVFormat returns the default options.
type CSVFormat struct {
FieldDelimiter rune // Field separator: ',', ';', '\t'
DecimalSeparator rune // Decimal separator: '.', ','
HasHeaders bool // First row contains column names
HasRowNames bool // First column contains row names
NullValues []string // Strings to treat as missing values
}
CSVFormat defines the format and parsing options for CSV files
func DefaultCSVFormat ¶
func DefaultCSVFormat() CSVFormat
DefaultCSVFormat returns the default CSV format options
func DetectFormat ¶
DetectFormat attempts to detect the CSV format from a sample of the file
type CSVParser ¶
// CSVParser provides methods for parsing CSV files.
// Per the NewCSVParser documentation, a parser is created with a given format.
type CSVParser struct {
// contains filtered or unexported fields
}
CSVParser provides methods for parsing CSV files
func NewCSVParser ¶
NewCSVParser creates a new CSV parser with the given format
type DiagnosticLimits ¶
// DiagnosticLimits contains statistical limits for diagnostics.
//
// The field names and JSON keys match the limit fields on PCAResult, which
// documents them as 95%/99% confidence limits for T² and for Q-residuals.
// Every field is tagged omitempty, so under encoding/json a limit equal to 0
// is left out of the output.
type DiagnosticLimits struct {
// T² limits (PCAResult: 95% and 99% confidence limits for T²).
T2Limit95 float64 `json:"t2_limit_95,omitempty"`
T2Limit99 float64 `json:"t2_limit_99,omitempty"`
// Q limits (PCAResult: 95% and 99% confidence limits for Q-residuals).
QLimit95 float64 `json:"q_limit_95,omitempty"`
QLimit99 float64 `json:"q_limit_99,omitempty"`
}
DiagnosticLimits contains statistical limits for diagnostics
type EigencorrelationResult ¶
// EigencorrelationResult contains correlations between PC scores and
// metadata variables.
//
// Correlations and PValues are both keyed by variable name and hold one value
// per principal component.
// NOTE(review): the per-variable slices are presumably ordered like
// Components, and Variables presumably fixes the iteration order of the map
// keys — confirm.
type EigencorrelationResult struct {
Correlations map[string][]float64 `json:"correlations"` // Variable name -> correlations with each PC
PValues map[string][]float64 `json:"pValues"` // Variable name -> p-values
Variables []string `json:"variables"` // Order of variables
Components []string `json:"components"` // PC labels
Method string `json:"method"` // Correlation method used
}
EigencorrelationResult contains correlations between PC scores and metadata variables
type EllipseParams ¶
// EllipseParams defines parameters for confidence ellipse visualization:
// a center, two axis lengths, a rotation angle, and the confidence level the
// ellipse represents.
type EllipseParams struct {
// Center coordinates of the ellipse (typically the mean of scores)
CenterX float64
CenterY float64
// Semi-major and semi-minor axes lengths
MajorAxis float64
MinorAxis float64
// Rotation angle in radians
Angle float64
// Confidence level (e.g., 0.95 for 95% confidence)
ConfidenceLevel float64
}
EllipseParams defines parameters for confidence ellipse visualization
type FeatureData ¶
// FeatureData contains feature-space results.
//
// NOTE(review): the dimension letters in the field comments are not defined
// here. SampleData annotates its scores as (n × c), so c is presumably the
// number of components and k the number of features — confirm, and check that
// the (c × k) orientation of Loadings matches the implementation.
type FeatureData struct {
Names []string `json:"names"` // Feature names from input
Loadings Matrix `json:"loadings"` // Loadings (c × k)
Means []float64 `json:"means"` // Original means (k)
StdDevs []float64 `json:"stddevs"` // Original std devs (k)
}
FeatureData contains feature-space results
type JSONFloat64 ¶
// JSONFloat64 is a float64 that marshals NaN and Inf values as null in JSON
// (see MarshalJSON). UnmarshalJSON maps null back to NaN.
type JSONFloat64 float64
JSONFloat64 is a float64 that marshals NaN and Inf values as null in JSON. This ensures compatibility with JavaScript and other JSON consumers that don't support these special float values.
func (JSONFloat64) Float64 ¶
func (f JSONFloat64) Float64() float64
Float64 returns the underlying float64 value.
func (JSONFloat64) IsInf ¶
func (f JSONFloat64) IsInf() bool
IsInf returns true if the value is infinite.
func (JSONFloat64) IsNaN ¶
func (f JSONFloat64) IsNaN() bool
IsNaN returns true if the value is NaN.
func (JSONFloat64) MarshalJSON ¶
func (f JSONFloat64) MarshalJSON() ([]byte, error)
MarshalJSON implements the json.Marshaler interface. NaN and Inf values are marshaled as null to ensure JSON compatibility.
func (*JSONFloat64) UnmarshalJSON ¶
func (f *JSONFloat64) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface. null values are unmarshaled as NaN.
type MetricsCalculator ¶
// MetricsCalculator defines the interface for PCA metrics computation:
// computing the full set of diagnostics, flagging outliers from previously
// computed metrics, and computing per-variable contributions.
type MetricsCalculator interface {
// CalculateMetrics computes all diagnostic metrics for the given PCA result
// Parameters:
// - result: The PCA result containing scores, loadings, and eigenvalues
// - data: The original data matrix (observations x variables)
// - config: Configuration options for metrics calculation
// Returns:
// - PCAMetrics containing all calculated metrics
// - error if calculation fails
CalculateMetrics(result *PCAResult, data Matrix, config MetricsConfig) (*PCAMetrics, error)
// DetectOutliers identifies outliers based on Hotelling's T² statistic
// Parameters:
// - metrics: Previously calculated PCA metrics
// - significance: Significance level for outlier detection (e.g., 0.01)
// Returns:
// - Boolean slice indicating outliers (true = outlier)
DetectOutliers(metrics *PCAMetrics, significance float64) []bool
// CalculateContributions computes variable contributions to each PC
// Parameters:
// - result: The PCA result containing loadings
// - data: The original data matrix
// Returns:
// - Matrix of contributions (variables x components)
CalculateContributions(result *PCAResult, data Matrix) [][]float64
}
MetricsCalculator defines the interface for PCA metrics computation
type MetricsConfig ¶
// MetricsConfig contains configuration options for metrics calculation.
//
// NOTE(review): several documented defaults differ from the Go zero value
// (0.01 vs 0 for SignificanceLevel, true vs false for the two bool flags).
// A zero-valued MetricsConfig therefore only yields those defaults if the
// consuming code applies them — confirm where that happens.
type MetricsConfig struct {
// NumComponents specifies how many components to use for metrics calculation
// If 0, uses all available components from the PCA result
NumComponents int
// SignificanceLevel for outlier detection (e.g., 0.01 for 1% significance)
// Default is 0.01 if not specified
SignificanceLevel float64
// CalculateContributions determines whether to compute variable contributions
// Default is true
CalculateContributions bool
// CalculateConfidenceEllipse determines whether to compute ellipse parameters
// Default is true
CalculateConfidenceEllipse bool
}
MetricsConfig contains configuration options for metrics calculation
type MetricsData ¶
// MetricsData contains diagnostic metrics for samples.
//
// It holds the same four metrics as SampleMetrics, but as one slice per
// metric instead of one struct per sample.
// NOTE(review): the slices are presumably index-aligned, one entry per
// sample — confirm.
type MetricsData struct {
HotellingT2 []float64 `json:"hotelling_t2"`
Mahalanobis []float64 `json:"mahalanobis"`
RSS []float64 `json:"rss"`
IsOutlier []bool `json:"is_outlier"`
}
MetricsData contains diagnostic metrics for samples
type MissingValueInfo ¶
// MissingValueInfo contains information about missing values in the data.
// It is the return type of (*CSVData).GetMissingValueInfo, which reports on a
// caller-selected set of columns.
type MissingValueInfo struct {
ColumnIndices []int // Columns that contain missing values
RowsAffected []int // Rows that contain missing values in selected columns
TotalMissing int // Total number of missing values
// NOTE(review): presumably keyed by column index — confirm.
MissingByColumn map[int]int // Missing count per column
}
MissingValueInfo contains information about missing values in the data
func (*MissingValueInfo) GetSummary ¶
func (m *MissingValueInfo) GetSummary() string
GetSummary returns a human-readable summary of missing values
func (*MissingValueInfo) HasMissing ¶
func (m *MissingValueInfo) HasMissing() bool
HasMissing returns true if there are any missing values
type MissingValueStrategy ¶
// MissingValueStrategy defines how to handle missing values.
// The declared values are MissingError, MissingDrop, MissingMean,
// MissingMedian, and MissingNative.
type MissingValueStrategy string
MissingValueStrategy defines how to handle missing values
const ( // MissingError returns an error when missing values are found MissingError MissingValueStrategy = "error" // MissingDrop removes rows containing missing values MissingDrop MissingValueStrategy = "drop" // MissingMean replaces missing values with column mean MissingMean MissingValueStrategy = "mean" // MissingMedian replaces missing values with column median MissingMedian MissingValueStrategy = "median" // MissingNative allows NIPALS to handle missing values natively (NIPALS only) MissingNative MissingValueStrategy = "native" )
type ModelComponents ¶
// ModelComponents contains the core PCA model components.
//
// The JSON keys loadings, explained_variance, explained_variance_ratio,
// cumulative_variance and component_labels are the same keys PCAResult uses.
// NOTE(review): the values are presumably copied from the corresponding
// PCAResult fields, with FeatureLabels corresponding to
// PCAResult.VariableLabels — confirm.
type ModelComponents struct {
Loadings Matrix `json:"loadings"`
ExplainedVariance []float64 `json:"explained_variance"`
ExplainedVarianceRatio []float64 `json:"explained_variance_ratio"`
CumulativeVariance []float64 `json:"cumulative_variance"`
ComponentLabels []string `json:"component_labels"`
FeatureLabels []string `json:"feature_labels"`
}
ModelComponents contains the core PCA model components
type ModelConfig ¶
// ModelConfig contains the configuration used for PCA.
//
// Most fields have a like-named counterpart on PCAConfig, which documents
// Method as "svd", "eigen", "nipals", or "kernel", the excluded
// rows/columns as 0-based indices, and KernelType as "rbf", "linear", or
// "poly".
// NOTE(review): presumably populated from the PCAConfig used for the run —
// confirm. Unlike PCAConfig, missing_strategy is not omitempty here, so it is
// always emitted.
type ModelConfig struct {
Method string `json:"method"`
// NOTE(review): presumably the number of components in the model (PCAConfig
// calls its counterpart Components) — confirm.
NComponents int `json:"n_components"`
MissingStrategy MissingValueStrategy `json:"missing_strategy"`
ExcludedRows []int `json:"excluded_rows,omitempty"`
ExcludedColumns []int `json:"excluded_columns,omitempty"`
// Kernel PCA parameters
KernelType string `json:"kernel_type,omitempty"`
KernelGamma float64 `json:"kernel_gamma,omitempty"`
KernelDegree int `json:"kernel_degree,omitempty"`
KernelCoef0 float64 `json:"kernel_coef0,omitempty"`
}
ModelConfig contains the configuration used for PCA
type ModelMetadata ¶
// ModelMetadata contains metadata about the model and analysis.
type ModelMetadata struct {
// NOTE(review): not visible here whether this is the software version or an
// output-format version — confirm.
Version string `json:"version"`
// CreatedAt is stored as a string.
// NOTE(review): the timestamp format is not visible here — confirm.
CreatedAt string `json:"created_at"`
Software string `json:"software"`
// Config is the PCA configuration recorded with the model.
Config ModelConfig `json:"config"`
}
ModelMetadata contains metadata about the model and analysis
type PCAConfig ¶
// PCAConfig holds configuration for PCA analysis: the number of components,
// preprocessing switches, the computation method, row/column exclusions,
// missing-value handling, and kernel PCA parameters.
//
// NOTE(review): whether the preprocessing switches may be combined, and in
// what order they would be applied, is not visible here — confirm.
type PCAConfig struct {
// NOTE(review): presumably the number of principal components to compute — confirm.
Components int `json:"components"`
// NOTE(review): presumably column-wise mean centering and standard
// (mean/std-dev) scaling respectively — confirm.
MeanCenter bool `json:"mean_center"`
StandardScale bool `json:"standard_scale"`
RobustScale bool `json:"robust_scale"` // Robust scaling (median/MAD)
ScaleOnly bool `json:"scale_only"` // Variance scaling: divide by std dev without mean centering
SNV bool `json:"snv"` // Standard Normal Variate (row-wise normalization)
VectorNorm bool `json:"vector_norm"` // L2 normalization (row-wise)
Method string `json:"method"` // "svd", "eigen", "nipals", or "kernel"
ExcludedRows []int `json:"excluded_rows,omitempty"` // 0-based indices of rows to exclude
ExcludedColumns []int `json:"excluded_columns,omitempty"` // 0-based indices of columns to exclude
// Missing value handling
MissingStrategy MissingValueStrategy `json:"missing_strategy,omitempty"` // How to handle missing values
// Kernel PCA specific parameters
KernelType string `json:"kernel_type,omitempty"` // "rbf", "linear", "poly"
KernelGamma float64 `json:"kernel_gamma,omitempty"` // RBF/Poly parameter
KernelDegree int `json:"kernel_degree,omitempty"` // Poly parameter
KernelCoef0 float64 `json:"kernel_coef0,omitempty"` // Poly parameter
}
PCAConfig holds configuration for PCA analysis
type PCAEngine ¶
// PCAEngine defines the interface for PCA computation.
type PCAEngine interface {
// Fit runs PCA on data using config and returns the result.
Fit(data Matrix, config PCAConfig) (*PCAResult, error)
// Transform takes a data matrix and returns a matrix.
// NOTE(review): presumably projects data onto the components of a previously
// fitted model and fails if no model has been fitted — confirm.
Transform(data Matrix) (Matrix, error)
// FitTransform has the same signature as Fit.
// NOTE(review): how its result differs from Fit's is not visible here — confirm.
FitTransform(data Matrix, config PCAConfig) (*PCAResult, error)
}
PCAEngine defines the interface for PCA computation
type PCAMetadata ¶
// PCAMetadata contains analysis metadata: sample, feature and component
// counts, the method name, a preprocessing description, and variance figures.
type PCAMetadata struct {
NSamples int `json:"n_samples"`
NFeatures int `json:"n_features"`
NComponents int `json:"n_components"`
Method string `json:"method"`
// Preprocessing is a single string here, unlike the structured PreprocessingInfo.
// NOTE(review): its format is not visible here — confirm.
Preprocessing string `json:"preprocessing"`
ExplainedVariance []float64 `json:"explained_variance"`
CumulativeVariance []float64 `json:"cumulative_variance"`
}
PCAMetadata contains analysis metadata
type PCAMetrics ¶
// PCAMetrics contains comprehensive diagnostic metrics for PCA model
// evaluation. It is the result type of MetricsCalculator.CalculateMetrics.
// The struct carries no JSON tags.
type PCAMetrics struct {
// MahalanobisDistances contains the Mahalanobis distance for each observation
// in the transformed PC space, measuring multivariate distance from the mean
MahalanobisDistances []float64
// HotellingT2 contains Hotelling's T-squared statistic for each observation,
// which is used for multivariate outlier detection
HotellingT2 []float64
// RSS (Residual Sum of Squares) contains the reconstruction error
// for each observation when using the specified number of components
RSS []float64
// QResiduals contains the Q-statistic (SPE - Squared Prediction Error)
// for each observation, measuring the lack of fit
QResiduals []float64
// OutlierMask indicates which observations are considered outliers
// based on the specified significance level
OutlierMask []bool
// ContributionScores contains the contribution of each variable
// to each principal component (variables x components)
ContributionScores [][]float64
// ConfidenceEllipse contains parameters for drawing confidence ellipses
// in score plots for outlier visualization
ConfidenceEllipse EllipseParams
}
PCAMetrics contains comprehensive diagnostic metrics for PCA model evaluation
type PCAOutputData ¶
// PCAOutputData represents complete PCA results for output.
type PCAOutputData struct {
Metadata ModelMetadata `json:"metadata"`
Preprocessing PreprocessingInfo `json:"preprocessing"`
Model ModelComponents `json:"model"`
Results ResultsData `json:"results"`
// NOTE(review): omitempty has no effect on a non-pointer struct under
// encoding/json, so "diagnostics" is always emitted (possibly as {} because
// every DiagnosticLimits field is itself omitempty). Use a pointer if the key
// should be omittable.
Diagnostics DiagnosticLimits `json:"diagnostics,omitempty"`
// Optional sections: omitted from JSON when the pointer is nil.
Eigencorrelations *EigencorrelationResult `json:"eigencorrelations,omitempty"`
// NOTE(review): this key is camelCase while the sibling keys are lowercase or
// snake_case — confirm this is what consumers expect.
PreservedColumns *PreservedColumns `json:"preservedColumns,omitempty"`
}
PCAOutputData represents complete PCA results for output
type PCAResult ¶
// PCAResult contains the results of PCA analysis: scores and loadings,
// variance figures, labels, preprocessing statistics, per-sample diagnostics
// with their confidence limits, and optional eigencorrelations.
// It is the result type of PCAEngine.Fit and PCAEngine.FitTransform.
type PCAResult struct {
Scores Matrix `json:"scores"`
Loadings Matrix `json:"loadings"`
ExplainedVar []float64 `json:"explained_variance"`
// NOTE(review): the name and JSON key say "ratio" but the comment says
// "percentage" — confirm whether values are on a 0–1 or 0–100 scale.
ExplainedVarRatio []float64 `json:"explained_variance_ratio"` // Percentage of variance explained
CumulativeVar []float64 `json:"cumulative_variance"`
ComponentLabels []string `json:"component_labels"`
VariableLabels []string `json:"variable_labels,omitempty"` // Original variable names
ComponentsComputed int `json:"components_computed"` // Number of components actually computed
// NOTE(review): PCAConfig.Method also lists "eigen", which this comment
// omits — confirm the full set of values.
Method string `json:"method"` // Method used (svd, nipals, kernel)
PreprocessingApplied bool `json:"preprocessing_applied"` // Whether preprocessing was applied
// Preprocessing statistics
Means []float64 `json:"means,omitempty"` // Original feature means
StdDevs []float64 `json:"stddevs,omitempty"` // Original feature std devs
// Diagnostic metrics
Metrics []SampleMetrics `json:"metrics,omitempty"` // Per-sample diagnostic metrics
// Confidence limits for diagnostics
T2Limit95 float64 `json:"t2_limit_95,omitempty"` // 95% confidence limit for T²
T2Limit99 float64 `json:"t2_limit_99,omitempty"` // 99% confidence limit for T²
QLimit95 float64 `json:"q_limit_95,omitempty"` // 95% confidence limit for Q-residuals
QLimit99 float64 `json:"q_limit_99,omitempty"` // 99% confidence limit for Q-residuals
// Eigencorrelations with metadata
Eigencorrelations *EigencorrelationResult `json:"eigencorrelations,omitempty"`
// All eigenvalues (including non-retained) for diagnostic calculations
AllEigenvalues []float64 `json:"all_eigenvalues,omitempty"`
}
PCAResult contains the results of PCA analysis
type PreprocessingInfo ¶
// PreprocessingInfo contains all preprocessing configuration and parameters.
//
// The six boolean flags have the same names and JSON keys as the
// preprocessing switches on PCAConfig, which documents RobustScale as
// median/MAD scaling, ScaleOnly as dividing by std dev without mean
// centering, SNV as row-wise Standard Normal Variate, and VectorNorm as
// row-wise L2 normalization.
type PreprocessingInfo struct {
MeanCenter bool `json:"mean_center"`
StandardScale bool `json:"standard_scale"`
RobustScale bool `json:"robust_scale"`
ScaleOnly bool `json:"scale_only"`
SNV bool `json:"snv"`
VectorNorm bool `json:"vector_norm"`
// Parameters holds the fitted preprocessing parameters.
Parameters PreprocessingParams `json:"parameters"`
}
PreprocessingInfo contains all preprocessing configuration and parameters
type PreprocessingParams ¶
// PreprocessingParams contains the fitted preprocessing parameters.
// Every field is omitempty, so a nil or empty slice is left out of the JSON.
// NOTE(review): presumably only the slices relevant to the enabled
// preprocessing steps are populated (feature-wise for centering/scaling,
// row-wise for SNV/vector normalization) — confirm.
type PreprocessingParams struct {
// Per-feature statistics.
FeatureMeans []float64 `json:"feature_means,omitempty"`
FeatureStdDevs []float64 `json:"feature_stddevs,omitempty"`
FeatureMedians []float64 `json:"feature_medians,omitempty"`
FeatureMADs []float64 `json:"feature_mads,omitempty"`
// Per-row statistics.
RowMeans []float64 `json:"row_means,omitempty"`
RowStdDevs []float64 `json:"row_stddevs,omitempty"`
}
PreprocessingParams contains the fitted preprocessing parameters
type PreservedColumns ¶
// PreservedColumns contains columns that were excluded from PCA but preserved
// in output.
// NOTE(review): the two maps have the same types as the map results of
// ParseCSVMixedWithTargets, so they are presumably keyed by column name with
// one value per row — confirm.
type PreservedColumns struct {
Categorical map[string][]string `json:"categorical,omitempty"`
NumericTarget map[string][]float64 `json:"numericTarget,omitempty"`
}
PreservedColumns contains columns that were excluded from PCA but preserved in output
type ResultsData ¶
// ResultsData contains the results of the PCA analysis. It currently wraps a
// single section, the per-sample results.
type ResultsData struct {
Samples SamplesResults `json:"samples"`
}
ResultsData contains the results of the PCA analysis
type SampleData ¶
// SampleData contains sample-space results: sample names, their PC scores,
// and one SampleMetrics entry per sample.
// NOTE(review): in the (n × c) annotation, n is presumably the number of
// samples and c the number of components — confirm.
type SampleData struct {
Names []string `json:"names"` // Sample names from input
Scores Matrix `json:"scores"` // PC scores (n × c)
Metrics []SampleMetrics `json:"metrics"` // Advanced metrics per sample
}
SampleData contains sample-space results
type SampleMetrics ¶
// SampleMetrics contains advanced metrics for a sample. MetricsData carries
// the same four metrics as per-metric slices.
type SampleMetrics struct {
HotellingT2 float64 `json:"hotelling_t2"`
Mahalanobis float64 `json:"mahalanobis"`
// RSS: PCAMetrics expands this abbreviation as Residual Sum of Squares.
RSS float64 `json:"rss"`
IsOutlier bool `json:"is_outlier"`
}
SampleMetrics contains advanced metrics for a sample
type SamplesResults ¶
// SamplesResults contains sample-specific results.
type SamplesResults struct {
Names []string `json:"names"`
Scores Matrix `json:"scores"`
// Metrics is optional: a nil pointer is omitted from the JSON output.
Metrics *MetricsData `json:"metrics,omitempty"`
}
SamplesResults contains sample-specific results