Documentation
¶
Index ¶
- Constants
- func ApplyTransform(data types.Matrix, columns []int, transform TransformType) (types.Matrix, error)
- func CalculateConfidenceEllipse(x, y []float64, confidenceLevel float64) (centerX, centerY, majorAxis, minorAxis, angle float64, err error)
- func CalculateGroupEllipses(scores mat.Matrix, groups []string, pcX, pcY int, confidenceLevel float64) (map[string]EllipseParams, error)
- func CalculateMaxComponents(rows, cols int) int
- func CalculateMetricsFromPCAResult(result *types.PCAResult, preprocessedData types.Matrix) ([]types.SampleMetrics, error)
- func CheckForConstantColumns(data types.Matrix) ([]int, error)
- func ComputeAutoCorrelation(data [][]float64, maxLag int) ([][]float64, error)
- func CopyMatrixData(source *mat.Dense) []float64
- func CreateFloat2DSlice(rows, cols int) [][]float64
- func CreateFloatSlice(size int) []float64
- func CreateWorkingCopy(original *mat.Dense) *mat.Dense
- func EstimateTemporalPCAMemory(samples, variables, lags int) (bytes int64, warning string)
- func ExtractColumn(m *mat.Dense, col int) []float64
- func ExtractRow(m *mat.Dense, row int) []float64
- func GetColumnRanks(data types.Matrix) ([]int, error)
- func GetVarianceByColumn(data types.Matrix) ([]float64, error)
- func InitializeMatrix(rows, cols int) *mat.Dense
- func InitializeScoresAndLoadings(nSamples, nFeatures, nComponents int) (*mat.Dense, *mat.Dense)
- func InitializeSquareMatrix(size int) *mat.Dense
- func InitializeVector(size int) *mat.VecDense
- func NewKernelPCAEngine() types.PCAEngine
- func NewPCAEngine() types.PCAEngine
- func NewPCAEngineForMethod(method string) types.PCAEngine
- func NewTemporalPCAEngine() types.PCAEngine
- func RemoveOutliers(data types.Matrix, threshold float64) (types.Matrix, []int, error)
- func ValidateComponentCount(components, maxComponents int) error
- func ValidateDataForPCA(data types.Matrix, selectedCols []int) error
- func ValidateDataMatrix(data types.Matrix) error
- func ValidateKernelConfig(config types.PCAConfig) error
- func ValidateNaNValues(data types.Matrix, allowNaN bool) error
- func ValidatePCAInput(data types.Matrix, config types.PCAConfig) error
- func ValidateVectorPair(x, y []float64) error
- type CorrelationRequest
- type CorrelationResult
- type EllipseParams
- type KernelPCAImpl
- type KernelType
- type MissingValueHandler
- type PCAImpl
- func (p *PCAImpl) Fit(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
- func (p *PCAImpl) FitTransform(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
- func (p *PCAImpl) SetLoadings(loadings types.Matrix, nComponents int) error
- func (p *PCAImpl) SetPreprocessor(preprocessor *Preprocessor)
- func (p *PCAImpl) Transform(data types.Matrix) (types.Matrix, error)
- type PCAMetricsCalculator
- func (m *PCAMetricsCalculator) CalculateMetrics(originalData types.Matrix) ([]types.SampleMetrics, error)
- func (m *PCAMetricsCalculator) CalculateQLimits(eigenvalues []float64, totalComponents int) (limit95, limit99 float64)
- func (m *PCAMetricsCalculator) CalculateT2Limits() (limit95, limit99 float64)
- type Preprocessor
- func NewPreprocessor(meanCenter, standardScale, robustScale bool) *Preprocessor
- func NewPreprocessorFull(meanCenter, standardScale, robustScale, snv, vectorNorm bool) *Preprocessor
- func NewPreprocessorWithScaleOnly(meanCenter, standardScale, robustScale, scaleOnly, snv, vectorNorm bool) *Preprocessor
- func (p *Preprocessor) Fit(data types.Matrix) error
- func (p *Preprocessor) FitTransform(data types.Matrix) (types.Matrix, error)
- func (p *Preprocessor) GetMADs() []float64
- func (p *Preprocessor) GetMeans() []float64
- func (p *Preprocessor) GetMedians() []float64
- func (p *Preprocessor) GetRowMeans() []float64
- func (p *Preprocessor) GetRowStdDevs() []float64
- func (p *Preprocessor) GetStdDevs() []float64
- func (p *Preprocessor) InverseTransform(data types.Matrix) (types.Matrix, error)
- func (p *Preprocessor) IsSNVEnabled() bool
- func (p *Preprocessor) SetFittedParameters(means, stdDevs, medians, mads, rowMeans, rowStdDevs []float64) error
- func (p *Preprocessor) Transform(data types.Matrix) (types.Matrix, error)
- type TemporalPCAImpl
- func (t *TemporalPCAImpl) ComputeVariableImportance() ([][]float64, error)
- func (t *TemporalPCAImpl) Fit(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
- func (t *TemporalPCAImpl) FitTransform(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
- func (t *TemporalPCAImpl) GetLagContributions() ([][]float64, error)
- func (t *TemporalPCAImpl) GetLoadingForLag(variable, lag, component int) (float64, error)
- func (t *TemporalPCAImpl) ReconstructionError(data types.Matrix) ([]float64, error)
- func (t *TemporalPCAImpl) Transform(data types.Matrix) (types.Matrix, error)
- type TransformType
Constants ¶
const (
// Minimum variance/norm threshold to avoid division by zero
MinVarianceThreshold = 1e-8
)
Variables ¶
This section is empty.
Functions ¶
func ApplyTransform ¶
func ApplyTransform(data types.Matrix, columns []int, transform TransformType) (types.Matrix, error)
ApplyTransform applies a mathematical transformation to specified columns
func CalculateConfidenceEllipse ¶
func CalculateConfidenceEllipse(x, y []float64, confidenceLevel float64) (centerX, centerY, majorAxis, minorAxis, angle float64, err error)
CalculateConfidenceEllipse computes the parameters for a confidence ellipse for a 2D set of points at the specified confidence level. Returns center coordinates, semi-major/minor axes, and rotation angle.
Reference: Johnson & Wichern (2007) Applied Multivariate Statistical Analysis
func CalculateGroupEllipses ¶
func CalculateGroupEllipses(scores mat.Matrix, groups []string, pcX, pcY int, confidenceLevel float64) (map[string]EllipseParams, error)
CalculateGroupEllipses computes confidence ellipse parameters for each group in the data. scores is a 2D matrix where each row is an observation, columns are PC scores. groups is a slice indicating the group membership of each observation. pcX and pcY are the indices of the principal components to use (0-based).
func CalculateMaxComponents ¶ added in v0.9.10
CalculateMaxComponents calculates the maximum number of components for a data matrix
func CalculateMetricsFromPCAResult ¶
func CalculateMetricsFromPCAResult(result *types.PCAResult, preprocessedData types.Matrix) ([]types.SampleMetrics, error)
CalculateMetricsFromPCAResult is a convenience function that calculates metrics directly from PCAResult
func CheckForConstantColumns ¶ added in v0.9.10
CheckForConstantColumns checks if any columns have zero or near-zero variance
func ComputeAutoCorrelation ¶ added in v1.1.0
ComputeAutoCorrelation computes the autocorrelation function for lag selection guidance. Returns autocorrelation values for each variable up to maxLag.
func CopyMatrixData ¶ added in v0.9.10
CopyMatrixData creates a flat array copy of matrix data for gonum operations
func CreateFloat2DSlice ¶ added in v0.9.10
CreateFloat2DSlice creates a new 2D float64 slice with specified dimensions
func CreateFloatSlice ¶ added in v0.9.10
CreateFloatSlice creates a new float64 slice of specified size
func CreateWorkingCopy ¶ added in v0.9.10
CreateWorkingCopy creates a working copy of a matrix for deflation operations
func EstimateTemporalPCAMemory ¶ added in v1.1.0
EstimateTemporalPCAMemory estimates memory usage for temporal PCA. Returns the estimated number of bytes and a warning message if memory usage is high.
func ExtractColumn ¶ added in v0.9.10
ExtractColumn extracts a column from a matrix as a slice
func ExtractRow ¶ added in v0.9.10
ExtractRow extracts a row from a matrix as a slice
func GetColumnRanks ¶
GetColumnRanks returns column indices sorted by variance (descending)
func GetVarianceByColumn ¶
GetVarianceByColumn calculates variance for each column
func InitializeMatrix ¶ added in v0.9.10
InitializeMatrix creates a new matrix with specified dimensions
func InitializeScoresAndLoadings ¶ added in v0.9.10
InitializeScoresAndLoadings creates new score and loading matrices for PCA algorithms
func InitializeSquareMatrix ¶ added in v0.9.10
InitializeSquareMatrix creates a new square matrix
func InitializeVector ¶ added in v0.9.10
InitializeVector creates a new vector of specified size
func NewKernelPCAEngine ¶
NewKernelPCAEngine creates a new Kernel PCA engine
func NewPCAEngine ¶
NewPCAEngine creates a new PCA engine instance
func NewPCAEngineForMethod ¶
NewPCAEngineForMethod creates a PCA engine for the specified method
func NewTemporalPCAEngine ¶ added in v1.1.0
NewTemporalPCAEngine creates a new Temporal PCA engine instance
func RemoveOutliers ¶
RemoveOutliers removes outliers based on z-score
func ValidateComponentCount ¶ added in v0.9.10
ValidateComponentCount validates the number of components requested
func ValidateDataForPCA ¶
ValidateDataForPCA checks if data is suitable for PCA after handling missing values
func ValidateDataMatrix ¶ added in v0.9.10
ValidateDataMatrix validates the basic structure and content of a data matrix
func ValidateKernelConfig ¶ added in v0.9.10
ValidateKernelConfig validates kernel-specific configuration
func ValidateNaNValues ¶ added in v0.9.10
ValidateNaNValues checks for NaN values in the data matrix
func ValidatePCAInput ¶ added in v0.9.10
ValidatePCAInput performs complete validation for PCA input
func ValidateVectorPair ¶ added in v0.9.10
ValidateVectorPair validates that two vectors have the same length
Types ¶
type CorrelationRequest ¶
type CorrelationRequest struct {
Scores mat.Matrix // PC scores matrix (samples × components)
MetadataNumeric map[string][]float64 // Numeric metadata columns
MetadataCategorical map[string][]string // Categorical metadata columns
Components []int // Which PCs to include (0-based)
Method string // "pearson" or "spearman"
}
CorrelationRequest defines the input for correlation calculations
type CorrelationResult ¶
type CorrelationResult struct {
Correlations map[string][]float64 // Variable name -> correlations with each PC
PValues map[string][]float64 // Variable name -> p-values
Variables []string // Order of variables
Components []string // PC labels
}
CorrelationResult contains the correlation analysis results
func CalculateEigencorrelations ¶
func CalculateEigencorrelations(request CorrelationRequest) (*CorrelationResult, error)
CalculateEigencorrelations computes correlations between PC scores and metadata variables
This function calculates Pearson or Spearman correlations between principal component scores and external metadata variables (both numeric and categorical). For categorical variables, one-hot encoding is performed before correlation calculation.
Reference: Jolliffe, I.T. (2002). Principal Component Analysis, 2nd edition. Springer.
type EllipseParams ¶
type EllipseParams struct {
CenterX float64
CenterY float64
MajorAxis float64
MinorAxis float64
Angle float64 // in radians
ConfidenceLevel float64
}
EllipseParams contains parameters for drawing a confidence ellipse
type KernelPCAImpl ¶
type KernelPCAImpl struct {
// contains filtered or unexported fields
}
KernelPCAImpl implements the PCAEngine interface for Kernel PCA. Kernel PCA performs nonlinear dimensionality reduction by projecting data into a higher-dimensional feature space using kernel functions, then performing PCA in that space.
Reference: Schölkopf, B., Smola, A., & Müller, K.R. (1998). Nonlinear component analysis as a kernel eigenvalue problem. Neural Computation, 10(5), 1299-1319.
func (*KernelPCAImpl) FitTransform ¶
func (kpca *KernelPCAImpl) FitTransform(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
FitTransform fits the model and transforms the data in one step
type KernelType ¶
type KernelType string
KernelType represents the type of kernel function to use
const (
	// KernelRBF is the Radial Basis Function (Gaussian) kernel
	KernelRBF KernelType = "rbf"
	// KernelLinear is the linear kernel (equivalent to standard PCA)
	KernelLinear KernelType = "linear"
	// KernelPoly is the polynomial kernel
	KernelPoly KernelType = "poly"
)
type MissingValueHandler ¶
type MissingValueHandler struct {
// contains filtered or unexported fields
}
MissingValueHandler handles missing values in data matrices
func NewMissingValueHandler ¶
func NewMissingValueHandler(strategy types.MissingValueStrategy) *MissingValueHandler
NewMissingValueHandler creates a new missing value handler
func (*MissingValueHandler) HandleMissingValues ¶
func (h *MissingValueHandler) HandleMissingValues(data types.Matrix, missingInfo *types.MissingValueInfo, selectedCols []int) (types.Matrix, error)
HandleMissingValues processes missing values according to the specified strategy. It only considers missing values in the selected columns.
type PCAImpl ¶
type PCAImpl struct {
// contains filtered or unexported fields
}
PCAImpl implements the PCAEngine interface
func (*PCAImpl) FitTransform ¶
FitTransform fits the model and transforms the data in one step
func (*PCAImpl) SetLoadings ¶
SetLoadings sets the loadings matrix and marks the engine as fitted
func (*PCAImpl) SetPreprocessor ¶
func (p *PCAImpl) SetPreprocessor(preprocessor *Preprocessor)
SetPreprocessor sets the preprocessor for the PCA engine
type PCAMetricsCalculator ¶
type PCAMetricsCalculator struct {
// contains filtered or unexported fields
}
PCAMetricsCalculator calculates advanced metrics for PCA results
func NewPCAMetricsCalculator ¶
func NewPCAMetricsCalculator(scores, loadings *mat.Dense, mean, stdDev []float64) *PCAMetricsCalculator
NewPCAMetricsCalculator creates a new metrics calculator
func (*PCAMetricsCalculator) CalculateMetrics ¶
func (m *PCAMetricsCalculator) CalculateMetrics(originalData types.Matrix) ([]types.SampleMetrics, error)
CalculateMetrics computes all metrics for each sample
func (*PCAMetricsCalculator) CalculateQLimits ¶
func (m *PCAMetricsCalculator) CalculateQLimits(eigenvalues []float64, totalComponents int) (limit95, limit99 float64)
CalculateQLimits calculates the confidence limits for Q-residuals (SPE, the Squared Prediction Error).
Reference: Jackson, J.E., & Mudholkar, G.S. (1979). Control procedures for residuals associated with principal component analysis. Technometrics, 21(3), 341-349.
func (*PCAMetricsCalculator) CalculateT2Limits ¶
func (m *PCAMetricsCalculator) CalculateT2Limits() (limit95, limit99 float64)
CalculateT2Limits calculates the confidence limits for Hotelling's T² statistic
type Preprocessor ¶
type Preprocessor struct {
// Preprocessing parameters
MeanCenter bool
StandardScale bool
RobustScale bool
ScaleOnly bool
SNV bool
VectorNorm bool
// contains filtered or unexported fields
}
Preprocessor handles data preprocessing for PCA
func NewPreprocessor ¶
func NewPreprocessor(meanCenter, standardScale, robustScale bool) *Preprocessor
NewPreprocessor creates a new preprocessor instance
func NewPreprocessorFull ¶
func NewPreprocessorFull(meanCenter, standardScale, robustScale, snv, vectorNorm bool) *Preprocessor
NewPreprocessorFull creates a new preprocessor instance with all options
func NewPreprocessorWithScaleOnly ¶
func NewPreprocessorWithScaleOnly(meanCenter, standardScale, robustScale, scaleOnly, snv, vectorNorm bool) *Preprocessor
NewPreprocessorWithScaleOnly creates a new preprocessor instance with scale-only option
func (*Preprocessor) Fit ¶
func (p *Preprocessor) Fit(data types.Matrix) error
Fit calculates preprocessing parameters from the data
func (*Preprocessor) FitTransform ¶
FitTransform fits the preprocessor and transforms the data
func (*Preprocessor) GetMADs ¶
func (p *Preprocessor) GetMADs() []float64
GetMADs returns the fitted MAD (Median Absolute Deviation) values
func (*Preprocessor) GetMeans ¶
func (p *Preprocessor) GetMeans() []float64
GetMeans returns the fitted mean values
func (*Preprocessor) GetMedians ¶
func (p *Preprocessor) GetMedians() []float64
GetMedians returns the fitted median values
func (*Preprocessor) GetRowMeans ¶
func (p *Preprocessor) GetRowMeans() []float64
GetRowMeans returns the fitted row mean values (for SNV)
func (*Preprocessor) GetRowStdDevs ¶
func (p *Preprocessor) GetRowStdDevs() []float64
GetRowStdDevs returns the fitted row standard deviation values (for SNV)
func (*Preprocessor) GetStdDevs ¶
func (p *Preprocessor) GetStdDevs() []float64
GetStdDevs returns the fitted standard deviation values (original, before scaling)
func (*Preprocessor) InverseTransform ¶
InverseTransform reverses the preprocessing. Note: When SNV is combined with column-wise preprocessing, the inverse transform only reverses the column-wise operations. Full reversal of SNV after column preprocessing would require storing the full transformed matrix.
func (*Preprocessor) IsSNVEnabled ¶
func (p *Preprocessor) IsSNVEnabled() bool
IsSNVEnabled returns whether SNV preprocessing is enabled
func (*Preprocessor) SetFittedParameters ¶
func (p *Preprocessor) SetFittedParameters(means, stdDevs, medians, mads, rowMeans, rowStdDevs []float64) error
SetFittedParameters sets the fitted parameters for the preprocessor
type TemporalPCAImpl ¶ added in v1.1.0
type TemporalPCAImpl struct {
// contains filtered or unexported fields
}
TemporalPCAImpl implements the PCAEngine interface for Temporal PCA (SSA-style) Based on Singular Spectrum Analysis (SSA) methodology.
References:
- Broomhead & King (1986): "Extracting qualitative dynamics from experimental data"
- Vautard & Ghil (1989): "Singular spectrum analysis in nonlinear dynamics"
- Golyandina et al. (2001): "Analysis of Time Series Structure: SSA and related techniques"
- Ghil et al. (2002): "Advanced spectral methods for climatic time series"
func (*TemporalPCAImpl) ComputeVariableImportance ¶ added in v1.1.0
func (t *TemporalPCAImpl) ComputeVariableImportance() ([][]float64, error)
ComputeVariableImportance aggregates loadings across lags to show variable importance. Returns a matrix where rows are components and columns are original variables. Uses RMS (Root Mean Square) aggregation to capture overall contribution strength.
func (*TemporalPCAImpl) Fit ¶ added in v1.1.0
Fit trains the Temporal PCA model on the provided data. Algorithm complexity: O((T-L+1) × (p×L)²) for SVD computation, where T is the number of samples, p is the number of variables, and L is the number of lags.
func (*TemporalPCAImpl) FitTransform ¶ added in v1.1.0
func (t *TemporalPCAImpl) FitTransform(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
FitTransform fits the model and transforms the data in one step
func (*TemporalPCAImpl) GetLagContributions ¶ added in v1.1.0
func (t *TemporalPCAImpl) GetLagContributions() ([][]float64, error)
GetLagContributions returns the contribution of each lag to the total variance. Returns a matrix where rows are components and columns are lags.
func (*TemporalPCAImpl) GetLoadingForLag ¶ added in v1.1.0
func (t *TemporalPCAImpl) GetLoadingForLag(variable, lag, component int) (float64, error)
GetLoadingForLag returns the loading value for a specific variable and lag in the given component.
- variable: 0-indexed variable number
- lag: 0-indexed lag number (0 = current time, 1 = t-1, etc.)
- component: 0-indexed component number
func (*TemporalPCAImpl) ReconstructionError ¶ added in v1.1.0
func (t *TemporalPCAImpl) ReconstructionError(data types.Matrix) ([]float64, error)
ReconstructionError computes the reconstruction error for each sample. This is computed in the standardized lag space as per SSA methodology. Algorithm complexity: O((T-L+1) × (p×L) × k), where k is the number of components.
type TransformType ¶
type TransformType string
TransformType identifies a mathematical transformation that can be applied to variables.
const (
	TransformLog        TransformType = "log"
	TransformSqrt       TransformType = "sqrt"
	TransformSquare     TransformType = "square"
	TransformReciprocal TransformType = "reciprocal"
)