Documentation
¶
Index ¶
- Constants
- func ApplyTransform(data types.Matrix, columns []int, transform TransformType) (types.Matrix, error)
- func CalculateConfidenceEllipse(x, y []float64, confidenceLevel float64) (centerX, centerY, majorAxis, minorAxis, angle float64, err error)
- func CalculateGroupEllipses(scores mat.Matrix, groups []string, pcX, pcY int, confidenceLevel float64) (map[string]EllipseParams, error)
- func CalculateMetricsFromPCAResult(result *types.PCAResult, preprocessedData types.Matrix) ([]types.SampleMetrics, error)
- func GetColumnRanks(data types.Matrix) ([]int, error)
- func GetVarianceByColumn(data types.Matrix) ([]float64, error)
- func ImputeMissing(data types.Matrix, strategy MissingValueStrategy) (types.Matrix, error)
- func NewKernelPCAEngine() types.PCAEngine
- func NewPCAEngine() types.PCAEngine
- func NewPCAEngineForMethod(method string) types.PCAEngine
- func RemoveOutliers(data types.Matrix, threshold float64) (types.Matrix, []int, error)
- func SelectRowsColumns(data types.Matrix, rows, cols []int) (types.Matrix, error)
- func ValidateDataForPCA(data types.Matrix, selectedCols []int) error
- type CorrelationRequest
- type CorrelationResult
- type EllipseParams
- type KernelPCAImpl
- type KernelType
- type MissingValueHandler
- type MissingValueStrategy
- type PCAImpl
- func (p *PCAImpl) Fit(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
- func (p *PCAImpl) FitTransform(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
- func (p *PCAImpl) SetLoadings(loadings types.Matrix, nComponents int) error
- func (p *PCAImpl) SetPreprocessor(preprocessor *Preprocessor)
- func (p *PCAImpl) Transform(data types.Matrix) (types.Matrix, error)
- type PCAMetricsCalculator
- func (m *PCAMetricsCalculator) CalculateMetrics(originalData types.Matrix) ([]types.SampleMetrics, error)
- func (m *PCAMetricsCalculator) CalculateQLimits(eigenvalues []float64, totalComponents int) (limit95, limit99 float64)
- func (m *PCAMetricsCalculator) CalculateT2Limits() (limit95, limit99 float64)
- type Preprocessor
- func NewPreprocessor(meanCenter, standardScale, robustScale bool) *Preprocessor
- func NewPreprocessorFull(meanCenter, standardScale, robustScale, snv, vectorNorm bool) *Preprocessor
- func NewPreprocessorWithScaleOnly(meanCenter, standardScale, robustScale, scaleOnly, snv, vectorNorm bool) *Preprocessor
- func (p *Preprocessor) Fit(data types.Matrix) error
- func (p *Preprocessor) FitTransform(data types.Matrix) (types.Matrix, error)
- func (p *Preprocessor) GetMADs() []float64
- func (p *Preprocessor) GetMeans() []float64
- func (p *Preprocessor) GetMedians() []float64
- func (p *Preprocessor) GetRowMeans() []float64
- func (p *Preprocessor) GetRowStdDevs() []float64
- func (p *Preprocessor) GetStdDevs() []float64
- func (p *Preprocessor) InverseTransform(data types.Matrix) (types.Matrix, error)
- func (p *Preprocessor) IsSNVEnabled() bool
- func (p *Preprocessor) SetFittedParameters(means, stdDevs, medians, mads, rowMeans, rowStdDevs []float64) error
- func (p *Preprocessor) Transform(data types.Matrix) (types.Matrix, error)
- type TransformType
Constants ¶
const (
// Minimum variance/norm threshold to avoid division by zero
MinVarianceThreshold = 1e-8
)
Variables ¶
This section is empty.
Functions ¶
func ApplyTransform ¶
func ApplyTransform(data types.Matrix, columns []int, transform TransformType) (types.Matrix, error)
ApplyTransform applies a mathematical transformation to specified columns
func CalculateConfidenceEllipse ¶
func CalculateConfidenceEllipse(x, y []float64, confidenceLevel float64) (centerX, centerY, majorAxis, minorAxis, angle float64, err error)
CalculateConfidenceEllipse computes the parameters for a confidence ellipse for a 2D set of points at the specified confidence level. Returns center coordinates, semi-major/minor axes, and rotation angle.
Reference: Johnson & Wichern (2007) Applied Multivariate Statistical Analysis
func CalculateGroupEllipses ¶
func CalculateGroupEllipses(scores mat.Matrix, groups []string, pcX, pcY int, confidenceLevel float64) (map[string]EllipseParams, error)
CalculateGroupEllipses computes confidence ellipse parameters for each group in the data. scores is a 2D matrix where each row is an observation, columns are PC scores. groups is a slice indicating the group membership of each observation. pcX and pcY are the indices of the principal components to use (0-based).
func CalculateMetricsFromPCAResult ¶
func CalculateMetricsFromPCAResult(result *types.PCAResult, preprocessedData types.Matrix) ([]types.SampleMetrics, error)
CalculateMetricsFromPCAResult is a convenience function that calculates metrics directly from PCAResult
func GetColumnRanks ¶
GetColumnRanks returns column indices sorted by variance (descending)
func GetVarianceByColumn ¶
GetVarianceByColumn calculates variance for each column
func ImputeMissing ¶
ImputeMissing handles missing values in the data
func NewKernelPCAEngine ¶
NewKernelPCAEngine creates a new Kernel PCA engine
func NewPCAEngine ¶
NewPCAEngine creates a new PCA engine instance
func NewPCAEngineForMethod ¶
NewPCAEngineForMethod creates a PCA engine for the specified method
func RemoveOutliers ¶
RemoveOutliers removes outliers based on z-score
func SelectRowsColumns ¶
SelectRowsColumns provides utilities for data subsetting
Types ¶
type CorrelationRequest ¶
type CorrelationRequest struct {
Scores mat.Matrix // PC scores matrix (samples × components)
MetadataNumeric map[string][]float64 // Numeric metadata columns
MetadataCategorical map[string][]string // Categorical metadata columns
Components []int // Which PCs to include (0-based)
Method string // "pearson" or "spearman"
}
CorrelationRequest defines the input for correlation calculations
type CorrelationResult ¶
type CorrelationResult struct {
Correlations map[string][]float64 // Variable name -> correlations with each PC
PValues map[string][]float64 // Variable name -> p-values
Variables []string // Order of variables
Components []string // PC labels
}
CorrelationResult contains the correlation analysis results
func CalculateEigencorrelations ¶
func CalculateEigencorrelations(request CorrelationRequest) (*CorrelationResult, error)
CalculateEigencorrelations computes correlations between PC scores and metadata variables
This function calculates Pearson or Spearman correlations between principal component scores and external metadata variables (both numeric and categorical). For categorical variables, one-hot encoding is performed before correlation calculation.
Reference: Jolliffe, I.T. (2002). Principal Component Analysis, 2nd edition. Springer.
type EllipseParams ¶
type EllipseParams struct {
CenterX float64
CenterY float64
MajorAxis float64
MinorAxis float64
Angle float64 // in radians
ConfidenceLevel float64
}
EllipseParams contains parameters for drawing a confidence ellipse
type KernelPCAImpl ¶
type KernelPCAImpl struct {
// contains filtered or unexported fields
}
KernelPCAImpl implements the PCAEngine interface for Kernel PCA. Kernel PCA performs nonlinear dimensionality reduction by projecting data into a higher-dimensional feature space using kernel functions, then performing PCA in that space. Reference: Schölkopf, B., Smola, A., & Müller, K.R. (1998). Nonlinear component analysis as a kernel eigenvalue problem. Neural Computation, 10(5), 1299-1319.
func (*KernelPCAImpl) FitTransform ¶
func (kpca *KernelPCAImpl) FitTransform(data types.Matrix, config types.PCAConfig) (*types.PCAResult, error)
FitTransform fits the model and transforms the data in one step
type KernelType ¶
type KernelType string
KernelType represents the type of kernel function to use
const (
// KernelRBF is the Radial Basis Function (Gaussian) kernel
KernelRBF KernelType = "rbf"
// KernelLinear is the linear kernel (equivalent to standard PCA)
KernelLinear KernelType = "linear"
// KernelPoly is the polynomial kernel
KernelPoly KernelType = "poly"
)
type MissingValueHandler ¶
type MissingValueHandler struct {
// contains filtered or unexported fields
}
MissingValueHandler handles missing values in data matrices
func NewMissingValueHandler ¶
func NewMissingValueHandler(strategy types.MissingValueStrategy) *MissingValueHandler
NewMissingValueHandler creates a new missing value handler
func (*MissingValueHandler) HandleMissingValues ¶
func (h *MissingValueHandler) HandleMissingValues(data types.Matrix, missingInfo *types.MissingValueInfo, selectedCols []int) (types.Matrix, error)
HandleMissingValues processes missing values according to the specified strategy. It only considers missing values in the selected columns.
type MissingValueStrategy ¶
type MissingValueStrategy string
MissingValueStrategy names a strategy for dealing with missing data.
const (
MissingMean MissingValueStrategy = "mean"
MissingMedian MissingValueStrategy = "median"
MissingZero MissingValueStrategy = "zero"
MissingDrop MissingValueStrategy = "drop"
)
type PCAImpl ¶
type PCAImpl struct {
// contains filtered or unexported fields
}
PCAImpl implements the PCAEngine interface
func (*PCAImpl) FitTransform ¶
FitTransform fits the model and transforms the data in one step
func (*PCAImpl) SetLoadings ¶
SetLoadings sets the loadings matrix and marks the engine as fitted
func (*PCAImpl) SetPreprocessor ¶
func (p *PCAImpl) SetPreprocessor(preprocessor *Preprocessor)
SetPreprocessor sets the preprocessor for the PCA engine
type PCAMetricsCalculator ¶
type PCAMetricsCalculator struct {
// contains filtered or unexported fields
}
PCAMetricsCalculator calculates advanced metrics for PCA results
func NewPCAMetricsCalculator ¶
func NewPCAMetricsCalculator(scores, loadings *mat.Dense, mean, stdDev []float64) *PCAMetricsCalculator
NewPCAMetricsCalculator creates a new metrics calculator
func (*PCAMetricsCalculator) CalculateMetrics ¶
func (m *PCAMetricsCalculator) CalculateMetrics(originalData types.Matrix) ([]types.SampleMetrics, error)
CalculateMetrics computes all metrics for each sample
func (*PCAMetricsCalculator) CalculateQLimits ¶
func (m *PCAMetricsCalculator) CalculateQLimits(eigenvalues []float64, totalComponents int) (limit95, limit99 float64)
CalculateQLimits calculates the confidence limits for Q-residuals (SPE - Squared Prediction Error). Reference: Jackson, J.E., & Mudholkar, G.S. (1979). Control procedures for residuals associated with principal component analysis. Technometrics, 21(3), 341-349.
func (*PCAMetricsCalculator) CalculateT2Limits ¶
func (m *PCAMetricsCalculator) CalculateT2Limits() (limit95, limit99 float64)
CalculateT2Limits calculates the confidence limits for Hotelling's T² statistic
type Preprocessor ¶
type Preprocessor struct {
// Preprocessing parameters
MeanCenter bool
StandardScale bool
RobustScale bool
ScaleOnly bool
SNV bool
VectorNorm bool
// contains filtered or unexported fields
}
Preprocessor handles data preprocessing for PCA
func NewPreprocessor ¶
func NewPreprocessor(meanCenter, standardScale, robustScale bool) *Preprocessor
NewPreprocessor creates a new preprocessor instance
func NewPreprocessorFull ¶
func NewPreprocessorFull(meanCenter, standardScale, robustScale, snv, vectorNorm bool) *Preprocessor
NewPreprocessorFull creates a new preprocessor instance with all options
func NewPreprocessorWithScaleOnly ¶
func NewPreprocessorWithScaleOnly(meanCenter, standardScale, robustScale, scaleOnly, snv, vectorNorm bool) *Preprocessor
NewPreprocessorWithScaleOnly creates a new preprocessor instance with scale-only option
func (*Preprocessor) Fit ¶
func (p *Preprocessor) Fit(data types.Matrix) error
Fit calculates preprocessing parameters from the data
func (*Preprocessor) FitTransform ¶
FitTransform fits the preprocessor and transforms the data
func (*Preprocessor) GetMADs ¶
func (p *Preprocessor) GetMADs() []float64
GetMADs returns the fitted MAD (Median Absolute Deviation) values
func (*Preprocessor) GetMeans ¶
func (p *Preprocessor) GetMeans() []float64
GetMeans returns the fitted mean values
func (*Preprocessor) GetMedians ¶
func (p *Preprocessor) GetMedians() []float64
GetMedians returns the fitted median values
func (*Preprocessor) GetRowMeans ¶
func (p *Preprocessor) GetRowMeans() []float64
GetRowMeans returns the fitted row mean values (for SNV)
func (*Preprocessor) GetRowStdDevs ¶
func (p *Preprocessor) GetRowStdDevs() []float64
GetRowStdDevs returns the fitted row standard deviation values (for SNV)
func (*Preprocessor) GetStdDevs ¶
func (p *Preprocessor) GetStdDevs() []float64
GetStdDevs returns the fitted standard deviation values (original, before scaling)
func (*Preprocessor) InverseTransform ¶
InverseTransform reverses the preprocessing. Note: When SNV is combined with column-wise preprocessing, the inverse transform only reverses the column-wise operations. Full reversal of SNV after column preprocessing would require storing the full transformed matrix.
func (*Preprocessor) IsSNVEnabled ¶
func (p *Preprocessor) IsSNVEnabled() bool
IsSNVEnabled returns whether SNV preprocessing is enabled
func (*Preprocessor) SetFittedParameters ¶
func (p *Preprocessor) SetFittedParameters(means, stdDevs, medians, mads, rowMeans, rowStdDevs []float64) error
SetFittedParameters sets the fitted parameters for the preprocessor
type TransformType ¶
type TransformType string
TransformType names a mathematical transformation that can be applied to variables.
const (
TransformLog TransformType = "log"
TransformSqrt TransformType = "sqrt"
TransformSquare TransformType = "square"
TransformReciprocal TransformType = "reciprocal"
)