Documentation
¶
Overview ¶
Package som provides functionality for working with self-organizing maps.
Example ¶
Trains an SOM from randomly generated 2D data, and plots a scatter plot of the data and the trained SOM.
package main
import (
"fmt"
"image/png"
"log"
"math/rand"
"os"
"github.com/mlange-42/som"
"github.com/mlange-42/som/decay"
"github.com/mlange-42/som/distance"
"github.com/mlange-42/som/layer"
"github.com/mlange-42/som/neighborhood"
"github.com/mlange-42/som/norm"
"github.com/mlange-42/som/plot"
"github.com/mlange-42/som/table"
)
// Trains an SOM from randomly generated 2D data,
// and plots a scatter plot of the data and the trained SOM.
//
// Training progress is printed every 100 epochs, and the resulting plot is
// written to "som-xy.png" in the current working directory. Any error
// terminates the program via log.Fatal.
func main() {
	// Create a random number generator for reproducible results
	rng := rand.New(rand.NewSource(42))

	// Generate random data for training
	data := generateRandomData("a", "b", 250, rng)

	// Create an SOM configuration, matching table columns.
	config := som.SomConfig{
		Size: layer.Size{
			Width:  12,
			Height: 8,
		},
		Neighborhood: &neighborhood.Gaussian{},
		MapMetric:    &neighborhood.ManhattanMetric{},
		Layers: []*som.LayerDef{
			{
				Name: "L1",
				Columns: []string{
					"a",
					"b",
				},
				// One normalizer per column; the data is used as generated.
				Norm: []norm.Normalizer{
					&norm.Identity{},
					&norm.Identity{},
				},
				Metric: &distance.Euclidean{},
			},
		},
	}

	// Create an SOM from the configuration
	s, err := som.New(&config)
	if err != nil {
		log.Fatal(err)
	}

	// Create a training configuration
	trainingConfig := som.TrainingConfig{
		Epochs:             1000,
		LearningRate:       &decay.Polynomial{Start: 0.5, End: 0.01, Exp: 2},
		NeighborhoodRadius: &decay.Polynomial{Start: 6, End: 0.5, Exp: 2},
		ViSomLambda:        0.0,
	}

	// Create a trainer instance from SOM and training data
	trainer, err := som.NewTrainer(s, []*table.Table{data}, &trainingConfig, rng)
	if err != nil {
		log.Fatal(err)
	}

	// Create a channel for training progress updates
	progress := make(chan som.TrainingProgress)

	// Run SOM training asynchronously
	go trainer.Train(progress)

	// Wait for training to finish. Train closes the channel after the last
	// epoch, which ends this loop.
	for p := range progress {
		if p.Epoch%100 == 0 {
			fmt.Printf("Epoch %03d (err=%.4f)\n", p.Epoch, p.Error)
		}
	}

	// Create data sources for plotting
	xy := plot.SomXY{Som: s, XLayer: 0, XColumn: 0, YLayer: 0, YColumn: 1}
	dataXY := plot.TableXY{XTable: data, YTable: data, XColumn: 0, YColumn: 1, XNorm: &norm.Identity{}, YNorm: &norm.Identity{}}

	// Make a scatter plot
	img, err := plot.XY("xy", &xy, *s.Size(), 600, 400, nil, nil, true, &dataXY, nil, nil, false)
	if err != nil {
		log.Fatal(err)
	}

	// Open a file to write the image
	file, err := os.Create("som-xy.png")
	if err != nil {
		log.Fatal(err)
	}

	// Write the image to a PNG file. The file is closed explicitly instead of
	// with defer: log.Fatal exits the process without running deferred calls,
	// and the error returned by Close has to be checked to know that the
	// image actually reached the disk.
	if err := png.Encode(file, img); err != nil {
		_ = file.Close() // best effort; the encode error is the one reported
		log.Fatal(err)
	}
	if err := file.Close(); err != nil {
		log.Fatal(err)
	}
}
// generateRandomData builds a two-column table of synthetic training data.
//
// Each of the rows samples has x = rng.Float64()*2 - 1 and y = x*x plus
// normally distributed noise scaled by 0.1. Values are stored row by row as
// (x, y) pairs; xCol and yCol become the column names.
// Panics if the table cannot be created.
func generateRandomData(xCol, yCol string, rows int, rng *rand.Rand) *table.Table {
	values := make([]float64, rows*2)

	// Step through the flat buffer one (x, y) pair at a time.
	for off := 0; off < len(values); off += 2 {
		x := rng.Float64()*2 - 1
		values[off] = x
		values[off+1] = x*x + rng.NormFloat64()*0.1
	}

	tbl, err := table.NewWithData([]string{xCol, yCol}, values)
	if err != nil {
		panic(err)
	}
	return tbl
}
Output:
Epoch 000 (err=0.1164)
Epoch 100 (err=0.0641)
Epoch 200 (err=0.0370)
Epoch 300 (err=0.0206)
Epoch 400 (err=0.0119)
Epoch 500 (err=0.0064)
Epoch 600 (err=0.0036)
Epoch 700 (err=0.0022)
Epoch 800 (err=0.0015)
Epoch 900 (err=0.0012)
Index ¶
- type Evaluator
- type LayerDef
- type Predictor
- func (p *Predictor) FillMissing(tables []*table.Table) error
- func (p *Predictor) GetBMU() []int
- func (p *Predictor) GetBMUTable() *table.Table
- func (p *Predictor) GetBMUWithDistance() ([]int, []float64)
- func (p *Predictor) GetDensity() []int
- func (p *Predictor) GetError(rmse bool) []float64
- func (p *Predictor) GetRowBMU(row int) (int, float64)
- func (p *Predictor) Predict(tables []*table.Table, layers []string) error
- func (p *Predictor) Som() *Som
- func (p *Predictor) Tables() []*table.Table
- type Som
- func (s *Som) GetBMU(data [][]float64) (int, float64)
- func (s *Som) GetBMU2(data [][]float64) (int, float64, int, float64)
- func (s *Som) Layers() []*layer.Layer
- func (s *Som) Learn(data [][]float64, alpha, radius, lambda float64) float64
- func (s *Som) MapMetric() neighborhood.Metric
- func (s *Som) Neighborhood() neighborhood.Neighborhood
- func (s *Som) Randomize(rng *rand.Rand)
- func (s *Som) Size() *layer.Size
- func (s *Som) UMatrix(fill bool) [][]float64
- func (s *Som) ViSomMetric() neighborhood.Metric
- type SomConfig
- type Trainer
- type TrainingConfig
- type TrainingProgress
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Evaluator ¶ added in v0.2.0
type Evaluator struct {
// contains filtered or unexported fields
}
func NewEvaluator ¶ added in v0.2.0
func (*Evaluator) TopographicError ¶ added in v0.2.0
func (e *Evaluator) TopographicError(dist neighborhood.Metric) float64
type LayerDef ¶
// LayerDef is the configuration of a single layer in a Self-Organizing Map (SOM).
type LayerDef struct {
Name string // Name of the layer
Columns []string // Columns to use from the data
Norm []norm.Normalizer // Normalization functions for the columns
Metric distance.Distance // Distance metric to use for this layer
Weight float64 // Weight value for this layer (for multi-layer SOMs); 0.0 is interpreted as the standard weight of 1.0, use a negative value to get a weight of 0.0
Categorical bool // Whether the layer contains categorical data
Weights []float64 // Pre-computed layer weights (if provided)
}
LayerDef represents the configuration for a single layer in a Self-Organizing Map (SOM). It defines the name, columns, normalization, metric, weight, and whether the layer is categorical. If pre-computed weights are provided, the layer can be initialized from them.
A weight value of 0.0 is interpreted as standard weight of 1.0. To get a weight of 0.0, give the weight field a negative value.
type Predictor ¶
type Predictor struct {
// contains filtered or unexported fields
}
Predictor is a struct that holds an SOM and a set of tables for making predictions.
func NewPredictor ¶
NewPredictor creates a new Predictor instance with the given SOM and tables. The tables must have the same number of rows as the SOM has nodes. Tables are assumed to be normalized. An error is returned if the tables do not match the SOM.
func (*Predictor) FillMissing ¶ added in v0.2.0
FillMissing fills in any missing values in the input tables by using the best matching units (BMUs) from the SOM to determine the appropriate values to fill in. Tables in the argument should not be normalized. The number of rows in the input tables must match the number of rows in the Predictor's tables.
func (*Predictor) GetBMU ¶
GetBMU returns a slice of the best matching unit (BMU) indices for each row in the associated tables.
func (*Predictor) GetBMUTable ¶
GetBMUTable returns a table with the best matching units (BMUs) for each row in the associated tables. The table contains the following columns:
- node_id: the index of the BMU node
- node_x: the x-coordinate of the BMU node
- node_y: the y-coordinate of the BMU node
- node_dist: the distance between the input data and the BMU node
func (*Predictor) GetBMUWithDistance ¶
GetBMUWithDistance returns the best matching unit (BMU) indices and the distances between the input data and the BMU for each row in the associated tables.
func (*Predictor) GetDensity ¶
GetDensity returns the density of the SOM, which is the number of data points that map to each node in the SOM. The returned slice has one element for each node in the SOM, where the value at index i represents the number of data points that map to the node at index i.
func (*Predictor) GetError ¶
GetError returns the error for each node in the SOM, either as the raw sum of squared distances between the input data and the BMU, or as the root mean squared error (RMSE). The returned slice has one element for each node in the SOM, where the value at index i represents the error for the node at index i.
If rmse is true, the returned values will be the RMSE. Otherwise, the returned values will be the MSE.
func (*Predictor) GetRowBMU ¶ added in v0.2.0
GetRowBMU returns the best matching unit (BMU) index and the distance between the input data and the BMU for the given row in the associated tables.
func (*Predictor) Predict ¶ added in v0.2.0
Predict generates predictions for the specified layers in the input tables using the self-organizing map (SOM) associated with the Predictor. The input tables should not be normalized. The function will create new tables for the predicted layers and populate them with the predicted values.
If any of the layers to predict are already present in the input tables, an error will be returned. The number of rows in the input tables must match the number of rows in the Predictor's tables.
type Som ¶
type Som struct {
// contains filtered or unexported fields
}
Som represents a Self-Organizing Map (SOM) model.
func New ¶
New creates a new Self-Organizing Map (SOM) instance based on the provided SomConfig. It initializes the layers of the SOM with the specified configurations. If the layer has pre-computed weights, they can be provided to initialize the layer. The function returns the created SOM instance and an error if any issues occur during the initialization.
func (*Som) GetBMU ¶
GetBMU finds the Best Matching Unit (BMU) for the given input data. It calculates the total distance between the input data and each node in the SOM, and returns the index of the node with the minimum total distance, along with that minimum distance.
func (*Som) Learn ¶
Learn updates the weights of the Self-Organizing Map (SOM) based on the given input data. It calculates the Best Matching Unit (BMU) for the input data, then updates the weights of the nodes in the SOM based on the neighborhood function and learning rate. The function returns the distance between the input data and the BMU.
func (*Som) MapMetric ¶
func (s *Som) MapMetric() neighborhood.Metric
MapMetric returns the metric used to calculate distances between nodes in the Self-Organizing Map (SOM).
func (*Som) Neighborhood ¶
func (s *Som) Neighborhood() neighborhood.Neighborhood
Neighborhood returns the neighborhood function used by the Self-Organizing Map (SOM) instance.
func (*Som) UMatrix ¶
UMatrix computes the U-Matrix for the Self-Organizing Map. The U-Matrix visualizes the distances between neighboring nodes in the map, which can be used to identify cluster boundaries. The returned matrix has double the dimensions of the original map, with the values representing the distances between nodes and their neighbors.
If fill is true, cells that don't correspond to a link, but to a node or an "empty space" are filled with the average of the surrounding links.
func (*Som) ViSomMetric ¶ added in v0.2.0
func (s *Som) ViSomMetric() neighborhood.Metric
ViSomMetric returns the metric used to calculate distances between nodes in the ViSOM (Visualization Induced Self-Organizing Map).
type SomConfig ¶
// SomConfig is the configuration of a Self-Organizing Map (SOM):
// map size, layer definitions, neighborhood function and map metrics.
type SomConfig struct {
Size layer.Size // Size of the SOM
Layers []*LayerDef // Layer definitions
Neighborhood neighborhood.Neighborhood // Neighborhood function of the SOM
MapMetric neighborhood.Metric // Metric used to calculate distances on the map
ViSomMetric neighborhood.Metric // Metric used to calculate distances on the map for ViSOM update
}
SomConfig represents the configuration for a Self-Organizing Map (SOM). It defines the size of the map, the layers of data to be mapped, the neighborhood function, and the metric used to calculate distances on the map.
func (*SomConfig) PrepareTables ¶
func (c *SomConfig) PrepareTables(reader table.Reader, ignoreLayers []string, updateNormalizers bool, keepOriginal bool) (normalized, raw []*table.Table, err error)
PrepareTables reads the CSV data and creates a table for each layer defined in the SomConfig. If a categorical layer has no columns specified, it will attempt to read the class names for that layer and create a table from the classes. The created tables are returned in the same order as the layers in the SomConfig.
type Trainer ¶
type Trainer struct {
// contains filtered or unexported fields
}
Trainer is a struct that holds the necessary components for training a Self-Organizing Map (SOM). It contains a reference to the SOM, the training data tables, the training configuration parameters, and a random number generator.
func NewTrainer ¶
func NewTrainer(som *Som, tables []*table.Table, params *TrainingConfig, rng *rand.Rand) (*Trainer, error)
NewTrainer creates a new Trainer instance with the provided SOM, data tables, training configuration, and random number generator. It performs a check on the provided data tables to ensure they are compatible with the SOM. If the check fails, an error is returned.
func (*Trainer) PropagateLabels ¶ added in v0.2.0
func (*Trainer) Train ¶
func (t *Trainer) Train(progress chan TrainingProgress)
Train trains the Self-Organizing Map (SOM) using the provided training data and configuration. It iterates through the specified number of epochs, updating the learning rate and neighborhood radius at each epoch. For each epoch, it performs a single training iteration, and sends the training progress information (epoch, learning rate, neighborhood radius, mean distance, and quantization error) to the provided progress channel. After all epochs are completed, the channel is closed.
type TrainingConfig ¶
// TrainingConfig holds the configuration parameters for training a
// Self-Organizing Map (SOM).
type TrainingConfig struct {
Epochs int // Number of training epochs
LearningRate decay.Decay // Learning rate decay function
NeighborhoodRadius decay.Decay // Neighborhood radius decay function
WeightDecay decay.Decay // Weight decay coefficient decay function
ViSomLambda float64 // ViSOM lambda resolution parameter
}
TrainingConfig holds the configuration parameters for training a Self-Organizing Map (SOM).
type TrainingProgress ¶
// TrainingProgress represents the progress of a training epoch.
type TrainingProgress struct {
Epoch int // The current epoch number
Alpha float64 // The current learning rate alpha
Radius float64 // The current neighborhood radius
WeightDecay float64 // The weight decay factor
MeanDist float64 // The mean distance of the training data to the SOM
Error float64 // The quantization error (MSE)
}
TrainingProgress represents the progress of a training epoch.
func (*TrainingProgress) CsvHeader ¶
func (p *TrainingProgress) CsvHeader(delim rune) string
CsvHeader returns a CSV header row for the TrainingProgress struct fields, using the provided delimiter.
func (*TrainingProgress) CsvRow ¶
func (p *TrainingProgress) CsvRow(delim rune) string
CsvRow returns a delimiter-separated string representation of the TrainingProgress struct fields, using the provided delimiter character.
Directories
¶
| Path | Synopsis |
|---|---|
|
cmd
|
|
|
som
command
|
|
|
Package conv provides functions for converting between various data types.
|
Package conv provides functions for converting between various data types. |
|
Package csv provides functionality for reading and writing CSV data.
|
Package csv provides functionality for reading and writing CSV data. |
|
Package decay provides parameter decay functions, particularly for SOM learning rate and neighborhood radius.
|
Package decay provides parameter decay functions, particularly for SOM learning rate and neighborhood radius. |
|
Package distance provides distance metrics in the data space.
|
Package distance provides distance metrics in the data space. |
|
Package layer provides SOM layers.
|
Package layer provides SOM layers. |
|
Package neighborhood provides map space distance metrics and neighborhood weight functions.
|
Package neighborhood provides map space distance metrics and neighborhood weight functions. |
|
Package norm provides functionality for normalizing data.
|
Package norm provides functionality for normalizing data. |
|
Package plot provides functions for creating SOM-related plots.
|
Package plot provides functions for creating SOM-related plots. |
|
Package table provides functionality for working with tables.
|
Package table provides functionality for working with tables. |
|
Package yml provides YAML read and write functionality for SOM instances and configurations.
|
Package yml provides YAML read and write functionality for SOM instances and configurations. |