Documentation ¶
Overview ¶
Package nn provides a grid neural network implementation with both CPU and GPU execution.
A grid neural network organizes layers into a 2D grid structure where:
- Data flows sequentially through grid cells (row by row, column by column)
- Each grid cell contains multiple layers
- Total layers = gridRows × gridCols × layersPerCell
The network supports forward and backward propagation with multiple activation functions:
- ScaledReLU: v * 1.1, then ReLU
- Sigmoid: 1 / (1 + exp(-v))
- Tanh: tanh(v)
- Softplus: log(1 + exp(v))
- LeakyReLU: v if v >= 0, else v * 0.1
Activations are assigned cyclically based on grid position: (cellIdx * layersPerCell + layer) % 5
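For instance, a minimal sketch of how a layer's activation index is resolved (cell indexing assumed row-major, matching the data-flow order above):

    cellIdx := row*gridCols + col                 // assumed row-major cell index
    actIdx := (cellIdx*layersPerCell + layer) % 5 // 0..4 -> ScaledReLU, Sigmoid, Tanh, Softplus, LeakyReLU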
Example usage:
network := nn.NewNetwork(inputSize, gridRows, gridCols, layersPerCell)

// Forward pass on CPU
output, _ := network.ForwardCPU(input)

// Forward pass on GPU
network.InitGPU()
defer network.ReleaseGPU()
outputGPU, _, _ := network.ForwardGPU(input)

// Backward pass
gradInput, _ := network.BackwardCPU(gradOutput)
Index ¶
- Variables
- func Activate[T Numeric](v T, activation ActivationType) T
- func ActivateDerivative[T Numeric](preActivation T, activation ActivationType) T
- func ComputeSilhouetteScore(data [][]float32, assignments []int) float32
- func ConvertMaskToFloat(mask []bool) []float32
- func ConvertSliceFloat32ToT[T Numeric](src []float32) []T
- func ConvertSliceTToFloat32[T Numeric](src []T) []float32
- func EuclideanDistance(a, b []float32) float32
- func FlattenFrom2D(input []float32, batchSize, channels, height, width int) []float32
- func ForwardSoftmaxCPU(input []float32, config *LayerConfig) ([]float32, error)
- func GenericForwardPass[T Numeric](n *Network, input *Tensor[T], backend Backend[T]) (*Tensor[T], []*Tensor[T], []any, time.Duration)
- func GetLayerInitFunction(name string) (interface{}, bool)
- func GetLoomCompatibility(detected []DetectedTensor) map[string]int
- func InferLayerType(shape []int) string
- func InitMultiHeadAttentionLayer(config *LayerConfig, isGPU bool)
- func IsFloatType[T Numeric]() bool
- func IsIntegerType[T Numeric]() bool
- func KMeansCluster(data [][]float32, k int, maxIter int, parallel bool) (centroids [][]float32, assignments []int)
- func LSTMBackward[T Numeric](gradOutput, input *Tensor[T], states map[string]*Tensor[T], ...) (gradInput *Tensor[T], gradWeights *LSTMWeights[T])
- func LSTMForward[T Numeric](input *Tensor[T], weights *LSTMWeights[T], ...) (output *Tensor[T], hidden, cell *Tensor[T], allGates map[string]*Tensor[T])
- func LayerTypeToName(lt LayerType) string
- func LoadGenericFromBytes(weightsData []byte, configData []byte) (*Network, []DetectedTensor, error)
- func LoadSafetensors(filepath string) (map[string][]float32, error)
- func LoadSafetensorsFromBytes(data []byte) (map[string][]float32, error)
- func LoadSafetensorsWithShapes(data []byte) (map[string]TensorWithShape, error)
- func Max(v []float32) float32
- func MaxAbsDiff(a, b []float32) float64
- func Mean(v []float32) float32
- func Min(v []float32) float32
- func MultiHeadAttentionBackward[T Numeric](gradOutput, input *Tensor[T], weights *AttentionWeights[T]) (gradInput *Tensor[T], gradWeights *AttentionWeights[T])
- func MultiHeadAttentionForwardCPU(input []float32, config *LayerConfig, batchSize int) ([]float32, []float32)
- func ParallelForward[T Numeric](input *Tensor[T], branches []*LayerConfig, batchSize int, combineMode string) (*Tensor[T], []*Tensor[T], error)
- func ParallelForwardFiltered[T Numeric](input *Tensor[T], branches []*LayerConfig, gateConfig *LayerConfig, ...) (*Tensor[T], []*Tensor[T], []float32, error)
- func PrintEnsembleReport(matches []EnsembleMatch, topN int)
- func ReshapeTo2D(input []float32, batchSize int) ([]float32, int, int, int)
- func ResidualForwardCPU(input, skipInput []float32) []float32
- func RmsNormForwardCPU(input []float32, residual []float32, config *LayerConfig, batchSize int) []float32
- func SaveBundle(filename string, models map[string]*Network) error
- func ScaleWeights(weights []float32, scale float32)
- func SequentialForward[T Numeric](input *Tensor[T], layers []*LayerConfig, batchSize int) (*Tensor[T], []*Tensor[T], error)
- func StepForwardGeneric[T Numeric](n *Network, state *GenericStepState[T], backend Backend[T]) time.Duration
- func SwiGLUForwardCPU(input []float32, config *LayerConfig, batchSize int) ([]float32, []float32)
- func UpdateWeightsGeneric[T Numeric](n *Network, learningRate float64, kernelGrads []any, biasGrads []any)
- type ActivationType
- type AdamWOptimizer
- type AdaptationComparison
- func (ac *AdaptationComparison) AddResult(result *AdaptationResult)
- func (ac *AdaptationComparison) GetBestByAvgAccuracy() *AdaptationResult
- func (ac *AdaptationComparison) GetMostStable() *AdaptationResult
- func (ac *AdaptationComparison) PrintComparisonSummary(title string)
- func (ac *AdaptationComparison) PrintComparisonTimeline(title string, numWindows int)
- func (ac *AdaptationComparison) SaveToJSON(filepath string) error
- type AdaptationResult
- type AdaptationTracker
- func (at *AdaptationTracker) Finalize() *AdaptationResult
- func (at *AdaptationTracker) GetCurrentTask() int
- func (at *AdaptationTracker) GetWindows() []TimeWindow
- func (at *AdaptationTracker) RecordOutput(correct bool) int
- func (at *AdaptationTracker) ScheduleTaskChange(atOffset time.Duration, taskID int, taskName string)
- func (at *AdaptationTracker) SetModelInfo(modelName, modeName string)
- func (at *AdaptationTracker) Start(initialTask string, initialTaskID int)
- type ArchConfig
- type ArchConfigBundle
- type ArchGenOptions
- type AttentionWeights
- type Backend
- type BrainType
- type CPUBackend
- func (b *CPUBackend[T]) Activate(t *Tensor[T], actType ActivationType) *Tensor[T]
- func (b *CPUBackend[T]) ActivateDerivative(preAct *Tensor[T], actType ActivationType) *Tensor[T]
- func (b *CPUBackend[T]) Add(a, other *Tensor[T]) *Tensor[T]
- func (b *CPUBackend[T]) MatMul(a, mat *Tensor[T]) *Tensor[T]
- func (b *CPUBackend[T]) MatMulAdd(a, mat, c *Tensor[T]) *Tensor[T]
- func (b *CPUBackend[T]) Mean(t *Tensor[T]) T
- func (b *CPUBackend[T]) OuterProduct(a, other *Tensor[T]) *Tensor[T]
- func (b *CPUBackend[T]) Scale(t *Tensor[T], factor T) *Tensor[T]
- func (b *CPUBackend[T]) Sqrt(t *Tensor[T]) *Tensor[T]
- func (b *CPUBackend[T]) Sum(t *Tensor[T]) T
- type ChannelObserver
- type ComparisonResult
- type ConsoleObserver
- type ConstantScheduler
- type CorrelationMatrix
- type CorrelationResult
- func ComputeCorrelationMatrix(data [][]float32, labels []string) *CorrelationResult
- func ComputeCorrelationMatrixFloat64(data [][]float64, labels []string) *CorrelationResult
- func ComputeSpearmanMatrix(data [][]float32, labels []string) *CorrelationResult
- func CorrelationResultFromJSON(jsonStr string) (*CorrelationResult, error)
- type CosineAnnealingScheduler
- type DType
- type DataLoader
- type DetectedTensor
- type DeviationBucket
- type DeviationMetrics
- func (dm *DeviationMetrics) ComputeFinalMetrics()
- func (dm *DeviationMetrics) GetSamplesInBucket(bucketName string) []int
- func (dm *DeviationMetrics) GetWorstSamples(n int) []PredictionResult
- func (dm *DeviationMetrics) PrintSummary()
- func (dm *DeviationMetrics) SaveMetrics(filepath string) error
- func (dm *DeviationMetrics) UpdateMetrics(result PredictionResult)
- type EncodedWeights
- type EnsembleMatch
- type ExponentialDecayScheduler
- type FeaturePair
- type GPUDeviceInfo
- type GenericBackwardResult
- type GenericModelConfig
- type GenericStepState
- type GenericTweenState
- func (ts *GenericTweenState[T]) BackwardPassChainRule(n *Network, targetClass int, outputSize int)
- func (ts *GenericTweenState[T]) CalculateLinkBudgets()
- func (ts *GenericTweenState[T]) ComputeGaps()
- func (ts *GenericTweenState[T]) ForwardPass(n *Network, input *Tensor[T], backend Backend[T]) *Tensor[T]
- func (ts *GenericTweenState[T]) GetGap(layerIdx int) float32
- func (ts *GenericTweenState[T]) SetForwardActivation(layerIdx int, activation *Tensor[T])
- func (ts *GenericTweenState[T]) TweenStep(n *Network, input *Tensor[T], targetClass int, outputSize int, rate float32, ...) float32
- func (ts *GenericTweenState[T]) TweenWeightsChainRule(n *Network, rate float32)
- type GridPosition
- type GridPositionDef
- type GridShape
- type HTTPObserver
- type ImportedLayerConfig
- type ImportedModelFormat
- type LRScheduler
- type LSTMWeights
- type LayerConfig
- func CallLayerInitFunction(name string, args ...interface{}) (LayerConfig, error)
- func GraftNetworks(networks []*Network, combineMode string) (*LayerConfig, error)
- func InitConv1DLayer(seqLen, inChannels int, kernelSize, stride, padding, filters int, ...) LayerConfig
- func InitConv2DLayer(inputHeight, inputWidth, inputChannels int, ...) LayerConfig
- func InitDenseBrain(dModel int, activation ActivationType, scale float32) LayerConfig
- func InitDenseLayer(inputSize, outputSize int, activation ActivationType) LayerConfig
- func InitDiverseHive(config ArchConfig) LayerConfig
- func InitEmbeddingLayer(vocabSize, embeddingDim int) LayerConfig
- func InitEntmaxLayer(alpha float32) LayerConfig
- func InitFilteredParallelLayer(branches []LayerConfig, gateInputSize int, softmaxType SoftmaxType, ...) LayerConfig
- func InitGridSoftmaxLayer(rows, cols int) LayerConfig
- func InitGumbelSoftmaxLayer(temperature float32) LayerConfig
- func InitHierarchicalSoftmaxLayer(levels []int) LayerConfig
- func InitLSTMBrain(dModel int, scale float32) LayerConfig
- func InitLSTMLayer(inputSize, hiddenSize, batchSize, seqLength int) LayerConfig
- func InitMHABrain(dModel, numHeads int, scale float32) LayerConfig
- func InitMaskedSoftmaxLayer(maskSize int) LayerConfig
- func InitNormDenseBrain(dModel int, activation ActivationType, scale float32) LayerConfig
- func InitRNNBrain(dModel int, scale float32) LayerConfig
- func InitRNNLayer(inputSize, hiddenSize, batchSize, seqLength int) LayerConfig
- func InitSequentialLayer(layers ...LayerConfig) LayerConfig
- func InitSoftmaxLayer() LayerConfig
- func InitSparsemaxLayer() LayerConfig
- func InitStitchLayer(inputSize, outputSize int) LayerConfig
- func InitSwiGLUBrain(dModel int, scale float32) LayerConfig
- func InitTemperatureSoftmaxLayer(temperature float32) LayerConfig
- type LayerDefinition
- type LayerEvent
- type LayerInitFunction
- type LayerObserver
- type LayerStats
- type LayerTelemetry
- type LayerType
- type LayerWeights
- type LinearDecayScheduler
- type MethodInfo
- type ModelBundle
- type ModelPerformance
- type ModelSizeInfo
- type ModelTelemetry
- type MultiPrecisionLayer
- type MultiPrecisionWeights
- type Network
- func BuildDiverseNetwork(config ArchConfig, inputSize int) *Network
- func BuildNetworkFromFile(filename string) (*Network, error)
- func BuildNetworkFromJSON(jsonConfig string) (*Network, error)
- func BuildNetworkFromJSONWithDType(jsonConfig string) (*Network, string, error)
- func DeserializeModel(saved SavedModel) (*Network, error)
- func LoadImportedModel(filepath string, modelID string) (*Network, error)
- func LoadModel(filename string, modelID string) (*Network, error)
- func LoadModelFromString(jsonString string, modelID string) (*Network, error)
- func LoadModelWithDType(jsonString string, modelID string, targetDType string) (*Network, string, error)
- func LoadTransformerFromBytes(configData []byte, weightsData []byte) (*Network, error)
- func LoadTransformerFromSafetensors(modelDir string) (*Network, error)
- func NewNetwork(inputSize, gridRows, gridCols, layersPerCell int) *Network
- func (n *Network) Activations() [][]float32
- func (n *Network) ApplyGradients(learningRate float32)
- func (n *Network) ApplyGradientsAdamW(learningRate, beta1, beta2, weightDecay float32)
- func (n *Network) ApplyGradientsRMSprop(learningRate, alpha, epsilon, momentum float32)
- func (n *Network) ApplyGradientsSGDMomentum(learningRate, momentum, dampening float32, nesterov bool)
- func (n *Network) BackwardCPU(gradOutput []float32) ([]float32, time.Duration)
- func (n *Network) BackwardGPU(gradOutput []float32) ([]float32, time.Duration, error)
- func (n *Network) BiasGradients() [][]float32
- func (n *Network) EvaluateFromCheckpointFiles(checkpointFiles []string, expectedOutputs []float64) (*DeviationMetrics, time.Duration, time.Duration, error)
- func (n *Network) EvaluateNetwork(inputs [][]float32, expectedOutputs []float64) (*DeviationMetrics, error)
- func (n *Network) ForwardCPU(input []float32) ([]float32, time.Duration)
- func (n *Network) ForwardGPU(input []float32) ([]float32, time.Duration, error)
- func (n *Network) GetActivation(row, col, layer int) ActivationType
- func (n *Network) GetLayer(row, col, layer int) *LayerConfig
- func (n *Network) GetMethodSignature(methodName string) (string, error)
- func (n *Network) GetMethods() ([]MethodInfo, error)
- func (n *Network) GetMethodsJSON() (string, error)
- func (n *Network) GetModelSizeInfo() map[string]ModelSizeInfo
- func (n *Network) GetOptimizer() Optimizer
- func (n *Network) HasMethod(methodName string) bool
- func (n *Network) InitGPU() error
- func (n *Network) InitStepState(inputSize int) *StepState
- func (n *Network) InitializeWeights()
- func (n *Network) KernelGradients() [][]float32
- func (n *Network) ListMethods() []string
- func (n *Network) ReleaseGPU()
- func (n *Network) ResetOptimizer()
- func (n *Network) SaveModel(filename string, modelID string) error
- func (n *Network) SaveModelToString(modelID string) (string, error)
- func (n *Network) SaveModelWithDType(modelID string, dtype string) (string, error)
- func (n *Network) SerializeModel(modelID string) (SavedModel, error)
- func (n *Network) SetLayer(row, col, layer int, config LayerConfig)
- func (n *Network) SetOptimizer(opt Optimizer)
- func (n *Network) StepBackward(state *StepState, gradOutput []float32) ([]float32, time.Duration)
- func (n *Network) StepForward(state *StepState) time.Duration
- func (n *Network) StepForwardSingle(state *StepState, layerIdx int) time.Duration
- func (n *Network) TotalLayers() int
- func (n *Network) Train(batches []TrainingBatch, config *TrainingConfig) (*TrainingResult, error)
- func (n *Network) TrainWithStepping(config *SteppingTrainingConfig, dataLoader DataLoader, totalSteps int) (*SteppingTrainingResult, error)
- func (n *Network) UpdateWeights(learningRate float32)
- func (n *Network) ZeroGradients()
- type NetworkBlueprint
- type NetworkConfig
- type Numeric
- type Optimizer
- type ParameterInfo
- type PolynomialDecayScheduler
- type PredictionResult
- type RMSpropOptimizer
- type RecordedActivity
- type RecordingObserver
- type SGDOptimizer
- type SafetensorsHeader
- type SavedModel
- type SoftmaxType
- type StepDecayScheduler
- type StepState
- type SteppingTrainingConfig
- type SteppingTrainingResult
- type TaskChange
- type Tensor
- func ActivateDerivativeTensor[T Numeric](preAct *Tensor[T], activation ActivationType) *Tensor[T]
- func ActivateTensor[T Numeric](t *Tensor[T], activation ActivationType) *Tensor[T]
- func ApplySoftmax[T Numeric](logits *Tensor[T], temperature float64) *Tensor[T]
- func ApplySoftmaxGrid[T Numeric](logits *Tensor[T], rows, cols int, temperature float64) *Tensor[T]
- func Conv1DBackward[T Numeric](gradOutput, input, preActivation, kernel *Tensor[T], ...) (gradInput, gradKernel, gradBias *Tensor[T])
- func Conv1DForward[T Numeric](input, kernel, bias *Tensor[T], ...) (preAct, postAct *Tensor[T])
- func Conv2DBackward[T Numeric](gradOutput, input, preActivation, kernel *Tensor[T], ...) (gradInput, gradKernel, gradBias *Tensor[T])
- func Conv2DForward[T Numeric](input, kernel, bias *Tensor[T], ...) (preAct, postAct *Tensor[T])
- func ConvertTensorFloat32ToT[T Numeric](src *Tensor[float32]) *Tensor[T]
- func ConvertTensorTToFloat32[T Numeric](src *Tensor[T]) *Tensor[float32]
- func DenseBackward[T Numeric](gradOutput, input, preAct, weights *Tensor[T], ...) (gradInput, gradWeights, gradBias *Tensor[T])
- func DenseForward[T Numeric](input *Tensor[T], weights, bias *Tensor[T], ...) (preAct, postAct *Tensor[T])
- func EmbeddingBackward[T Numeric](gradOutput, tokenIDs *Tensor[T], vocabSize, embeddingDim int) *Tensor[T]
- func EmbeddingForward[T Numeric](tokenIDs *Tensor[T], weights *Tensor[T], vocabSize, embeddingDim int) *Tensor[T]
- func GenericBackwardPass[T Numeric](n *Network, gradOutput *Tensor[T], activations []*Tensor[T], ...) (*Tensor[T], []any, []any, time.Duration)
- func GenericTrainStep[T Numeric](n *Network, input *Tensor[T], target *Tensor[T], learningRate float64, ...) (*Tensor[T], float64, time.Duration)
- func LayerNormBackward[T Numeric](input, residual, gradOutput, gamma, beta *Tensor[T], normSize, batchSize int, ...) (gradInput, gradGamma, gradBeta *Tensor[T])
- func LayerNormForward[T Numeric](input, residual, gamma, beta *Tensor[T], normSize, batchSize int, ...) *Tensor[T]
- func MultiHeadAttentionForward[T Numeric](input *Tensor[T], weights *AttentionWeights[T], ropeTheta float64) *Tensor[T]
- func NewTensor[T Numeric](shape ...int) *Tensor[T]
- func NewTensorFromSlice[T Numeric](data []T, shape ...int) *Tensor[T]
- func ParallelBackward[T Numeric](gradOutput, input *Tensor[T], branches []*LayerConfig, ...) (*Tensor[T], [][]float32)
- func ParallelBackwardFiltered[T Numeric](gradOutput, input *Tensor[T], branches []*LayerConfig, ...) (*Tensor[T], []float32)
- func RMSNormBackward[T Numeric](input, residual, gradOutput, gamma *Tensor[T], normSize, batchSize int, ...) (gradInput, gradGamma *Tensor[T])
- func RMSNormForward[T Numeric](input, residual, gamma *Tensor[T], normSize int, epsilon float64) *Tensor[T]
- func RNNBackward[T Numeric](gradOutput, input, hiddenStates *Tensor[T], weightIH, weightHH *Tensor[T], ...) (gradInput, gradWeightIH, gradWeightHH, gradBiasH *Tensor[T])
- func RNNForward[T Numeric](input, weightIH, weightHH, biasH *Tensor[T], ...) (output, hiddenStates *Tensor[T])
- func ResidualBackward[T Numeric](gradOutput *Tensor[T]) (gradInput, gradSkip *Tensor[T])
- func ResidualForward[T Numeric](input, skipInput *Tensor[T]) *Tensor[T]
- func SequentialBackward[T Numeric](gradOutput, input *Tensor[T], layers []*LayerConfig, ...) *Tensor[T]
- func SoftmaxBackward[T Numeric](gradOutput, output *Tensor[T], softmaxRows, softmaxCols int) *Tensor[T]
- func StepBackwardGeneric[T Numeric](n *Network, state *GenericStepState[T], gradOutput *Tensor[T]) (*Tensor[T], []any, []any, time.Duration)
- func SwiGLUBackward[T Numeric](input, gradOutput *Tensor[T], ...) (...)
- func SwiGLUForward[T Numeric](...) (output *Tensor[T])
- type TensorInfo
- type TensorWithShape
- type TimeWindow
- type TrainingBatch
- type TrainingConfig
- type TrainingMetrics
- type TrainingResult
- type TransformerConfig
- type TweenConfig
- type TweenEpochMetrics
- type TweenState
- func (ts *TweenState) BackwardPass(n *Network, targetClass int, outputSize int)
- func (ts *TweenState) BackwardPassChainRule(n *Network, targetClass int, outputSize int)
- func (ts *TweenState) CalculateLinkBudgets()
- func (ts *TweenState) CalculateLinkBudgetsFromSample(n *Network, input []float32)
- func (ts *TweenState) ForwardPass(n *Network, input []float32) []float32
- func (ts *TweenState) GetBudgetSummary() (avg, min, max float32)
- func (ts *TweenState) GetGapSummary() (avg, max float32)
- func (ts *TweenState) ResetBatch()
- func (ts *TweenState) RestoreBest(n *Network)
- func (ts *TweenState) SaveBest(n *Network)
- func (ts *TweenState) Train(n *Network, inputs [][]float32, expected []float64, epochs int, rate float32, ...)
- func (ts *TweenState) TweenBatch(n *Network, inputs [][]float32, targetClasses []int, outputSize int, ...) float32
- func (ts *TweenState) TweenBatchApply(n *Network, rate float32)
- func (ts *TweenState) TweenStep(n *Network, input []float32, targetClass int, outputSize int, rate float32) float32
- func (ts *TweenState) TweenStepAccumulate(n *Network, input []float32, targetClass int, outputSize int) float32
- func (ts *TweenState) TweenWeights(n *Network, rate float32)
- func (ts *TweenState) TweenWeightsChainRule(n *Network, rate float32)
- type WarmupScheduler
- type WeightsData
Constants ¶
This section is empty.
Variables ¶
var ActivationTypeNames = []string{"ScaledReLU", "LeakyReLU", "Tanh", "Softplus", "Sigmoid"}
ActivationTypeNames maps activation types to names
var BrainTypeNames = []string{"MHA", "LSTM", "RNN", "Dense", "SwiGLU", "NormDense"}
BrainTypeNames maps brain types to their string names
var CombineModeNames = []string{"concat", "add", "avg", "grid_scatter", "filter"}
CombineModeNames lists all supported combine modes for parallel layers
var StandardGridShapes = []GridShape{
{1, 1, "1x1 Mono"},
{2, 2, "2x2 Standard"},
{3, 3, "3x3 Complex"},
{4, 1, "4x1 Tall"},
{1, 4, "1x4 Wide"},
{2, 3, "2x3 Rect"},
{3, 2, "3x2 Rect"},
{8, 1, "8x1 Scanner"},
{6, 4, "6x4 Matrix"},
}
StandardGridShapes provides common grid configurations
Functions ¶
func Activate ¶ added in v0.0.7
func Activate[T Numeric](v T, activation ActivationType) T
Activate applies the activation function for any numeric type. This is the generic implementation that supports all Numeric types.
func ActivateDerivative ¶ added in v0.0.7
func ActivateDerivative[T Numeric](preActivation T, activation ActivationType) T
ActivateDerivative computes the derivative of the activation function for any numeric type. Note: This computes the derivative with respect to the PRE-activation value.
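A short sketch of both calls, assuming the LeakyReLU semantics listed in the overview (v if v >= 0, else v * 0.1):

    y := nn.Activate(float32(-0.5), nn.ActivationLeakyReLU)            // -0.05
    dy := nn.ActivateDerivative(float32(-0.5), nn.ActivationLeakyReLU) // 0.1, the slope for v < 0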
func ComputeSilhouetteScore ¶ added in v0.0.7
ComputeSilhouetteScore calculates the mean Silhouette Coefficient of all samples. Returns a value between -1 and 1; a high value indicates well-separated clusters.
func ConvertMaskToFloat ¶ added in v0.0.7
ConvertMaskToFloat converts a boolean mask to a float vector (0.0/1.0), suitable for clustering input.
func ConvertSliceFloat32ToT ¶ added in v0.0.7
ConvertSliceFloat32ToT converts a float32 slice to any Numeric type.
func ConvertSliceTToFloat32 ¶ added in v0.0.7
ConvertSliceTToFloat32 converts any Numeric slice to float32.
func EuclideanDistance ¶ added in v0.0.7
EuclideanDistance computes the distance between two vectors.
func FlattenFrom2D ¶
FlattenFrom2D converts 2D conv output back to 1D for dense layers
func ForwardSoftmaxCPU ¶ added in v0.0.2
func ForwardSoftmaxCPU(input []float32, config *LayerConfig) ([]float32, error)
ForwardSoftmaxCPU applies the configured softmax variant
func GenericForwardPass ¶ added in v0.0.7
func GenericForwardPass[T Numeric](
    n *Network,
    input *Tensor[T],
    backend Backend[T],
) (*Tensor[T], []*Tensor[T], []any, time.Duration)
GenericForwardPass executes the network forward pass for any numeric type, using the Backend interface for computation.
Returns:
- the final output tensor
- a list of output activations for each layer (activations[0] = input)
- a list of backward contexts for each layer (intermediate states needed for backprop)
- the duration of the forward pass
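A minimal sketch; the network is assumed to come from NewNetwork (or a loader) with an input size of 4:

    backend := nn.NewCPUBackend[float32]()
    input := nn.NewTensorFromSlice([]float32{0.1, 0.2, 0.3, 0.4}, 1, 4)
    out, acts, ctxs, dur := nn.GenericForwardPass(network, input, backend)
    _ = out          // final output tensor
    _ = acts         // acts[0] is the input
    _, _ = ctxs, dur // backward contexts feed GenericBackwardPass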
func GetLayerInitFunction ¶ added in v0.0.2
GetLayerInitFunction returns a layer init function by name
func GetLoomCompatibility ¶ added in v0.0.7
func GetLoomCompatibility(detected []DetectedTensor) map[string]int
GetLoomCompatibility returns a summary of what can/can't be loaded
func InferLayerType ¶ added in v0.0.7
InferLayerType determines the layer type from a tensor shape.
Returns: "conv" (4D), "dense" (2D), "norm" (1D), "embedding" (2D, large vocab), or "unknown".
func InitMultiHeadAttentionLayer ¶
func InitMultiHeadAttentionLayer(config *LayerConfig, isGPU bool)
InitMultiHeadAttentionLayer (keeps existing logic)
func IsFloatType ¶ added in v0.0.7
IsFloatType checks if T is a floating-point type
func IsIntegerType ¶ added in v0.0.7
IsIntegerType checks if T is an integer type
func KMeansCluster ¶ added in v0.0.7
func KMeansCluster(data [][]float32, k int, maxIter int, parallel bool) (centroids [][]float32, assignments []int)
KMeansCluster performs K-means clustering on the provided data vectors.

data: slice of feature vectors (e.g., performance masks converted to floats)
k: number of clusters
maxIter: maximum number of iterations
parallel: if true, uses all available CPUs for the assignment step

Returns:
- centroids: the final cluster centers
- assignments: cluster index for each data point
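A small sketch pairing KMeansCluster with ComputeSilhouetteScore (toy data, sequential assignment step):

    data := [][]float32{{0, 0}, {0.1, 0.2}, {5, 5}, {5.1, 4.9}}
    centroids, assignments := nn.KMeansCluster(data, 2, 100, false)
    score := nn.ComputeSilhouetteScore(data, assignments)
    _ = centroids // two cluster centers
    _ = score     // close to 1 when clusters are well separated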
func LSTMBackward ¶ added in v0.0.7
func LSTMBackward[T Numeric](
    gradOutput, input *Tensor[T],
    states map[string]*Tensor[T],
    weights *LSTMWeights[T],
    batchSize, seqLength, inputSize, hiddenSize int,
) (gradInput *Tensor[T], gradWeights *LSTMWeights[T])
LSTMBackward performs backward pass for LSTM layer using BPTT with any numeric type.
func LSTMForward ¶ added in v0.0.7
func LSTMForward[T Numeric](
    input *Tensor[T],
    weights *LSTMWeights[T],
    batchSize, seqLength, inputSize, hiddenSize int,
) (output *Tensor[T], hidden, cell *Tensor[T], allGates map[string]*Tensor[T])
LSTMForward performs LSTM forward pass for any numeric type.
Input shape: [batchSize, seqLength, inputSize]
Output shape: [batchSize, seqLength, hiddenSize]
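A shape-focused sketch; constructing LSTMWeights is not shown here and is assumed to happen elsewhere (e.g. via an LSTM layer config):

    input := nn.NewTensor[float32](2, 5, 8) // [batchSize=2, seqLength=5, inputSize=8]
    var weights *nn.LSTMWeights[float32]    // assumed initialized elsewhere
    out, hidden, cell, gates := nn.LSTMForward(input, weights, 2, 5, 8, 16)
    _ = out                       // [2, 5, 16]
    _, _, _ = hidden, cell, gates // feed these into LSTMBackward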
func LayerTypeToName ¶ added in v0.0.7
LayerTypeToName converts LayerType to human readable name
func LoadGenericFromBytes ¶ added in v0.0.7
func LoadGenericFromBytes(weightsData []byte, configData []byte) (*Network, []DetectedTensor, error)
LoadGenericFromBytes loads any safetensors model, auto-detecting layer types.
weightsData: safetensors bytes (required)
configData: config.json bytes (optional, can be nil)
Returns: Network, detected tensor info, error
func LoadSafetensors ¶ added in v0.0.3
LoadSafetensors reads a safetensors file and returns tensors by name
func LoadSafetensorsFromBytes ¶ added in v0.0.3
LoadSafetensorsFromBytes reads safetensors data from a byte slice and returns tensors by name
func LoadSafetensorsWithShapes ¶ added in v0.0.7
func LoadSafetensorsWithShapes(data []byte) (map[string]TensorWithShape, error)
LoadSafetensorsWithShapes loads safetensors and returns both values and shapes. This enables proper layer type detection based on tensor dimensions.
func MaxAbsDiff ¶
MaxAbsDiff calculates the maximum absolute difference between two slices
func MultiHeadAttentionBackward ¶ added in v0.0.7
func MultiHeadAttentionBackward[T Numeric](
    gradOutput, input *Tensor[T],
    weights *AttentionWeights[T],
) (gradInput *Tensor[T], gradWeights *AttentionWeights[T])
MultiHeadAttentionBackward computes gradients for generic multi-head attention. Matches the simplified logic of MultiHeadAttentionForward (Average pooling of V).
func MultiHeadAttentionForwardCPU ¶ added in v0.0.3
func MultiHeadAttentionForwardCPU(input []float32, config *LayerConfig, batchSize int) ([]float32, []float32)
MultiHeadAttentionForwardCPU (keeps existing logic); the original function is retained verbatim to avoid deletion.
func ParallelForward ¶ added in v0.0.7
func ParallelForward[T Numeric](
    input *Tensor[T],
    branches []*LayerConfig,
    batchSize int,
    combineMode string,
) (*Tensor[T], []*Tensor[T], error)
ParallelForward executes multiple sub-layers in parallel for any numeric type. Returns combined output and a slice of intermediate tensors (one per branch) for backward pass.
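A minimal sketch with two dense branches and the "concat" combine mode (sizes are illustrative):

    branchA := nn.InitDenseLayer(4, 3, nn.ActivationTanh)
    branchB := nn.InitDenseLayer(4, 3, nn.ActivationSigmoid)
    input := nn.NewTensorFromSlice([]float32{1, 2, 3, 4}, 1, 4)
    out, branchOuts, err := nn.ParallelForward(input, []*nn.LayerConfig{&branchA, &branchB}, 1, "concat")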
func ParallelForwardFiltered ¶ added in v0.0.7
func ParallelForwardFiltered[T Numeric](
    input *Tensor[T],
    branches []*LayerConfig,
    gateConfig *LayerConfig,
    softmaxType SoftmaxType,
    temperature float32,
    batchSize int,
) (*Tensor[T], []*Tensor[T], []float32, error)
ParallelForwardFiltered executes parallel branches with softmax-gated combining. The gate layer computes N logits (one per branch), softmax normalizes them, and the branch outputs are weighted-summed. Requires all branches to have the same output size. Returns: combined output, branch outputs, gate weights (for the backward pass).
func PrintEnsembleReport ¶ added in v0.0.7
func PrintEnsembleReport(matches []EnsembleMatch, topN int)
PrintEnsembleReport generates a human-readable summary of the best matches
func ReshapeTo2D ¶
ReshapeTo2D converts flattened 1D data to 2D shape for convolution
func ResidualForwardCPU ¶ added in v0.0.7
ResidualForwardCPU performs residual connection on CPU
func RmsNormForwardCPU ¶ added in v0.0.3
func RmsNormForwardCPU(input []float32, residual []float32, config *LayerConfig, batchSize int) []float32
RmsNormForwardCPU performs RMS normalization on CPU (exported version)
func SaveBundle ¶
SaveBundle saves multiple models to a bundle file
func ScaleWeights ¶ added in v0.0.7
ScaleWeights scales a float32 slice in place
func SequentialForward ¶ added in v0.0.7
func SequentialForward[T Numeric](
    input *Tensor[T],
    layers []*LayerConfig,
    batchSize int,
) (*Tensor[T], []*Tensor[T], error)
SequentialForward executes sub-layers in sequence for any numeric type. Returns: final output, list of intermediate outputs (for backward pass), error
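A minimal sketch chaining two dense layers (sizes are illustrative):

    l1 := nn.InitDenseLayer(4, 8, nn.ActivationLeakyReLU)
    l2 := nn.InitDenseLayer(8, 2, nn.ActivationSigmoid)
    input := nn.NewTensorFromSlice([]float32{1, 2, 3, 4}, 1, 4)
    out, intermediates, err := nn.SequentialForward(input, []*nn.LayerConfig{&l1, &l2}, 1)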
func StepForwardGeneric ¶ added in v0.0.7
func StepForwardGeneric[T Numeric](
    n *Network,
    state *GenericStepState[T],
    backend Backend[T],
) time.Duration
StepForwardGeneric executes one step for ALL layers using generic tensors. Network weights are converted from float32 to T during execution.
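A minimal stepping sketch; network and inputData are assumed to be prepared elsewhere:

    backend := nn.NewCPUBackend[float32]()
    state := nn.NewGenericStepState[float32](network.TotalLayers(), len(inputData))
    state.SetInput(nn.NewTensorFromSlice(inputData, 1, len(inputData)))
    nn.StepForwardGeneric(network, state, backend) // advances every layer by one step
    out := state.GetOutput()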
func SwiGLUForwardCPU ¶ added in v0.0.3
func SwiGLUForwardCPU(input []float32, config *LayerConfig, batchSize int) ([]float32, []float32)
SwiGLUForwardCPU performs SwiGLU gated activation on CPU
Types ¶
type ActivationType ¶
type ActivationType int
ActivationType defines the activation function used in a layer
const (
    ActivationScaledReLU ActivationType = 0 // v * 1.1, then ReLU
    ActivationSigmoid    ActivationType = 1 // 1 / (1 + exp(-v))
    ActivationTanh       ActivationType = 2 // tanh(v)
    ActivationSoftplus   ActivationType = 3 // log(1 + exp(v))
    ActivationLeakyReLU  ActivationType = 4 // v if v >= 0, else v * 0.1
)
type AdamWOptimizer ¶ added in v0.0.5
type AdamWOptimizer struct {
// contains filtered or unexported fields
}
func NewAdamWOptimizer ¶ added in v0.0.5
func NewAdamWOptimizer(beta1, beta2, epsilon, weightDecay float32) *AdamWOptimizer
func NewAdamWOptimizerDefault ¶ added in v0.0.5
func NewAdamWOptimizerDefault() *AdamWOptimizer
func (*AdamWOptimizer) GetState ¶ added in v0.0.5
func (opt *AdamWOptimizer) GetState() map[string]interface{}
func (*AdamWOptimizer) LoadState ¶ added in v0.0.5
func (opt *AdamWOptimizer) LoadState(state map[string]interface{}) error
func (*AdamWOptimizer) Name ¶ added in v0.0.5
func (opt *AdamWOptimizer) Name() string
func (*AdamWOptimizer) Reset ¶ added in v0.0.5
func (opt *AdamWOptimizer) Reset()
func (*AdamWOptimizer) Step ¶ added in v0.0.5
func (opt *AdamWOptimizer) Step(network *Network, learningRate float32)
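A hedged sketch of one update cycle; it assumes AdamWOptimizer satisfies the Optimizer interface and that a backward pass has already populated the network's gradients:

    opt := nn.NewAdamWOptimizerDefault()
    network.SetOptimizer(opt)
    opt.Step(network, 0.001) // apply an AdamW update with lr = 0.001
    network.ZeroGradients()  // clear gradients before the next batch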
type AdaptationComparison ¶ added in v0.0.6
type AdaptationComparison struct {
Results []AdaptationResult `json:"results"`
}
AdaptationComparison holds results from multiple models/modes for comparison
func NewAdaptationComparison ¶ added in v0.0.6
func NewAdaptationComparison() *AdaptationComparison
NewAdaptationComparison creates a new comparison container
func (*AdaptationComparison) AddResult ¶ added in v0.0.6
func (ac *AdaptationComparison) AddResult(result *AdaptationResult)
AddResult adds a result to the comparison
func (*AdaptationComparison) GetBestByAvgAccuracy ¶ added in v0.0.6
func (ac *AdaptationComparison) GetBestByAvgAccuracy() *AdaptationResult
GetBestByAvgAccuracy returns the result with the highest average accuracy
func (*AdaptationComparison) GetMostStable ¶ added in v0.0.6
func (ac *AdaptationComparison) GetMostStable() *AdaptationResult
GetMostStable returns the result with the smallest accuracy variance (most consistent)
func (*AdaptationComparison) PrintComparisonSummary ¶ added in v0.0.6
func (ac *AdaptationComparison) PrintComparisonSummary(title string)
PrintComparisonSummary prints a summary table comparing all results
func (*AdaptationComparison) PrintComparisonTimeline ¶ added in v0.0.6
func (ac *AdaptationComparison) PrintComparisonTimeline(title string, numWindows int)
PrintComparisonTimeline prints a side-by-side timeline comparison
func (*AdaptationComparison) SaveToJSON ¶ added in v0.0.6
func (ac *AdaptationComparison) SaveToJSON(filepath string) error
SaveToJSON saves the comparison results to a JSON file
type AdaptationResult ¶ added in v0.0.6
type AdaptationResult struct {
ModelName string `json:"model_name"`
ModeName string `json:"mode_name"`
TotalOutputs int `json:"total_outputs"`
AvgAccuracy float64 `json:"avg_accuracy"`
Windows []TimeWindow `json:"windows"`
TaskChanges []TaskChange `json:"task_changes"`
Duration time.Duration `json:"duration"`
}
AdaptationResult captures the full adaptation performance across time windows
func (*AdaptationResult) PrintAdaptationSummary ¶ added in v0.0.6
func (ar *AdaptationResult) PrintAdaptationSummary()
PrintAdaptationSummary prints a summary of adaptation performance
func (*AdaptationResult) PrintTimeline ¶ added in v0.0.6
func (ar *AdaptationResult) PrintTimeline()
PrintTimeline prints an ASCII timeline of accuracy over time
type AdaptationTracker ¶ added in v0.0.6
type AdaptationTracker struct {
// contains filtered or unexported fields
}
AdaptationTracker tracks accuracy over time with task changes
func NewAdaptationTracker ¶ added in v0.0.6
func NewAdaptationTracker(windowDuration, totalDuration time.Duration) *AdaptationTracker
NewAdaptationTracker creates a tracker for measuring adaptation over time.
windowDuration: typically 1 second
totalDuration: total test duration (determines the number of windows)
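A hedged end-to-end sketch using the methods documented below (task names are illustrative):

    tracker := nn.NewAdaptationTracker(time.Second, 30*time.Second)
    tracker.SetModelInfo("grid-2x2", "stepping")
    tracker.ScheduleTaskChange(10*time.Second, 1, "task-b") // must precede Start
    tracker.Start("task-a", 0)
    taskID := tracker.RecordOutput(true) // returns the task active right now
    _ = taskID
    tracker.Finalize().PrintAdaptationSummary()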
func (*AdaptationTracker) Finalize ¶ added in v0.0.6
func (at *AdaptationTracker) Finalize() *AdaptationResult
Finalize computes final metrics and returns the AdaptationResult
func (*AdaptationTracker) GetCurrentTask ¶ added in v0.0.6
func (at *AdaptationTracker) GetCurrentTask() int
GetCurrentTask returns the current task ID
func (*AdaptationTracker) GetWindows ¶ added in v0.0.6
func (at *AdaptationTracker) GetWindows() []TimeWindow
GetWindows returns a copy of the current windows (thread-safe)
func (*AdaptationTracker) RecordOutput ¶ added in v0.0.6
func (at *AdaptationTracker) RecordOutput(correct bool) int
RecordOutput records an output (prediction) and whether it was correct. Returns the current task ID so the caller knows what behavior to expect.
func (*AdaptationTracker) ScheduleTaskChange ¶ added in v0.0.6
func (at *AdaptationTracker) ScheduleTaskChange(atOffset time.Duration, taskID int, taskName string)
ScheduleTaskChange schedules a task change at a specific time offset. This should be called before Start().
func (*AdaptationTracker) SetModelInfo ¶ added in v0.0.6
func (at *AdaptationTracker) SetModelInfo(modelName, modeName string)
SetModelInfo sets the model and mode name for this tracker
func (*AdaptationTracker) Start ¶ added in v0.0.6
func (at *AdaptationTracker) Start(initialTask string, initialTaskID int)
Start begins the tracking session
type ArchConfig ¶ added in v0.0.7
type ArchConfig struct {
ID int `json:"id"`
Name string `json:"name"`
Species string `json:"species"` // Grid shape name
MutationStr string `json:"mutationStr"` // Compact description of config
GridRows int `json:"gridRows"`
GridCols int `json:"gridCols"`
NumBrains int `json:"numBrains"` // GridRows * GridCols
DModel int `json:"dModel"` // Model dimension
NumHeads int `json:"numHeads"` // Attention heads (for MHA brains)
LearningRate float32 `json:"learningRate"`
BudgetScale float32 `json:"budgetScale"`
Activation ActivationType `json:"activation"`
CombineMode string `json:"combineMode"` // "concat", "add", "avg", "grid_scatter", "filter"
Brains []BrainType `json:"brains"` // One per brain cell
BrainNames []string `json:"brainNames"` // String names for JSON
InitScale float32 `json:"initScale"` // Weight initialization scale
DType string `json:"dtype"` // Numerical type: "float32", "float64", "int32", "int16", "int8"
}
ArchConfig defines a complete network architecture configuration
func ArchConfigFromBytes ¶ added in v0.0.7
func ArchConfigFromBytes(data []byte) (*ArchConfig, error)
ArchConfigFromBytes deserializes an ArchConfig from bytes
func GenerateDiverseConfigs ¶ added in v0.0.7
func GenerateDiverseConfigs(count int, opts *ArchGenOptions) []ArchConfig
GenerateDiverseConfigs creates a slice of randomized architecture configurations. If opts is nil, DefaultArchGenOptions() is used.
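A short sketch; the input size of 16 is illustrative:

    configs := nn.GenerateDiverseConfigs(10, nil) // nil -> DefaultArchGenOptions()
    net := nn.BuildDiverseNetwork(configs[0], 16)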
func (*ArchConfig) ToBytes ¶ added in v0.0.7
func (ac *ArchConfig) ToBytes() ([]byte, error)
ToBytes serializes the ArchConfig to bytes (WASM-compatible)
type ArchConfigBundle ¶ added in v0.0.7
type ArchConfigBundle struct {
Version int `json:"version"`
Configs []ArchConfig `json:"configs"`
}
ArchConfigBundle holds multiple ArchConfigs for serialization
func ArchConfigBundleFromBytes ¶ added in v0.0.7
func ArchConfigBundleFromBytes(data []byte) (*ArchConfigBundle, error)
ArchConfigBundleFromBytes deserializes an ArchConfigBundle from bytes
func LoadArchConfigBundle ¶ added in v0.0.7
func LoadArchConfigBundle(filename string) (*ArchConfigBundle, error)
LoadArchConfigBundle loads an ArchConfigBundle from a file
func (*ArchConfigBundle) SaveToFile ¶ added in v0.0.7
func (b *ArchConfigBundle) SaveToFile(filename string) error
SaveToFile saves the bundle to a file
func (*ArchConfigBundle) ToBytes ¶ added in v0.0.7
func (b *ArchConfigBundle) ToBytes() ([]byte, error)
ToBytes serializes the bundle to bytes (WASM-compatible)
func (*ArchConfigBundle) ToJSON ¶ added in v0.0.7
func (b *ArchConfigBundle) ToJSON() (string, error)
ToJSON returns a pretty-printed JSON string (useful for debugging)
type ArchGenOptions ¶ added in v0.0.7
type ArchGenOptions struct {
DModels []int // Available model dimensions (default: {64, 64, 64, 32} = 75% D64)
NumHeads []int // Available head counts (default: {4, 8})
GridShapes []GridShape // Available grid shapes (default: StandardGridShapes)
LRMin float32 // Minimum learning rate (default: 0.0001)
LRMax float32 // Maximum learning rate (default: 0.01)
InitScale float32 // Weight initialization scale (default: 0.5)
BudgetMin float32 // Minimum budget scale (default: 0.5)
BudgetMax float32 // Maximum budget scale (default: 1.0)
// Probability distributions (weights, not percentages - will be normalized)
// BrainDistribution: probability of each BrainType being selected
// Default: MHA=30%, LSTM=25%, RNN=15%, Dense=15%, SwiGLU=8%, NormDense=7%
BrainDistribution []float64
// CombineDistribution: probability of each combine mode
// Default: avg=35%, add=30%, concat=20%, grid_scatter=15%
CombineDistribution []float64
// ActivationDistribution: probability of each activation type
// Default: uniform distribution across all 5 types
ActivationDistribution []float64
// DTypes: available numerical types for weights (default: float32 only)
// Options: "float32", "float64", "int32", "int16", "int8"
DTypes []string
// DTypeDistribution: probability of each dtype being selected
// Default: 100% float32
DTypeDistribution []float64
}
ArchGenOptions configures the random architecture generator
func DefaultArchGenOptions ¶ added in v0.0.7
func DefaultArchGenOptions() *ArchGenOptions
DefaultArchGenOptions returns options matching test43a's distributions
type AttentionWeights ¶ added in v0.0.7
type AttentionWeights[T Numeric] struct {
    QWeights, KWeights, VWeights *Tensor[T]
    QBias, KBias, VBias          *Tensor[T]
    OutputWeight, OutputBias     *Tensor[T]
    DModel, NumHeads, NumKVHeads, HeadDim int
}
AttentionWeights holds all weights for attention in a type-generic way.
type Backend ¶ added in v0.0.7
type Backend[T Numeric] interface {
    // MatMul performs matrix multiplication: result = a @ b
    // a shape: [M, K], b shape: [K, N] -> result shape: [M, N]
    MatMul(a, b *Tensor[T]) *Tensor[T]

    // MatMulAdd performs matrix multiplication with bias: result = a @ b + c
    MatMulAdd(a, b, c *Tensor[T]) *Tensor[T]

    // Add performs element-wise addition: result = a + b
    Add(a, b *Tensor[T]) *Tensor[T]

    // Scale multiplies all elements by a scalar: result = t * factor
    Scale(t *Tensor[T], factor T) *Tensor[T]

    // Activate applies activation function element-wise
    Activate(t *Tensor[T], actType ActivationType) *Tensor[T]

    // ActivateDerivative computes activation derivative for backprop
    ActivateDerivative(preAct *Tensor[T], actType ActivationType) *Tensor[T]

    // OuterProduct computes outer product: result[i,j] = a[i] * b[j]
    // Essential for NeuralTween weight updates
    OuterProduct(a, b *Tensor[T]) *Tensor[T]

    // Sum computes the sum of all elements
    Sum(t *Tensor[T]) T

    // Mean computes the mean of all elements
    Mean(t *Tensor[T]) T

    // Sqrt computes element-wise square root
    Sqrt(t *Tensor[T]) *Tensor[T]
}
Backend defines the interface for tensor operations. This abstraction allows swapping implementations (CPU, GPU, quantized) without changing layer code.
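A small sketch of the CPU implementation in use:

    b := nn.NewCPUBackend[float32]()
    a := nn.NewTensorFromSlice([]float32{1, 2, 3, 4}, 2, 2)
    m := nn.NewTensorFromSlice([]float32{5, 6, 7, 8}, 2, 2)
    prod := b.MatMul(a, m) // [2, 2] @ [2, 2] -> [2, 2]
    total := b.Sum(prod)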
type BrainType ¶ added in v0.0.7
type BrainType int
BrainType defines the type of layer to use in a brain cell
type CPUBackend ¶ added in v0.0.7
type CPUBackend[T Numeric] struct{}
CPUBackend provides CPU-based tensor operations.
func NewCPUBackend ¶ added in v0.0.7
func NewCPUBackend[T Numeric]() *CPUBackend[T]
NewCPUBackend creates a new CPU backend.
func (*CPUBackend[T]) Activate ¶ added in v0.0.7
func (b *CPUBackend[T]) Activate(t *Tensor[T], actType ActivationType) *Tensor[T]
Activate applies activation function element-wise
func (*CPUBackend[T]) ActivateDerivative ¶ added in v0.0.7
func (b *CPUBackend[T]) ActivateDerivative(preAct *Tensor[T], actType ActivationType) *Tensor[T]
ActivateDerivative computes activation derivative for backprop
func (*CPUBackend[T]) Add ¶ added in v0.0.7
func (b *CPUBackend[T]) Add(a, other *Tensor[T]) *Tensor[T]
Add performs element-wise addition: result = a + b
func (*CPUBackend[T]) MatMul ¶ added in v0.0.7
func (b *CPUBackend[T]) MatMul(a, mat *Tensor[T]) *Tensor[T]
MatMul performs matrix multiplication: result = a @ b
func (*CPUBackend[T]) MatMulAdd ¶ added in v0.0.7
func (b *CPUBackend[T]) MatMulAdd(a, mat, c *Tensor[T]) *Tensor[T]
MatMulAdd performs matrix multiplication with bias: result = a @ b + c
func (*CPUBackend[T]) Mean ¶ added in v0.0.7
func (b *CPUBackend[T]) Mean(t *Tensor[T]) T
Mean computes the mean of all elements
func (*CPUBackend[T]) OuterProduct ¶ added in v0.0.7
func (b *CPUBackend[T]) OuterProduct(a, other *Tensor[T]) *Tensor[T]
OuterProduct computes outer product: result[i,j] = a[i] * b[j]
func (*CPUBackend[T]) Scale ¶ added in v0.0.7
func (b *CPUBackend[T]) Scale(t *Tensor[T], factor T) *Tensor[T]
Scale multiplies all elements by a scalar: result = t * factor
func (*CPUBackend[T]) Sqrt ¶ added in v0.0.7
func (b *CPUBackend[T]) Sqrt(t *Tensor[T]) *Tensor[T]
Sqrt computes element-wise square root
func (*CPUBackend[T]) Sum ¶ added in v0.0.7
func (b *CPUBackend[T]) Sum(t *Tensor[T]) T
Sum computes the sum of all elements
type ChannelObserver ¶ added in v0.0.6
type ChannelObserver struct {
Events chan LayerEvent
}
ChannelObserver sends events to a Go channel (for internal processing)
func NewChannelObserver ¶ added in v0.0.6
func NewChannelObserver(bufferSize int) *ChannelObserver
func (*ChannelObserver) OnBackward ¶ added in v0.0.6
func (o *ChannelObserver) OnBackward(event LayerEvent)
func (*ChannelObserver) OnForward ¶ added in v0.0.6
func (o *ChannelObserver) OnForward(event LayerEvent)
type ComparisonResult ¶ added in v0.0.6
type ComparisonResult struct {
Name string `json:"name"` // Network/test name
NumLayers int `json:"num_layers"` // Number of layers tested
NormalBP TrainingMetrics `json:"normal_bp"` // Normal backprop (no stepping)
NormalTween TrainingMetrics `json:"normal_tween"` // Normal tween training
StepBP TrainingMetrics `json:"step_bp"` // Stepping + backprop
StepTween TrainingMetrics `json:"step_tween"` // Stepping + tween (legacy mode)
StepTweenChain TrainingMetrics `json:"step_tween_chain"` // Stepping + tween (chain rule mode)
BatchTween TrainingMetrics `json:"batch_tween"` // Batch tween (non-stepping)
StepBatchTween TrainingMetrics `json:"step_batch_tween"` // Stepping + batch tween
}
ComparisonResult holds results from comparing multiple training methods
func (*ComparisonResult) DetermineBest ¶ added in v0.0.6
func (cr *ComparisonResult) DetermineBest() string
DetermineBest returns the name of the best performing training method
type ConsoleObserver ¶ added in v0.0.6
type ConsoleObserver struct {
Verbose bool // If true, print input/output data (can be large!)
}
ConsoleObserver prints layer events to stdout
func (*ConsoleObserver) OnBackward ¶ added in v0.0.6
func (o *ConsoleObserver) OnBackward(event LayerEvent)
func (*ConsoleObserver) OnForward ¶ added in v0.0.6
func (o *ConsoleObserver) OnForward(event LayerEvent)
type ConstantScheduler ¶ added in v0.0.5
type ConstantScheduler struct {
// contains filtered or unexported fields
}
func NewConstantScheduler ¶ added in v0.0.5
func NewConstantScheduler(baseLR float32) *ConstantScheduler
func (*ConstantScheduler) GetLR ¶ added in v0.0.5
func (s *ConstantScheduler) GetLR(step int) float32
func (*ConstantScheduler) Name ¶ added in v0.0.5
func (s *ConstantScheduler) Name() string
func (*ConstantScheduler) Reset ¶ added in v0.0.5
func (s *ConstantScheduler) Reset()
type CorrelationMatrix ¶ added in v0.0.7
type CorrelationMatrix struct {
Labels []string `json:"labels"` // Feature/column names
Matrix [][]float64 `json:"matrix"` // NxN correlation values (-1 to 1)
N int `json:"n"` // Number of features
Samples int `json:"samples"` // Number of data samples used
}
CorrelationMatrix holds the computed correlation data
type CorrelationResult ¶ added in v0.0.7
type CorrelationResult struct {
Correlation CorrelationMatrix `json:"correlation"`
Means []float64 `json:"means"` // Mean of each feature
StdDevs []float64 `json:"std_devs"` // Std deviation of each feature
Mins []float64 `json:"mins"` // Min value per feature
Maxs []float64 `json:"maxs"` // Max value per feature
}
CorrelationResult wraps the matrix with additional statistics
func ComputeCorrelationMatrix ¶ added in v0.0.7
func ComputeCorrelationMatrix(data [][]float32, labels []string) *CorrelationResult
ComputeCorrelationMatrix calculates the Pearson correlation matrix for a dataset.
data: 2D array where rows are samples and columns are features
labels: optional feature names (if nil, uses "Feature_0", "Feature_1", etc.)
Returns a CorrelationResult with the matrix and feature statistics.
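A short sketch (feature names are illustrative):

    data := [][]float32{{1, 2, 10}, {2, 4, 9}, {3, 6, 8}, {4, 8, 7}}
    res := nn.ComputeCorrelationMatrix(data, []string{"a", "b", "c"})
    strong := res.GetStrongCorrelations(0.9) // pairs with |corr| >= 0.9
    js, _ := res.ToJSON()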
func ComputeCorrelationMatrixFloat64 ¶ added in v0.0.7
func ComputeCorrelationMatrixFloat64(data [][]float64, labels []string) *CorrelationResult
ComputeCorrelationMatrixFloat64 is the float64 version for higher precision
func ComputeSpearmanMatrix ¶ added in v0.0.7
func ComputeSpearmanMatrix(data [][]float32, labels []string) *CorrelationResult
ComputeSpearmanMatrix calculates Spearman rank correlation (for non-linear relationships)
func CorrelationResultFromJSON ¶ added in v0.0.7
func CorrelationResultFromJSON(jsonStr string) (*CorrelationResult, error)
CorrelationResultFromJSON deserializes from JSON string
func (*CorrelationResult) GetCorrelationsWithFeature ¶ added in v0.0.7
func (cr *CorrelationResult) GetCorrelationsWithFeature(featureName string) []FeaturePair
GetCorrelationsWithFeature returns correlations for a specific feature, sorted by strength
func (*CorrelationResult) GetStrongCorrelations ¶ added in v0.0.7
func (cr *CorrelationResult) GetStrongCorrelations(threshold float64) []FeaturePair
GetStrongCorrelations returns pairs with |correlation| >= threshold, sorted by strength
func (*CorrelationResult) ToJSON ¶ added in v0.0.7
func (cr *CorrelationResult) ToJSON() (string, error)
ToJSON serializes the correlation result to JSON string
func (*CorrelationResult) ToJSONCompact ¶ added in v0.0.7
func (cr *CorrelationResult) ToJSONCompact() (string, error)
ToJSONCompact serializes without indentation (smaller size)
type CosineAnnealingScheduler ¶ added in v0.0.5
type CosineAnnealingScheduler struct {
// contains filtered or unexported fields
}
func NewCosineAnnealingScheduler ¶ added in v0.0.5
func NewCosineAnnealingScheduler(initialLR, minLR float32, totalSteps int) *CosineAnnealingScheduler
func NewCosineAnnealingWithWarmRestarts ¶ added in v0.0.5
func NewCosineAnnealingWithWarmRestarts(initialLR, minLR float32, restartPeriod int) *CosineAnnealingScheduler
func (*CosineAnnealingScheduler) GetLR ¶ added in v0.0.5
func (s *CosineAnnealingScheduler) GetLR(step int) float32
func (*CosineAnnealingScheduler) Name ¶ added in v0.0.5
func (s *CosineAnnealingScheduler) Name() string
func (*CosineAnnealingScheduler) Reset ¶ added in v0.0.5
func (s *CosineAnnealingScheduler) Reset()
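A small sketch pairing the scheduler with a training loop; the handoff of lr to ApplyGradients is an assumption, not prescribed by the package:

    sched := nn.NewCosineAnnealingScheduler(0.01, 0.0001, 1000)
    for step := 0; step < 1000; step++ {
        lr := sched.GetLR(step)
        // ... forward and backward passes ...
        network.ApplyGradients(lr)
    }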
type DType ¶ added in v0.0.7
type DType int
DType defines the numerical type stored in a Tensor
const (
    DTypeFloat32 DType = 0  // Standard 32-bit float (default)
    DTypeFloat64 DType = 1  // 64-bit float (high precision)
    DTypeFloat16 DType = 2  // 16-bit float storage (computation upcasts to F32)
    DTypeInt8    DType = 3  // 8-bit int (quantized, requires scale factor)
    DTypeInt16   DType = 4  // 16-bit int
    DTypeInt32   DType = 5  // 32-bit int
    DTypeInt64   DType = 6  // 64-bit int
    DTypeUint8   DType = 7  // 8-bit unsigned int
    DTypeUint16  DType = 8  // 16-bit unsigned int
    DTypeUint32  DType = 9  // 32-bit unsigned int
    DTypeUint64  DType = 10 // 64-bit unsigned int
)
type DataLoader ¶ added in v0.0.5
DataLoader is a function that returns the input and target for a given step. Returns nil to signal the end of data.
type DetectedTensor ¶ added in v0.0.7
type DetectedTensor struct {
Name string // Original tensor name
Shape []int // Tensor dimensions
ShapeType string // Shape-based type: "1d", "2d", "3d", "4d"
LoomType LayerType // Inferred loom LayerType
LoomTypeName string // Human readable type name
SpecialType string // "embedding", "bias", or empty for normal layers
InSize int // Input dimension
OutSize int // Output dimension
KernelH int // For conv layers
KernelW int // For conv layers
CanLoad bool // Whether loom can load this layer
}
DetectedTensor represents a tensor with inferred layer type
func InspectModel ¶ added in v0.0.7
func InspectModel(weightsData []byte, configData []byte) ([]DetectedTensor, int64, error)
InspectModel inspects a safetensors file and returns detailed tensor info.
Returns all detected tensors with their inferred loom types.
configData is optional (can be nil) but improves detection accuracy.
type DeviationBucket ¶
type DeviationBucket struct {
RangeMin float64 `json:"range_min"`
RangeMax float64 `json:"range_max"`
Count int `json:"count"`
Samples []int `json:"samples"` // Track which sample indices fall in this bucket
}
DeviationBucket represents a specific deviation percentage range
func (DeviationBucket) MarshalJSON ¶ added in v0.0.4
func (db DeviationBucket) MarshalJSON() ([]byte, error)
MarshalJSON implements custom JSON marshaling to handle infinity values
type DeviationMetrics ¶
type DeviationMetrics struct {
Buckets map[string]*DeviationBucket `json:"buckets"`
Score float64 `json:"score"` // Average quality score (0-100)
TotalSamples int `json:"total_samples"`
Failures int `json:"failures"` // Count of 100%+ deviations
Results []PredictionResult `json:"results"` // All individual results
AverageDeviation float64 `json:"avg_deviation"` // Mean deviation across all samples
}
DeviationMetrics stores the full model performance breakdown
func EvaluateModel ¶
func EvaluateModel(expectedOutputs, actualOutputs []float64) (*DeviationMetrics, error)
EvaluateModel evaluates model performance on a batch of predictions. Returns detailed deviation metrics.
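A short sketch with toy predictions:

    expected := []float64{1.0, 2.0, 3.0}
    actual := []float64{1.1, 1.9, 3.5}
    metrics, err := nn.EvaluateModel(expected, actual)
    if err == nil {
        metrics.PrintSummary()
    }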
func LoadMetrics ¶
func LoadMetrics(filepath string) (*DeviationMetrics, error)
LoadMetrics loads deviation metrics from a JSON file
func NewDeviationMetrics ¶
func NewDeviationMetrics() *DeviationMetrics
NewDeviationMetrics initializes an empty metrics struct
func (*DeviationMetrics) ComputeFinalMetrics ¶
func (dm *DeviationMetrics) ComputeFinalMetrics()
ComputeFinalMetrics calculates final scores and averages
func (*DeviationMetrics) GetSamplesInBucket ¶
func (dm *DeviationMetrics) GetSamplesInBucket(bucketName string) []int
GetSamplesInBucket returns the sample indices that fall within a specific bucket
func (*DeviationMetrics) GetWorstSamples ¶
func (dm *DeviationMetrics) GetWorstSamples(n int) []PredictionResult
GetWorstSamples returns the N samples with the highest deviation
func (*DeviationMetrics) PrintSummary ¶
func (dm *DeviationMetrics) PrintSummary()
PrintSummary prints a human-readable summary of the deviation metrics
func (*DeviationMetrics) SaveMetrics ¶
func (dm *DeviationMetrics) SaveMetrics(filepath string) error
SaveMetrics saves the metrics to a JSON file
func (*DeviationMetrics) UpdateMetrics ¶
func (dm *DeviationMetrics) UpdateMetrics(result PredictionResult)
UpdateMetrics updates the metrics with a single prediction result
type EncodedWeights ¶
EncodedWeights stores weights in base64-encoded JSON format
type EnsembleMatch ¶ added in v0.0.7
type EnsembleMatch struct {
ModelA string
ModelB string
Coverage float64 // Combined coverage (0.0 - 1.0)
Overlap float64 // Percentage of samples both got right
}
EnsembleMatch represents a pair of models that complement each other
func FindComplementaryMatches ¶ added in v0.0.7
func FindComplementaryMatches(models []ModelPerformance, minCoverage float64) []EnsembleMatch
FindComplementaryMatches identifies pairs of models whose combined coverage is maximized (ideally 100%).
models: list of model performances
minCoverage: minimum combined coverage to report (e.g., 0.95 for 95%)
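A hedged sketch; building the ModelPerformance slice is not shown, since its fields are not documented here:

    var models []nn.ModelPerformance // assumed populated with per-sample results
    matches := nn.FindComplementaryMatches(models, 0.95)
    nn.PrintEnsembleReport(matches, 5)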
type ExponentialDecayScheduler ¶ added in v0.0.5
type ExponentialDecayScheduler struct {
// contains filtered or unexported fields
}
func NewExponentialDecayScheduler ¶ added in v0.0.5
func NewExponentialDecayScheduler(initialLR, decayRate float32, decaySteps int) *ExponentialDecayScheduler
func (*ExponentialDecayScheduler) GetLR ¶ added in v0.0.5
func (s *ExponentialDecayScheduler) GetLR(step int) float32
func (*ExponentialDecayScheduler) Name ¶ added in v0.0.5
func (s *ExponentialDecayScheduler) Name() string
func (*ExponentialDecayScheduler) Reset ¶ added in v0.0.5
func (s *ExponentialDecayScheduler) Reset()
type FeaturePair ¶ added in v0.0.7
type FeaturePair struct {
Feature1 string `json:"feature1"`
Feature2 string `json:"feature2"`
Correlation float64 `json:"correlation"`
AbsCorr float64 `json:"abs_correlation"`
}
FeaturePair represents a correlation between two specific features
type GPUDeviceInfo ¶
type GPUDeviceInfo struct {
Device *wgpu.Device
Queue *wgpu.Queue
WorkgroupX uint32
// contains filtered or unexported fields
}
GPUDeviceInfo holds WebGPU resources for GPU execution
type GenericBackwardResult ¶ added in v0.0.7
type GenericBackwardResult[T Numeric] struct {
    GradInput   *Tensor[T]
    KernelGrads *Tensor[T]
    BiasGrads   *Tensor[T]
}
GenericBackwardResult holds the results of a generic backward pass.
type GenericModelConfig ¶ added in v0.0.7
type GenericModelConfig struct {
ModelType string `json:"model_type"`
Architectures []string `json:"architectures"`
HiddenSize int `json:"hidden_size"`
IntermediateSize int `json:"intermediate_size"`
NumLayers int `json:"num_hidden_layers"`
NumHeads int `json:"num_attention_heads"`
NumKVHeads int `json:"num_key_value_heads"`
NumChannels int `json:"num_channels"`
ImageSize int `json:"image_size"`
PatchSize int `json:"patch_size"`
NumClasses int `json:"num_labels"`
VocabSize int `json:"vocab_size"`
RMSNormEps float64 `json:"rms_norm_eps"`
LayerNormEps float64 `json:"layer_norm_eps"`
}
GenericModelConfig holds model configuration parsed from config.json
type GenericStepState ¶ added in v0.0.7
type GenericStepState[T Numeric] struct {
    // Current data at each layer (input/output buffers)
    LayerData []*Tensor[T]

    // Backward context for each layer (intermediate states needed for backprop)
    // Can be *Tensor[T], map[string]*Tensor[T], or other types depending on layer
    BackwardContext []any

    // Residual connections tracking
    Residuals []*Tensor[T]

    // Step counter
    StepCount uint64
    // contains filtered or unexported fields
}
GenericStepState holds the current state of each layer for any numeric type.
func NewGenericStepState ¶ added in v0.0.7
func NewGenericStepState[T Numeric](totalLayers, inputSize int) *GenericStepState[T]
NewGenericStepState creates a new generic step state for the given network.
func (*GenericStepState[T]) GetLayerOutput ¶ added in v0.0.7
func (state *GenericStepState[T]) GetLayerOutput(layerIdx int) *Tensor[T]
GetLayerOutput retrieves the current output of a specific layer
func (*GenericStepState[T]) GetOutput ¶ added in v0.0.7
func (state *GenericStepState[T]) GetOutput() *Tensor[T]
GetOutput retrieves the output from the final layer
func (*GenericStepState[T]) SetInput ¶ added in v0.0.7
func (state *GenericStepState[T]) SetInput(input *Tensor[T])
SetInput sets the input data for the network (layer 0)
type GenericTweenState ¶ added in v0.0.7
type GenericTweenState[T Numeric] struct {
    // Forward pass: what each layer ACTUALLY produces (top-down)
    ForwardActs []*Tensor[T]

    // Backward pass: what each layer SHOULD produce (bottom-up from expected)
    BackwardTargets []*Tensor[T]

    // Link budget per layer: how much information is preserved (0-1)
    LinkBudgets []float32

    // Gap at each layer: magnitude of difference between forward and backward
    Gaps []float32

    // Momentum for stable updates (stored as float32 for smoothness)
    WeightVel [][]float32
    BiasVel   [][]float32

    // Config holds all tunable parameters
    Config *TweenConfig

    TotalLayers int
    TweenSteps  int
    LossHistory []float32

    // === CHAIN RULE SUPPORT ===
    ChainGradients [][]float32 // Gradients are always small floats

    // === GRADIENT EXPLOSION DETECTION ===
    PrevAvgGap     float32
    GapGrowthRate  float32
    ExplosionCount int
    AdaptiveRate   float32
    BaselineGap    float32
    GapSamples     int
}
GenericTweenState holds bidirectional analysis state for any numeric type.
func NewGenericTweenState ¶ added in v0.0.7
func NewGenericTweenState[T Numeric](n *Network, config *TweenConfig) *GenericTweenState[T]
NewGenericTweenState creates a generic tween state.
func (*GenericTweenState[T]) BackwardPassChainRule ¶ added in v0.0.7
func (ts *GenericTweenState[T]) BackwardPassChainRule(n *Network, targetClass int, outputSize int)
BackwardPassChainRule: Generates float32 gradients based on T inputs
func (*GenericTweenState[T]) CalculateLinkBudgets ¶ added in v0.0.7
func (ts *GenericTweenState[T]) CalculateLinkBudgets()
CalculateLinkBudgets measures information preservation
func (*GenericTweenState[T]) ComputeGaps ¶ added in v0.0.7
func (ts *GenericTweenState[T]) ComputeGaps()
ComputeGaps calculates gaps (alias for compatibility)
func (*GenericTweenState[T]) ForwardPass ¶ added in v0.0.7
func (ts *GenericTweenState[T]) ForwardPass(n *Network, input *Tensor[T], backend Backend[T]) *Tensor[T]
ForwardPass executes the network forward pass and captures activations.
func (*GenericTweenState[T]) GetGap ¶ added in v0.0.7
func (ts *GenericTweenState[T]) GetGap(layerIdx int) float32
GetGap returns the gap at a specific layer.
func (*GenericTweenState[T]) SetForwardActivation ¶ added in v0.0.7
func (ts *GenericTweenState[T]) SetForwardActivation(layerIdx int, activation *Tensor[T])
SetForwardActivation sets the forward activation for a layer.
func (*GenericTweenState[T]) TweenStep ¶ added in v0.0.7
func (ts *GenericTweenState[T]) TweenStep(n *Network, input *Tensor[T], targetClass int, outputSize int, rate float32, backend Backend[T]) float32
TweenStep: One complete bidirectional iteration with explosion detection
func (*GenericTweenState[T]) TweenWeightsChainRule ¶ added in v0.0.7
func (ts *GenericTweenState[T]) TweenWeightsChainRule(n *Network, rate float32)
TweenWeightsChainRule updates weights using chain rule gradients
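A tween training loop, as a minimal sketch; it assumes a network n, a *TweenConfig cfg, an input tensor, a target class, the output size, and a Backend[float32] backend, all constructed elsewhere:

    ts := nn.NewGenericTweenState[float32](n, cfg)
    for step := 0; step < 100; step++ {
        loss := ts.TweenStep(n, input, targetClass, outputSize, 0.01, backend)
        if ts.ExplosionCount > 3 { // exported explosion-detection counter
            break // back off if gaps keep growing
        }
        _ = loss
    }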
type GridPosition ¶ added in v0.0.4
type GridPosition struct {
BranchIndex int // Which branch this position is for
TargetRow int // Grid row to place output
TargetCol int // Grid column to place output
TargetLayer int // Layer index within that cell
}
GridPosition specifies where a parallel branch output should be placed in the grid
type GridPositionDef ¶ added in v0.0.4
type GridPositionDef struct {
BranchIndex int `json:"branch_index"`
TargetRow int `json:"target_row"`
TargetCol int `json:"target_col"`
TargetLayer int `json:"target_layer"`
}
GridPositionDef is the JSON representation of a grid position
type HTTPObserver ¶ added in v0.0.6
type HTTPObserver struct {
URL string
Timeout time.Duration
// contains filtered or unexported fields
}
HTTPObserver sends layer events to an HTTP endpoint (for visualization)
func NewHTTPObserver ¶ added in v0.0.6
func NewHTTPObserver(url string) *HTTPObserver
func (*HTTPObserver) OnBackward ¶ added in v0.0.6
func (o *HTTPObserver) OnBackward(event LayerEvent)
func (*HTTPObserver) OnForward ¶ added in v0.0.6
func (o *HTTPObserver) OnForward(event LayerEvent)
type ImportedLayerConfig ¶ added in v0.0.3
type ImportedLayerConfig struct {
Type string `json:"type"`
Activation string `json:"activation,omitempty"`
// Dense layer
InputSize int `json:"input_size,omitempty"`
OutputSize int `json:"output_size,omitempty"`
Kernel [][]float32 `json:"kernel,omitempty"`
Bias []float32 `json:"bias,omitempty"`
// Multi-head attention
DModel int `json:"d_model,omitempty"`
NumHeads int `json:"num_heads,omitempty"`
HeadDim int `json:"head_dim,omitempty"`
SeqLength int `json:"seq_length,omitempty"`
QWeights [][]float32 `json:"q_weights,omitempty"`
QBias []float32 `json:"q_bias,omitempty"`
KWeights [][]float32 `json:"k_weights,omitempty"`
KBias []float32 `json:"k_bias,omitempty"`
VWeights [][]float32 `json:"v_weights,omitempty"`
VBias []float32 `json:"v_bias,omitempty"`
OutputWeight [][]float32 `json:"output_weight,omitempty"`
OutputBias []float32 `json:"output_bias,omitempty"`
// LayerNorm
NormSize int `json:"norm_size,omitempty"`
Gamma []float32 `json:"gamma,omitempty"`
Beta []float32 `json:"beta,omitempty"`
Epsilon float32 `json:"epsilon,omitempty"`
}
ImportedLayerConfig represents a layer from an imported model
type ImportedModelFormat ¶ added in v0.0.3
type ImportedModelFormat struct {
InputSize int `json:"input_size"`
GridRows int `json:"grid_rows"`
GridCols int `json:"grid_cols"`
LayersPerCell int `json:"layers_per_cell"`
BatchSize int `json:"batch_size"`
UseGPU bool `json:"use_gpu"`
Layers []ImportedLayerConfig `json:"layers"`
Metadata map[string]interface{} `json:"metadata,omitempty"`
}
ImportedModelFormat represents models converted from PyTorch/TensorFlow
type LRScheduler ¶ added in v0.0.5
type LRScheduler interface {
// GetLR returns the learning rate for the given step
GetLR(step int) float32
// Reset resets the scheduler state
Reset()
// Name returns the scheduler name
Name() string
}
LRScheduler interface defines learning rate scheduling strategies
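Any type with these three methods can drive the training loop. An illustrative (not built-in) linear-warmup scheduler satisfying the interface:

    // WarmupScheduler is a hypothetical client-side scheduler:
    // linear warmup to BaseLR, then constant.
    type WarmupScheduler struct {
        BaseLR      float32
        WarmupSteps int
    }

    func (s *WarmupScheduler) GetLR(step int) float32 {
        if step < s.WarmupSteps {
            return s.BaseLR * float32(step+1) / float32(s.WarmupSteps)
        }
        return s.BaseLR
    }

    func (s *WarmupScheduler) Reset()       {}
    func (s *WarmupScheduler) Name() string { return "warmup-constant" }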
type LSTMWeights ¶ added in v0.0.7
type LSTMWeights[T Numeric] struct {
WeightIH_i, WeightHH_i, BiasH_i *Tensor[T] // Input gate
WeightIH_f, WeightHH_f, BiasH_f *Tensor[T] // Forget gate
WeightIH_g, WeightHH_g, BiasH_g *Tensor[T] // Cell candidate gate
WeightIH_o, WeightHH_o, BiasH_o *Tensor[T] // Output gate
}
LSTMWeights holds all LSTM gate weights for type-generic operations.
type LayerConfig ¶
type LayerConfig struct {
Type LayerType
Activation ActivationType
// Conv2D specific parameters
KernelSize int // Size of convolution kernel (e.g., 3 for 3x3)
Stride int // Stride for convolution
Padding int // Padding for convolution
Filters int // Number of output filters/channels
Kernel []float32 // Convolution kernel weights [filters][inChannels][kernelH][kernelW]
Bias []float32 // Bias terms [filters]
// Shape information (for Conv2D)
InputHeight int
InputWidth int
InputChannels int
OutputHeight int
OutputWidth int
// Multi-Head Attention specific parameters
NumHeads int // Number of attention heads (query heads)
NumKVHeads int // Number of key/value heads (for GQA, 0 = same as NumHeads)
HeadDim int // Dimension per head (dModel / numHeads)
DModel int // Model dimension (embedding size)
SeqLength int // Sequence length
QWeights []float32 // Query projection weights [dModel][dModel]
KWeights []float32 // Key projection weights [dModel][dModel or smaller for GQA]
VWeights []float32 // Value projection weights [dModel][dModel or smaller for GQA]
OutputWeight []float32 // Output projection weights [dModel][dModel]
QBias []float32 // Query bias [dModel]
KBias []float32 // Key bias [dModel or smaller for GQA]
VBias []float32 // Value bias [dModel or smaller for GQA]
OutputBias []float32 // Output bias [dModel]
// RNN/LSTM specific parameters
HiddenSize int // Hidden state size
RNNInputSize int // Input feature size (different from network InputSize)
WeightIH []float32 // Input-to-hidden weights [hiddenSize][inputSize]
WeightHH []float32 // Hidden-to-hidden weights [hiddenSize][hiddenSize]
BiasH []float32 // Hidden bias [hiddenSize]
// LSTM specific parameters (gates: i=input, f=forget, g=cell, o=output)
WeightIH_i []float32 // Input gate: input-to-hidden [hiddenSize][inputSize]
WeightHH_i []float32 // Input gate: hidden-to-hidden [hiddenSize][hiddenSize]
BiasH_i []float32 // Input gate bias [hiddenSize]
WeightIH_f []float32 // Forget gate: input-to-hidden [hiddenSize][inputSize]
WeightHH_f []float32 // Forget gate: hidden-to-hidden [hiddenSize][hiddenSize]
BiasH_f []float32 // Forget gate bias [hiddenSize]
WeightIH_g []float32 // Cell gate: input-to-hidden [hiddenSize][inputSize]
WeightHH_g []float32 // Cell gate: hidden-to-hidden [hiddenSize][hiddenSize]
BiasH_g []float32 // Cell gate bias [hiddenSize]
WeightIH_o []float32 // Output gate: input-to-hidden [hiddenSize][inputSize]
WeightHH_o []float32 // Output gate: hidden-to-hidden [hiddenSize][hiddenSize]
BiasH_o []float32 // Output gate bias [hiddenSize]
// Softmax specific parameters
SoftmaxVariant SoftmaxType // Which softmax variant to use
SoftmaxRows int // For grid softmax: number of rows (agents/groups)
SoftmaxCols int // For grid softmax: number of columns (actions per row)
Temperature float32 // For temperature softmax (default 1.0)
GumbelNoise bool // For Gumbel softmax: whether to add noise
Mask []bool // For masked softmax: which positions to include
HierarchyLevels []int // For hierarchical softmax: sizes at each level [strategies, units, actions]
AdaptiveClusters [][]int // For adaptive softmax: item indices per cluster
MixtureWeights []float32 // For mixture softmax: weights for each component
EntmaxAlpha float32 // For entmax: alpha parameter (1.0=softmax, 2.0=sparsemax)
// LayerNorm specific parameters
NormSize int // Size of the normalization dimension
Gamma []float32 // Scale parameters [normSize]
Beta []float32 // Shift parameters [normSize]
Epsilon float32 // Small constant for numerical stability (default 1e-5)
// SwiGLU specific parameters (gated FFN: down_proj(silu(gate_proj(x)) * up_proj(x)))
GateWeights []float32 // Gate projection weights [intermediate][hidden]
UpWeights []float32 // Up projection weights [intermediate][hidden]
DownWeights []float32 // Down projection weights [hidden][intermediate]
GateBias []float32 // Gate bias [intermediate]
UpBias []float32 // Up bias [intermediate]
DownBias []float32 // Down bias [hidden]
// Residual connection
ResidualSkip int // How many layers back to skip for residual (0 = no residual)
// Embedding layer specific parameters
VocabSize int // Size of vocabulary (number of unique tokens)
EmbeddingDim int // Dimension of embedding vectors
EmbeddingWeights []float32 // Embedding lookup table [VocabSize * EmbeddingDim]
// Conv1D specific parameters (for audio/sequence data)
Conv1DFilters int // Number of output filters
Conv1DKernelSize int // Size of 1D kernel
Conv1DStride int // Stride for convolution
Conv1DPadding int // Padding for convolution
Conv1DKernel []float32 // Kernel weights [filters][inChannels][kernelSize]
Conv1DBias []float32 // Bias terms [filters]
Conv1DInChannels int // Input channels
// Parallel layer specific parameters
ParallelBranches []LayerConfig // Sub-layers to run in parallel
CombineMode string // How to combine outputs: "concat", "add", "avg", "grid_scatter"
GridPositions []GridPosition // For grid_scatter: where to place each branch output
GridOutputRows int // For grid_scatter: output grid dimensions
GridOutputCols int
GridOutputLayers int
// Filter combine mode (gated parallel / mixture of experts)
FilterGateConfig *LayerConfig // Gate layer to compute routing weights (Dense, MHA, etc.)
FilterSoftmax SoftmaxType // Softmax variant for gating (default: SoftmaxStandard)
FilterTemperature float32 // Temperature for softmax (lower = sharper selection)
// Observer for debugging/recording (nil = no observation)
Observer LayerObserver
// Grid position (set by Network when layer is accessed)
GridRow int // Row in the grid
GridCol int // Column in the grid
CellLayer int // Layer index within the cell
ModelID string // Identifier for the model (for multi-model visualization)
// Pruning support
IsDisabled bool // If true, this layer acts as an identity function (pass-through)
// Training control
Frozen bool // If true, weights in this layer will not be updated during training
}
LayerConfig holds configuration for a specific layer in the grid
func CallLayerInitFunction ¶ added in v0.0.2
func CallLayerInitFunction(name string, args ...interface{}) (LayerConfig, error)
CallLayerInitFunction calls a layer init function by name with the provided arguments
func GraftNetworks ¶ added in v0.0.7
func GraftNetworks(networks []*Network, combineMode string) (*LayerConfig, error)
GraftNetworks takes multiple heterogeneous networks and grafts their "Hive" (Layer 1) into a single parallel layer. It assumes the networks follow the ef.go structure: Input(0) -> Hive(1) -> Merger(2) -> Output(3). It returns a LayerConfig for a new Super-Hive layer containing all branches from the source hives.
func InitConv1DLayer ¶ added in v0.0.7
func InitConv1DLayer(
seqLen, inChannels int,
kernelSize, stride, padding, filters int,
activation ActivationType,
) LayerConfig
InitConv1DLayer initializes a Conv1D layer with random weights
func InitConv2DLayer ¶
func InitConv2DLayer(
inputHeight, inputWidth, inputChannels int,
kernelSize, stride, padding, filters int,
activation ActivationType,
) LayerConfig
InitConv2DLayer initializes a Conv2D layer with random weights
func InitDenseBrain ¶ added in v0.0.7
func InitDenseBrain(dModel int, activation ActivationType, scale float32) LayerConfig
InitDenseBrain creates a Dense brain layer with specified activation
func InitDenseLayer ¶
func InitDenseLayer(inputSize, outputSize int, activation ActivationType) LayerConfig
InitDenseLayer initializes a dense (fully-connected) layer
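A minimal sketch of hand-assembling a grid from dense layers; the NewNetwork argument order follows the package overview example and may need adjusting, and the activation constant is left abstract since the variant names are not listed here:

    n := nn.NewNetwork(4, 1, 1, 2) // input/batch size, grid rows, grid cols, layers per cell
    var act nn.ActivationType      // pick an activation variant, e.g. Tanh
    n.SetLayer(0, 0, 0, nn.InitDenseLayer(4, 8, act))
    n.SetLayer(0, 0, 1, nn.InitDenseLayer(8, 2, act))
    out, err := n.ForwardCPU(make([]float32, 4))
    _, _ = out, err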
func InitDiverseHive ¶ added in v0.0.7
func InitDiverseHive(config ArchConfig) LayerConfig
InitDiverseHive creates a parallel layer with diverse brain types
func InitEmbeddingLayer ¶ added in v0.0.7
func InitEmbeddingLayer(vocabSize, embeddingDim int) LayerConfig
InitEmbeddingLayer initializes an Embedding layer with random weights
func InitEntmaxLayer ¶ added in v0.0.2
func InitEntmaxLayer(alpha float32) LayerConfig
InitEntmaxLayer creates an entmax layer.
alpha: 1.0=softmax, 1.5=entmax-1.5, 2.0=sparsemax
func InitFilteredParallelLayer ¶ added in v0.0.7
func InitFilteredParallelLayer(branches []LayerConfig, gateInputSize int, softmaxType SoftmaxType, temperature float32) LayerConfig
InitFilteredParallelLayer creates a parallel layer with softmax-gated filtering.
branches: the sub-layers to run in parallel
gateInputSize: size of the input to the gate layer (the gate's output size matches len(branches))
softmaxType: which softmax variant to use for gating
temperature: softmax temperature for gating (lower = sharper selection)
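A two-expert gated mixture, as a minimal sketch (the dense experts and the 0.5 temperature are illustrative):

    var act nn.ActivationType // activation variant of your choice
    experts := []nn.LayerConfig{
        nn.InitDenseLayer(16, 16, act),
        nn.InitDenseLayer(16, 16, act),
    }
    moe := nn.InitFilteredParallelLayer(experts, 16, nn.SoftmaxStandard, 0.5)
    _ = moe // place into the grid with Network.SetLayer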
func InitGridSoftmaxLayer ¶ added in v0.0.2
func InitGridSoftmaxLayer(rows, cols int) LayerConfig
InitGridSoftmaxLayer creates a grid softmax layer for multi-agent decisions.
rows: number of agents/groups
cols: number of actions per agent
func InitGumbelSoftmaxLayer ¶ added in v0.0.2
func InitGumbelSoftmaxLayer(temperature float32) LayerConfig
InitGumbelSoftmaxLayer creates a Gumbel softmax layer (adds exploration noise).
temperature: controls noise strength
func InitHierarchicalSoftmaxLayer ¶ added in v0.0.2
func InitHierarchicalSoftmaxLayer(levels []int) LayerConfig
InitHierarchicalSoftmaxLayer creates a hierarchical softmax layer.
levels: sizes at each level, e.g., [3, 3, 4] = 3 strategies × 3 units × 4 actions
func InitLSTMBrain ¶ added in v0.0.7
func InitLSTMBrain(dModel int, scale float32) LayerConfig
InitLSTMBrain creates an LSTM brain layer
func InitLSTMLayer ¶
func InitLSTMLayer(inputSize, hiddenSize, batchSize, seqLength int) LayerConfig
InitLSTMLayer initializes an LSTM layer with Xavier/Glorot initialization. LSTM has 4 gates: input (i), forget (f), cell/candidate (g), output (o).
inputSize: size of input features
hiddenSize: size of hidden state and cell state
batchSize: batch size for processing
seqLength: length of input sequences
func InitMHABrain ¶ added in v0.0.7
func InitMHABrain(dModel, numHeads int, scale float32) LayerConfig
InitMHABrain creates a Multi-Head Attention brain layer
func InitMaskedSoftmaxLayer ¶ added in v0.0.2
func InitMaskedSoftmaxLayer(maskSize int) LayerConfig
InitMaskedSoftmaxLayer creates a masked softmax layer.
maskSize: size of the mask array (must match input size)
func InitNormDenseBrain ¶ added in v0.0.7
func InitNormDenseBrain(dModel int, activation ActivationType, scale float32) LayerConfig
InitNormDenseBrain creates a normalized Dense brain layer
func InitRNNBrain ¶ added in v0.0.7
func InitRNNBrain(dModel int, scale float32) LayerConfig
InitRNNBrain creates a simple RNN brain layer
func InitRNNLayer ¶
func InitRNNLayer(inputSize, hiddenSize, batchSize, seqLength int) LayerConfig
InitRNNLayer initializes a Recurrent Neural Network layer with Xavier/Glorot initialization.
inputSize: size of input features
hiddenSize: size of hidden state
batchSize: batch size for processing
seqLength: length of input sequences
func InitSequentialLayer ¶ added in v0.0.7
func InitSequentialLayer(layers ...LayerConfig) LayerConfig
InitSequentialLayer initializes a sequential layer that runs multiple sub-layers in order. This is useful for grouping layers (e.g. expert + stitch) as a single branch in a parallel layer.
func InitSoftmaxLayer ¶ added in v0.0.2
func InitSoftmaxLayer() LayerConfig
InitSoftmaxLayer creates a standard softmax layer
func InitSparsemaxLayer ¶ added in v0.0.2
func InitSparsemaxLayer() LayerConfig
InitSparsemaxLayer creates a sparsemax layer (can output exact zeros)
func InitStitchLayer ¶ added in v0.0.7
func InitStitchLayer(inputSize, outputSize int) LayerConfig
InitStitchLayer creates a linear dense layer to project dimensionality. It is effectively a Dense layer with linear activation.
func InitSwiGLUBrain ¶ added in v0.0.7
func InitSwiGLUBrain(dModel int, scale float32) LayerConfig
InitSwiGLUBrain creates a SwiGLU-style brain layer
func InitTemperatureSoftmaxLayer ¶ added in v0.0.2
func InitTemperatureSoftmaxLayer(temperature float32) LayerConfig
InitTemperatureSoftmaxLayer creates a temperature-scaled softmax layer.
temperature: controls distribution sharpness (0.1=sharp, 1.0=normal, 5.0=smooth)
func (*LayerConfig) SetMask ¶ added in v0.0.2
func (config *LayerConfig) SetMask(mask []bool) error
SetMask updates the mask for a masked softmax layer
func (*LayerConfig) SetTemperature ¶ added in v0.0.2
func (config *LayerConfig) SetTemperature(temp float32)
SetTemperature updates the temperature parameter
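Both setters operate on an existing softmax config; a short sketch masking one action at runtime (combining a mask with a temperature here is illustrative):

    cfg := nn.InitMaskedSoftmaxLayer(4)
    mask := []bool{true, true, false, true} // position 2 is currently illegal
    if err := cfg.SetMask(mask); err != nil {
        panic(err) // mask length must match the configured size
    }
    cfg.SetTemperature(0.5) // sharpen the remaining distribution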
type LayerDefinition ¶
type LayerDefinition struct {
Type string `json:"type"`
Activation string `json:"activation"`
// Dense layer fields (also shared with RNN/LSTM)
Width int `json:"width,omitempty"`
Height int `json:"height,omitempty"`
InputSize int `json:"input_size,omitempty"` // Used for Dense, RNN, LSTM
OutputSize int `json:"output_size,omitempty"` // Used for Dense
// Conv2D fields
InputChannels int `json:"input_channels,omitempty"`
Filters int `json:"filters,omitempty"`
KernelSize int `json:"kernel_size,omitempty"`
Stride int `json:"stride,omitempty"`
Padding int `json:"padding,omitempty"`
InputHeight int `json:"input_height,omitempty"`
InputWidth int `json:"input_width,omitempty"`
OutputHeight int `json:"output_height,omitempty"`
OutputWidth int `json:"output_width,omitempty"`
// MHA fields
DModel int `json:"d_model,omitempty"`
NumHeads int `json:"num_heads,omitempty"`
SeqLength int `json:"seq_length,omitempty"`
// RNN/LSTM fields
HiddenSize int `json:"hidden_size,omitempty"`
// Softmax fields
SoftmaxVariant string `json:"softmax_variant,omitempty"`
SoftmaxRows int `json:"softmax_rows,omitempty"`
SoftmaxCols int `json:"softmax_cols,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
GumbelNoise bool `json:"gumbel_noise,omitempty"`
Mask []bool `json:"mask,omitempty"`
HierarchyLevels []int `json:"hierarchy_levels,omitempty"`
AdaptiveClusters [][]int `json:"adaptive_clusters,omitempty"`
MixtureWeights []float32 `json:"mixture_weights,omitempty"`
EntmaxAlpha float32 `json:"entmax_alpha,omitempty"`
// Normalization fields
NormSize int `json:"norm_size,omitempty"`
Epsilon float32 `json:"epsilon,omitempty"`
// Embedding fields
VocabSize int `json:"vocab_size,omitempty"`
EmbeddingDim int `json:"embedding_dim,omitempty"`
// Conv1D fields
InputLength int `json:"input_length,omitempty"`
// Parallel layer fields
Branches []LayerDefinition `json:"branches,omitempty"`
CombineMode string `json:"combine_mode,omitempty"` // "concat", "add", "avg", "grid_scatter"
GridPositions []GridPositionDef `json:"grid_positions,omitempty"`
GridOutputRows int `json:"grid_output_rows,omitempty"`
GridOutputCols int `json:"grid_output_cols,omitempty"`
GridOutputLayers int `json:"grid_output_layers,omitempty"`
}
LayerDefinition defines a single layer's configuration
type LayerEvent ¶ added in v0.0.6
type LayerEvent struct {
Mode string `json:"mode"` // "normal" or "step"
Type string // "forward", "backward"
LayerIdx int // Which layer in the network (flattened index)
LayerType LayerType // Type of layer
Stats LayerStats // Summary statistics
Input []float32 // Input data (optional, can be nil to save memory)
Output []float32 // Output data (optional, can be nil to save memory)
StepCount uint64 `json:"step_count"` // For step-based execution
// Grid position info for visualization
GridRow int `json:"grid_row"` // Row in the grid
GridCol int `json:"grid_col"` // Column in the grid
CellLayer int `json:"cell_layer"` // Layer index within the cell
ModelID string `json:"model_id"` // Identifier for the model
// Branch tracking for parallel layers
BranchIdx int `json:"branch_idx"` // Which branch within parallel layer (-1 if not a branch)
IsParallelBranch bool `json:"is_parallel_branch"` // True if this is a branch inside a parallel layer
}
LayerEvent represents an event during forward/backward pass
type LayerInitFunction ¶ added in v0.0.2
type LayerInitFunction struct {
Name string
Function interface{} `json:"-"` // Omit from JSON serialization
NumArgs int
ArgTypes []string
}
LayerInitFunction represents a layer initialization function
func ListLayerInitFunctions ¶ added in v0.0.2
func ListLayerInitFunctions() []LayerInitFunction
ListLayerInitFunctions returns metadata about all available layer init functions
type LayerObserver ¶ added in v0.0.6
type LayerObserver interface {
// OnForward is called after a layer's forward pass completes
OnForward(event LayerEvent)
// OnBackward is called after a layer's backward pass completes
OnBackward(event LayerEvent)
}
LayerObserver receives events during network execution. Implement this interface for console logging, HTTP streaming, visualization, etc.
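A console observer, as an illustrative implementation (assign it to the Observer field on a LayerConfig, or use the provided HTTPObserver; an fmt import is assumed). It reads only the LayerEvent and LayerStats fields documented here:

    type ConsoleObserver struct{}

    func (ConsoleObserver) OnForward(e nn.LayerEvent) {
        fmt.Printf("fwd layer %d (%s): avg=%.4f active=%d/%d\n",
            e.LayerIdx, e.Stats.LayerType, e.Stats.AvgActivation,
            e.Stats.ActiveNeurons, e.Stats.TotalNeurons)
    }

    func (ConsoleObserver) OnBackward(e nn.LayerEvent) {
        fmt.Printf("bwd layer %d\n", e.LayerIdx)
    }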
type LayerStats ¶ added in v0.0.6
type LayerStats struct {
AvgActivation float32 // Mean activation value
MaxActivation float32 // Maximum activation value
MinActivation float32 // Minimum activation value
ActiveNeurons int // Count of neurons with activation > threshold
TotalNeurons int // Total neuron count
LayerType string // "dense", "conv2d", "attention", etc.
}
LayerStats contains summary statistics for layer activity
type LayerTelemetry ¶ added in v0.0.6
type LayerTelemetry struct {
// Grid position
GridRow int `json:"grid_row"`
GridCol int `json:"grid_col"`
CellLayer int `json:"cell_layer"`
// Layer info
Type string `json:"type"`
Activation string `json:"activation,omitempty"`
Parameters int `json:"parameters"`
// Dimensions (generic)
InputShape []int `json:"input_shape,omitempty"`
OutputShape []int `json:"output_shape,omitempty"`
// For nested/parallel layers
Branches []LayerTelemetry `json:"branches,omitempty"`
CombineMode string `json:"combine_mode,omitempty"` // "concat", "add", "avg", "grid_scatter"
}
LayerTelemetry contains metadata about a specific layer
type LayerType ¶
type LayerType int
LayerType defines the type of neural network layer
const (
LayerDense LayerType = 0 // Dense/Fully-connected layer (element-wise activation)
LayerConv2D LayerType = 1 // 2D Convolutional layer
LayerMultiHeadAttention LayerType = 2 // Multi-Head Attention layer
LayerRNN LayerType = 3 // Recurrent Neural Network layer
LayerLSTM LayerType = 4 // Long Short-Term Memory layer
LayerSoftmax LayerType = 5 // Softmax layer with multiple variants
LayerNorm LayerType = 6 // Layer Normalization
LayerResidual LayerType = 7 // Residual/Skip connection (adds stored input)
LayerRMSNorm LayerType = 8 // RMS Normalization (Llama-style, no beta)
LayerSwiGLU LayerType = 9 // SwiGLU gated activation (gate_proj * silu(up_proj))
LayerParallel LayerType = 10 // Parallel layer (runs multiple sub-layers and concatenates outputs)
LayerEmbedding LayerType = 11 // Embedding lookup table (token/position -> vector)
LayerConv1D LayerType = 12 // 1D Convolutional layer (for audio/sequence data)
LayerSequential LayerType = 13 // Sequential layer (runs multiple sub-layers in sequence)
)
func AllSupportedTypes ¶ added in v0.0.7
func AllSupportedTypes() []LayerType
AllSupportedTypes returns all LayerTypes that loom supports
func InferLoomLayerType ¶ added in v0.0.7
func InferLoomLayerType(shape []int, nameHint string, config *GenericModelConfig) (LayerType, bool, string)
InferLoomLayerType tries to determine the loom LayerType from shape, name hint, and config.
Primary: shape + config-based detection
Secondary: name hints for disambiguation
Returns: LayerType, canLoad, specialType ("embedding", "bias", or "")
type LayerWeights ¶
type LayerWeights struct {
// Data type for this layer (optional, defaults to float32)
DType string `json:"dtype,omitempty"`
// Dense weights
Biases []float32 `json:"biases,omitempty"`
// Conv2D weights
Kernel []float32 `json:"kernel,omitempty"`
ConvBias []float32 `json:"conv_bias,omitempty"`
// MHA weights
QWeights []float32 `json:"q_weights,omitempty"`
KWeights []float32 `json:"k_weights,omitempty"`
VWeights []float32 `json:"v_weights,omitempty"`
OutputWeight []float32 `json:"output_weight,omitempty"`
QBias []float32 `json:"q_bias,omitempty"`
KBias []float32 `json:"k_bias,omitempty"`
VBias []float32 `json:"v_bias,omitempty"`
OutputBias []float32 `json:"output_bias,omitempty"`
// RNN weights
WeightIH []float32 `json:"weight_ih,omitempty"`
WeightHH []float32 `json:"weight_hh,omitempty"`
BiasH []float32 `json:"bias_h,omitempty"`
// LSTM weights
WeightII []float32 `json:"weight_ii,omitempty"`
WeightIF []float32 `json:"weight_if,omitempty"`
WeightIG []float32 `json:"weight_ig,omitempty"`
WeightIO []float32 `json:"weight_io,omitempty"`
WeightHI []float32 `json:"weight_hi,omitempty"`
WeightHF []float32 `json:"weight_hf,omitempty"`
WeightHG []float32 `json:"weight_hg,omitempty"`
WeightHO []float32 `json:"weight_ho,omitempty"`
BiasI []float32 `json:"bias_i,omitempty"`
BiasF []float32 `json:"bias_f,omitempty"`
BiasG []float32 `json:"bias_g,omitempty"`
BiasO []float32 `json:"bias_o,omitempty"`
// LayerNorm / RMSNorm weights
Gamma []float32 `json:"gamma,omitempty"`
Beta []float32 `json:"beta,omitempty"`
// SwiGLU weights
GateWeights []float32 `json:"gate_weights,omitempty"`
UpWeights []float32 `json:"up_weights,omitempty"`
DownWeights []float32 `json:"down_weights,omitempty"`
GateBias []float32 `json:"gate_bias,omitempty"`
UpBias []float32 `json:"up_bias,omitempty"`
DownBias []float32 `json:"down_bias,omitempty"`
// Parallel layer branch weights (recursive)
BranchWeights []LayerWeights `json:"branch_weights,omitempty"`
}
LayerWeights stores weights for a single layer. Note: currently serialized as float32, but generic tensors can be converted on load.
type LinearDecayScheduler ¶ added in v0.0.5
type LinearDecayScheduler struct {
// contains filtered or unexported fields
}
func NewLinearDecayScheduler ¶ added in v0.0.5
func NewLinearDecayScheduler(initialLR, finalLR float32, totalSteps int) *LinearDecayScheduler
func (*LinearDecayScheduler) GetLR ¶ added in v0.0.5
func (s *LinearDecayScheduler) GetLR(step int) float32
func (*LinearDecayScheduler) Name ¶ added in v0.0.5
func (s *LinearDecayScheduler) Name() string
func (*LinearDecayScheduler) Reset ¶ added in v0.0.5
func (s *LinearDecayScheduler) Reset()
type MethodInfo ¶
type MethodInfo struct {
MethodName string `json:"method_name"`
Parameters []ParameterInfo `json:"parameters"`
Returns []string `json:"returns"`
}
MethodInfo represents metadata about a method, including its name, parameters, and parameter types.
type ModelBundle ¶
type ModelBundle struct {
Type string `json:"type"`
Version int `json:"version"`
Models []SavedModel `json:"models"`
}
ModelBundle represents a collection of saved models
func LoadBundle ¶
func LoadBundle(filename string) (*ModelBundle, error)
LoadBundle loads a model bundle from a file
func LoadBundleFromString ¶
func LoadBundleFromString(jsonString string) (*ModelBundle, error)
LoadBundleFromString loads a model bundle from a JSON string. This is useful for WASM, CABI pipelines, or embedding models directly in code.
func (*ModelBundle) SaveToFile ¶
func (b *ModelBundle) SaveToFile(filename string) error
SaveToFile saves the bundle to a file
func (*ModelBundle) SaveToString ¶
func (b *ModelBundle) SaveToString() (string, error)
SaveToString converts the bundle to a JSON string. This is useful for WASM, CABI pipelines, or returning models over a network.
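A round trip through a bundle string, as a minimal sketch (the Type and Version values are illustrative, and n is a network built elsewhere):

    saved, err := n.SerializeModel("demo")
    if err != nil {
        panic(err)
    }
    bundle := &nn.ModelBundle{Type: "model_bundle", Version: 1, Models: []nn.SavedModel{saved}}
    js, _ := bundle.SaveToString()

    loaded, _ := nn.LoadBundleFromString(js)
    restored, err := nn.DeserializeModel(loaded.Models[0])
    _, _ = restored, err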
type ModelPerformance ¶ added in v0.0.7
type ModelPerformance struct {
ModelID string
// Mask is a comprehensive boolean vector where Mask[i] is true if the model
// correctly handled sample i.
Mask []bool
}
ModelPerformance holds the correctness mask for a specific model
type ModelSizeInfo ¶ added in v0.0.7
type ModelSizeInfo struct {
DType string `json:"dtype"`
TotalWeights int `json:"total_weights"`
BytesPerWeight int `json:"bytes_per_weight"`
TotalBytes int `json:"total_bytes"`
Base64Bytes int `json:"base64_bytes"` // After base64 encoding (~33% larger)
}
ModelSizeInfo contains information about model storage size
type ModelTelemetry ¶ added in v0.0.6
type ModelTelemetry struct {
ID string `json:"id"`
TotalLayers int `json:"total_layers"`
TotalParams int `json:"total_parameters"`
Layers []LayerTelemetry `json:"layers"`
}
ModelTelemetry represents a single network's structure
func ExtractNetworkBlueprint ¶ added in v0.0.6
func ExtractNetworkBlueprint(n *Network, modelID string) ModelTelemetry
ExtractNetworkBlueprint extracts telemetry data from a loaded network.
type MultiPrecisionLayer ¶ added in v0.0.7
type MultiPrecisionLayer struct {
DType string `json:"dtype,omitempty"` // Layer-specific dtype override
// All weights stored as base64-encoded bytes
// The bytes are interpreted according to DType
Kernel string `json:"kernel,omitempty"`
Biases string `json:"biases,omitempty"`
QWeights string `json:"q_weights,omitempty"`
KWeights string `json:"k_weights,omitempty"`
VWeights string `json:"v_weights,omitempty"`
OutputWeight string `json:"output_weight,omitempty"`
QBias string `json:"q_bias,omitempty"`
KBias string `json:"k_bias,omitempty"`
VBias string `json:"v_bias,omitempty"`
OutputBias string `json:"output_bias,omitempty"`
WeightIH string `json:"weight_ih,omitempty"`
WeightHH string `json:"weight_hh,omitempty"`
BiasH string `json:"bias_h,omitempty"`
Gamma string `json:"gamma,omitempty"`
Beta string `json:"beta,omitempty"`
GateWeights string `json:"gate_weights,omitempty"`
UpWeights string `json:"up_weights,omitempty"`
DownWeights string `json:"down_weights,omitempty"`
GateBias string `json:"gate_bias,omitempty"`
UpBias string `json:"up_bias,omitempty"`
DownBias string `json:"down_bias,omitempty"`
// Parallel layer branches (recursive)
BranchWeights []MultiPrecisionLayer `json:"branch_weights,omitempty"`
}
MultiPrecisionLayer stores weights for a single layer with type-aware storage
type MultiPrecisionWeights ¶ added in v0.0.7
type MultiPrecisionWeights struct {
DType string `json:"dtype"` // "float32", "float64", "int8", "int16", "int32"
Scale float64 `json:"scale"` // Quantization scale (for int types)
Layers []MultiPrecisionLayer `json:"layers"`
}
MultiPrecisionWeights stores weights with explicit type information
type Network ¶
type Network struct {
GridRows int // Number of rows in the grid
GridCols int // Number of columns in the grid
LayersPerCell int // Number of layers per grid cell
InputSize int // Total input size
BatchSize int // Batch size for Conv2D layers
// Layer configuration for each position in the grid
// Indexed by flattened position: row*GridCols*LayersPerCell + col*LayersPerCell + layer
Layers []LayerConfig
// contains filtered or unexported fields
}
Network represents a grid neural network. Data flows through a 2D grid of cells, where each cell contains multiple layers.
func BuildDiverseNetwork ¶ added in v0.0.7
func BuildDiverseNetwork(config ArchConfig, inputSize int) *Network
BuildDiverseNetwork creates a complete network from an ArchConfig
func BuildNetworkFromFile ¶ added in v0.0.4
BuildNetworkFromFile creates a neural network from a JSON configuration file
func BuildNetworkFromJSON ¶ added in v0.0.4
BuildNetworkFromJSON creates a neural network from a JSON configuration string. This allows building complete neural networks from JSON without manually assigning layers. The JSON structure matches the NetworkConfig format used in serialization.
func BuildNetworkFromJSONWithDType ¶ added in v0.0.7
BuildNetworkFromJSONWithDType builds a network from JSON and returns the dtype specified in the config.
Returns: network, dtype (defaults to "float32" if not specified), error
The dtype can be used with SaveModelWithDType/LoadModelWithDType for multi-precision storage.
func DeserializeModel ¶
func DeserializeModel(saved SavedModel) (*Network, error)
DeserializeModel creates a Network from a SavedModel
func LoadImportedModel ¶ added in v0.0.3
LoadImportedModel loads a model converted from PyTorch/TensorFlow/HuggingFace
func LoadModelFromString ¶
LoadModelFromString loads a single model from a JSON string. This is useful for WASM, CABI pipelines, or embedding models directly in code.
func LoadModelWithDType ¶ added in v0.0.7
func LoadModelWithDType(jsonString string, modelID string, targetDType string) (*Network, string, error)
LoadModelWithDType loads a model and converts weights to the specified target dtype The model can be loaded regardless of its stored dtype
func LoadTransformerFromBytes ¶ added in v0.0.3
LoadTransformerFromBytes loads a Llama-based transformer model from byte slices. Supports: Llama, TinyLlama, Qwen2.5, Mistral, and other models using the Llama architecture.
configData: JSON config file contents
weightsData: safetensors file contents
func LoadTransformerFromSafetensors ¶ added in v0.0.3
LoadTransformerFromSafetensors loads a Llama-based transformer model directly from safetensors. Supports: Llama, TinyLlama, Qwen2.5, Mistral, and other models using the Llama architecture.
func NewNetwork ¶
NewNetwork creates a new grid neural network with dense layers.
gridRows: number of rows in the grid
gridCols: number of columns in the grid
layersPerCell: number of layers in each grid cell
inputSize: batch size of input data
func (*Network) Activations ¶ added in v0.0.3
Activations returns the activation values for all layers
func (*Network) ApplyGradients ¶ added in v0.0.5
ApplyGradients applies the stored gradients to the weights. If an optimizer is set, it will use that optimizer's update rule; otherwise, it falls back to simple SGD (w = w - lr * grad).
func (*Network) ApplyGradientsAdamW ¶ added in v0.0.5
ApplyGradientsAdamW is a convenience method for using the AdamW optimizer. It automatically creates and sets an AdamW optimizer if one is not already set.
func (*Network) ApplyGradientsRMSprop ¶ added in v0.0.5
ApplyGradientsRMSprop is a convenience method for using RMSprop optimizer
func (*Network) ApplyGradientsSGDMomentum ¶ added in v0.0.5
func (n *Network) ApplyGradientsSGDMomentum(learningRate, momentum, dampening float32, nesterov bool)
ApplyGradientsSGDMomentum is a convenience method for using SGD with momentum
func (*Network) BackwardCPU ¶
BackwardCPU computes gradients via backpropagation on CPU through the grid.
gradOutput: gradient flowing back from the loss (same size as network output)
Returns: gradient with respect to the input
func (*Network) BackwardGPU ¶
BackwardGPU computes gradients via backpropagation on GPU. Note: this requires storing activations from the forward pass. For now, this is a simplified version that applies derivatives.
func (*Network) BiasGradients ¶
BiasGradients returns the bias gradients for all layers
func (*Network) EvaluateFromCheckpointFiles ¶
func (n *Network) EvaluateFromCheckpointFiles(checkpointFiles []string, expectedOutputs []float64) (*DeviationMetrics, time.Duration, time.Duration, error)
EvaluateFromCheckpointFiles loads checkpoint files and evaluates model performance. Returns metrics and timing information.
func (*Network) EvaluateNetwork ¶
func (n *Network) EvaluateNetwork(inputs [][]float32, expectedOutputs []float64) (*DeviationMetrics, error)
EvaluateNetwork evaluates a network's predictions against expected outputs. It runs forward passes and computes deviation metrics.
func (*Network) ForwardCPU ¶
ForwardCPU executes the grid network on CPU and stores intermediate activations for backprop
func (*Network) ForwardGPU ¶
ForwardGPU executes the network on GPU
func (*Network) GetActivation ¶
func (n *Network) GetActivation(row, col, layer int) ActivationType
GetActivation returns the activation function for a specific position in the grid (For backward compatibility with dense-only code)
func (*Network) GetLayer ¶
func (n *Network) GetLayer(row, col, layer int) *LayerConfig
GetLayer returns the layer configuration for a specific position in the grid
func (*Network) GetMethodSignature ¶
GetMethodSignature returns the signature of a specific method
func (*Network) GetMethods ¶
func (n *Network) GetMethods() ([]MethodInfo, error)
GetMethods retrieves all methods of the Network struct, including their names, parameters, and types.
func (*Network) GetMethodsJSON ¶
GetMethodsJSON returns a JSON string containing all methods attached to the Network struct, including each method's parameters and their types.
func (*Network) GetModelSizeInfo ¶ added in v0.0.7
func (n *Network) GetModelSizeInfo() map[string]ModelSizeInfo
GetModelSizeInfo returns size information for different dtypes
func (*Network) GetOptimizer ¶ added in v0.0.5
GetOptimizer returns the current optimizer (may be nil)
func (*Network) InitStepState ¶ added in v0.0.5
InitStepState initializes the stepping state for the network
func (*Network) InitializeWeights ¶ added in v0.0.4
func (n *Network) InitializeWeights()
InitializeWeights initializes all trainable weights in the network with random values
func (*Network) KernelGradients ¶
KernelGradients returns the kernel gradients for all layers
func (*Network) ListMethods ¶
ListMethods returns a simple list of all public method names
func (*Network) ResetOptimizer ¶ added in v0.0.5
func (n *Network) ResetOptimizer()
ResetOptimizer clears the optimizer state
func (*Network) SaveModelToString ¶
SaveModelToString saves a single model to a JSON string. This is useful for WASM, CABI pipelines, or returning models over a network.
func (*Network) SaveModelWithDType ¶ added in v0.0.7
SaveModelWithDType saves a model with weights converted to the specified dtype. Supported dtypes: "float32", "float64", "int8", "int16", "int32".
func (*Network) SerializeModel ¶
func (n *Network) SerializeModel(modelID string) (SavedModel, error)
SerializeModel converts the network to a SavedModel structure
func (*Network) SetLayer ¶
func (n *Network) SetLayer(row, col, layer int, config LayerConfig)
SetLayer sets the layer configuration for a specific position in the grid
func (*Network) SetOptimizer ¶ added in v0.0.5
SetOptimizer sets the optimizer to use for gradient updates
func (*Network) StepBackward ¶ added in v0.0.5
StepBackward executes one backward step for ALL layers simultaneously. It applies a "Softmax Variation" to the weight gradients to balance updates.
func (*Network) StepForward ¶ added in v0.0.5
StepForward executes one step for ALL layers simultaneously. Each layer processes its current input and updates its output.
func (*Network) StepForwardSingle ¶ added in v0.0.5
StepForwardSingle executes one step for a SINGLE layer. This allows even finer-grained control over propagation.
func (*Network) TotalLayers ¶
TotalLayers returns the total number of layers in the grid
func (*Network) Train ¶
func (n *Network) Train(batches []TrainingBatch, config *TrainingConfig) (*TrainingResult, error)
Train trains the network on provided batches
func (*Network) TrainWithStepping ¶ added in v0.0.5
func (n *Network) TrainWithStepping(
config *SteppingTrainingConfig,
dataLoader DataLoader,
totalSteps int,
) (*SteppingTrainingResult, error)
TrainWithStepping provides a high-level training loop with stepping. This integrates optimizers, schedulers, gradient accumulation, and checkpointing.
func (*Network) UpdateWeights ¶
UpdateWeights applies gradients to network weights using simple SGD. This should be called after BackwardGPU/BackwardCPU to update the model parameters.
func (*Network) ZeroGradients ¶ added in v0.0.2
func (n *Network) ZeroGradients()
type NetworkBlueprint ¶ added in v0.0.6
type NetworkBlueprint struct {
Models []ModelTelemetry `json:"models"`
}
NetworkBlueprint contains the structural information of a network extracted after loading.
type NetworkConfig ¶
type NetworkConfig struct {
ID string `json:"id"`
DType string `json:"dtype,omitempty"` // Numerical type: "float32", "float64", "int32", "int16", "int8"
BatchSize int `json:"batch_size"`
GridRows int `json:"grid_rows"`
GridCols int `json:"grid_cols"`
LayersPerCell int `json:"layers_per_cell"`
Layers []LayerDefinition `json:"layers"`
Seed int64 `json:"seed,omitempty"`
}
NetworkConfig represents the network architecture
type Numeric ¶ added in v0.0.7
type Numeric interface {
~int | ~int8 | ~int16 | ~int32 | ~int64 |
~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 |
~float32 | ~float64
}
Numeric is a type constraint for all numeric types that Tensors can hold. This enables generic tensor operations across int and float types.
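The constraint lets client code write one helper for every supported dtype; an illustrative sum over a tensor's raw data:

    // SumData is a hypothetical helper: it totals a tensor's backing
    // slice for any Numeric element type.
    func SumData[T nn.Numeric](t *nn.Tensor[T]) T {
        var s T
        for _, v := range t.Data {
            s += v
        }
        return s
    }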
type Optimizer ¶ added in v0.0.5
type Optimizer interface {
// Step applies gradients to network weights
Step(network *Network, learningRate float32)
// Reset clears optimizer state (momentum, etc.)
Reset()
// GetState returns optimizer state for serialization
GetState() map[string]interface{}
// LoadState restores optimizer state from serialization
LoadState(state map[string]interface{}) error
// Name returns the optimizer name
Name() string
}
Optimizer interface defines the contract for all optimizers
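An illustrative skeleton that satisfies the interface; it only records step counts and is not a real update rule:

    type CountingOptimizer struct{ steps int }

    func (o *CountingOptimizer) Step(n *nn.Network, lr float32) { o.steps++ }
    func (o *CountingOptimizer) Reset()                         { o.steps = 0 }
    func (o *CountingOptimizer) GetState() map[string]interface{} {
        return map[string]interface{}{"steps": o.steps}
    }
    func (o *CountingOptimizer) LoadState(state map[string]interface{}) error { return nil }
    func (o *CountingOptimizer) Name() string                                 { return "counting" }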
type ParameterInfo ¶
ParameterInfo represents metadata about a parameter, including its name and type.
type PolynomialDecayScheduler ¶ added in v0.0.5
type PolynomialDecayScheduler struct {
// contains filtered or unexported fields
}
func NewPolynomialDecayScheduler ¶ added in v0.0.5
func NewPolynomialDecayScheduler(initialLR, finalLR float32, totalSteps int, power float32) *PolynomialDecayScheduler
func (*PolynomialDecayScheduler) GetLR ¶ added in v0.0.5
func (s *PolynomialDecayScheduler) GetLR(step int) float32
func (*PolynomialDecayScheduler) Name ¶ added in v0.0.5
func (s *PolynomialDecayScheduler) Name() string
func (*PolynomialDecayScheduler) Reset ¶ added in v0.0.5
func (s *PolynomialDecayScheduler) Reset()
type PredictionResult ¶
type PredictionResult struct {
SampleIndex int `json:"sample_index"`
ExpectedOutput float64 `json:"expected"`
ActualOutput float64 `json:"actual"`
Deviation float64 `json:"deviation"` // Percentage deviation
Bucket string `json:"bucket"`
}
PredictionResult represents the performance of the model on one prediction
func EvaluatePrediction ¶
func EvaluatePrediction(sampleIndex int, expected, actual float64) PredictionResult
EvaluatePrediction categorizes an expected vs actual output into a deviation bucket
type RMSpropOptimizer ¶ added in v0.0.5
type RMSpropOptimizer struct {
// contains filtered or unexported fields
}
func NewRMSpropOptimizer ¶ added in v0.0.5
func NewRMSpropOptimizer(alpha, epsilon, momentum float32) *RMSpropOptimizer
func NewRMSpropOptimizerDefault ¶ added in v0.0.5
func NewRMSpropOptimizerDefault() *RMSpropOptimizer
func (*RMSpropOptimizer) GetState ¶ added in v0.0.5
func (opt *RMSpropOptimizer) GetState() map[string]interface{}
func (*RMSpropOptimizer) LoadState ¶ added in v0.0.5
func (opt *RMSpropOptimizer) LoadState(state map[string]interface{}) error
func (*RMSpropOptimizer) Name ¶ added in v0.0.5
func (opt *RMSpropOptimizer) Name() string
func (*RMSpropOptimizer) Reset ¶ added in v0.0.5
func (opt *RMSpropOptimizer) Reset()
func (*RMSpropOptimizer) Step ¶ added in v0.0.5
func (opt *RMSpropOptimizer) Step(network *Network, learningRate float32)
type RecordedActivity ¶ added in v0.0.6
type RecordedActivity struct {
ModelID string `json:"model_id"`
RecordedAt string `json:"recorded_at"`
Duration float64 `json:"duration_seconds"`
TotalEvents int `json:"total_events"`
Events []LayerEvent `json:"events"`
}
RecordedActivity represents the full recording of neural activity
type RecordingObserver ¶ added in v0.0.6
type RecordingObserver struct {
Events []LayerEvent
ModelID string
StartTime time.Time
}
RecordingObserver collects all layer events for saving to a file
func NewRecordingObserver ¶ added in v0.0.6
func NewRecordingObserver(modelID string) *RecordingObserver
func (*RecordingObserver) GetRecording ¶ added in v0.0.6
func (o *RecordingObserver) GetRecording() RecordedActivity
GetRecording returns the full recorded activity
func (*RecordingObserver) OnBackward ¶ added in v0.0.6
func (o *RecordingObserver) OnBackward(event LayerEvent)
func (*RecordingObserver) OnForward ¶ added in v0.0.6
func (o *RecordingObserver) OnForward(event LayerEvent)
func (*RecordingObserver) Reset ¶ added in v0.0.6
func (o *RecordingObserver) Reset()
Reset clears the recorded events
type SGDOptimizer ¶ added in v0.0.5
type SGDOptimizer struct {
// contains filtered or unexported fields
}
func NewSGDOptimizer ¶ added in v0.0.5
func NewSGDOptimizer() *SGDOptimizer
func NewSGDOptimizerWithMomentum ¶ added in v0.0.5
func NewSGDOptimizerWithMomentum(momentum, dampening float32, nesterov bool) *SGDOptimizer
func (*SGDOptimizer) GetState ¶ added in v0.0.5
func (opt *SGDOptimizer) GetState() map[string]interface{}
func (*SGDOptimizer) LoadState ¶ added in v0.0.5
func (opt *SGDOptimizer) LoadState(state map[string]interface{}) error
func (*SGDOptimizer) Name ¶ added in v0.0.5
func (opt *SGDOptimizer) Name() string
func (*SGDOptimizer) Reset ¶ added in v0.0.5
func (opt *SGDOptimizer) Reset()
func (*SGDOptimizer) Step ¶ added in v0.0.5
func (opt *SGDOptimizer) Step(network *Network, learningRate float32)
type SafetensorsHeader ¶ added in v0.0.3
type SafetensorsHeader struct {
Tensors map[string]TensorInfo `json:"-"`
}
SafetensorsHeader contains metadata about tensors in the file
type SavedModel ¶
type SavedModel struct {
ID string `json:"id"`
Config NetworkConfig `json:"cfg"`
Weights EncodedWeights `json:"weights"`
}
SavedModel represents a single saved model with config and weights
type SoftmaxType ¶ added in v0.0.2
type SoftmaxType int
SoftmaxType defines the variant of softmax to use
const (
SoftmaxStandard SoftmaxType = 0 // Standard softmax: one distribution
SoftmaxGrid SoftmaxType = 1 // Grid softmax: independent distributions per row
SoftmaxHierarchical SoftmaxType = 2 // Hierarchical: nested softmax levels
SoftmaxTemperature SoftmaxType = 3 // Temperature-scaled softmax
SoftmaxGumbel SoftmaxType = 4 // Gumbel softmax (adds noise)
SoftmaxMasked SoftmaxType = 5 // Masked softmax (ignores certain positions)
SoftmaxSparse SoftmaxType = 6 // Sparsemax (can output exact zeros)
SoftmaxAdaptive SoftmaxType = 7 // Adaptive softmax (for large vocabularies)
SoftmaxMixture SoftmaxType = 8 // Mixture of softmaxes
SoftmaxEntmax SoftmaxType = 9 // Entmax (generalization of softmax/sparsemax)
)
type StepDecayScheduler ¶ added in v0.0.5
type StepDecayScheduler struct {
// contains filtered or unexported fields
}
func NewStepDecayScheduler ¶ added in v0.0.5
func NewStepDecayScheduler(initialLR, decayFactor float32, stepSize int) *StepDecayScheduler
func (*StepDecayScheduler) GetLR ¶ added in v0.0.5
func (s *StepDecayScheduler) GetLR(step int) float32
func (*StepDecayScheduler) Name ¶ added in v0.0.5
func (s *StepDecayScheduler) Name() string
func (*StepDecayScheduler) Reset ¶ added in v0.0.5
func (s *StepDecayScheduler) Reset()
type StepState ¶ added in v0.0.5
type StepState struct {
// contains filtered or unexported fields
}
StepState holds the current state of each layer for stepping execution
func (*StepState) GetLayerData ¶ added in v0.0.5
GetLayerData returns the internal layer data (for debugging)
func (*StepState) GetLayerOutput ¶ added in v0.0.5
GetLayerOutput retrieves the current output of a specific layer
func (*StepState) GetStepCount ¶ added in v0.0.5
GetStepCount returns the current step count
type SteppingTrainingConfig ¶ added in v0.0.5
type SteppingTrainingConfig struct {
// Optimizer settings
Optimizer string // "sgd", "adamw", "rmsprop", "sgd_momentum"
LearningRate float32
Beta1 float32 // AdamW/Adam (default: 0.9)
Beta2 float32 // AdamW/Adam (default: 0.999)
WeightDecay float32 // AdamW (default: 0.01)
Momentum float32 // SGD/RMSprop (default: 0.9)
Dampening float32 // SGD momentum (default: 0.0)
Nesterov bool // SGD Nesterov momentum (default: false)
Alpha float32 // RMSprop decay rate (default: 0.99)
Epsilon float32 // AdamW/RMSprop epsilon (default: 1e-8)
// Scheduler settings
LRSchedule string // "constant", "linear", "cosine", "exponential", "warmup", "step", "polynomial"
WarmupSteps int // Number of warmup steps (for warmup scheduler)
TotalSteps int // Total training steps (for decay schedulers)
MinLR float32 // Minimum learning rate (for cosine/polynomial)
DecayRate float32 // Decay rate (for exponential/step schedulers)
DecaySteps int // Decay steps (for exponential scheduler)
StepSize int // Step size (for step scheduler)
Power float32 // Power (for polynomial scheduler, default: 1.0)
// Training settings
GradAccumSteps int // Number of steps to accumulate gradients (default: 1 = no accumulation)
GradClipValue float32 // Gradient clipping value (0 = no clipping)
CheckpointEvery int // Save checkpoint every N steps (0 = no checkpointing)
LogEvery int // Log progress every N steps (default: 100)
// Callbacks
OnStep func(step int, lr float32, loss float32) // Called after each step
OnCheckpoint func(step int, network *Network) // Called at checkpoints
}
SteppingTrainingConfig holds configuration for high-level training with stepping
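A representative AdamW-plus-warmup configuration, as a sketch; the DataLoader passed to TrainWithStepping is assumed to exist elsewhere, and all field values are illustrative:

    cfg := &nn.SteppingTrainingConfig{
        Optimizer:      "adamw",
        LearningRate:   3e-4,
        WeightDecay:    0.01,
        LRSchedule:     "warmup",
        WarmupSteps:    500,
        TotalSteps:     10000,
        GradAccumSteps: 4,
        GradClipValue:  1.0,
        LogEvery:       100,
        OnStep: func(step int, lr, loss float32) {
            // hook for metrics; called after every step
        },
    }
    result, err := n.TrainWithStepping(cfg, loader, cfg.TotalSteps)
    _, _ = result, err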
type SteppingTrainingResult ¶ added in v0.0.5
type SteppingTrainingResult struct {
TotalSteps int
FinalLoss float32
LossHistory []float32
LRHistory []float32
TotalTime time.Duration
StepsPerSecond float32
}
SteppingTrainingResult holds the results of a stepping training run
type TaskChange ¶ added in v0.0.6
type TaskChange struct {
AtTime time.Duration `json:"at_time"` // When the change occurs
FromTask string `json:"from_task"` // Previous task name
ToTask string `json:"to_task"` // New task name
PreChangeWindow int `json:"pre_change_window"` // Window index before change
PostChangeWindow int `json:"post_change_window"` // Window index after change
PreAccuracy float64 `json:"pre_accuracy"` // Accuracy before change
PostAccuracy float64 `json:"post_accuracy"` // Accuracy after change
RecoveryWindows int `json:"recovery_windows"` // Windows to recover to 50%+
RecoveryTime time.Duration `json:"recovery_time"` // Time to recover
}
TaskChange represents a point where the task/goal changes
type Tensor ¶ added in v0.0.7
type Tensor[T Numeric] struct {
Data []T // Underlying data storage
DType DType // Type identifier for runtime checks
Shape []int // Dimensions (e.g., [batch, channels, height, width])
Strides []int // Step sizes for each dimension
Scale float32 // Quantization scale factor (used only for Int8)
Offset int // Offset into Data for views/slices
}
Tensor wraps numerical data with metadata for type-agnostic operations. It replaces raw []float32 slices to enable multi-precision training.
func ActivateDerivativeTensor ¶ added in v0.0.7
func ActivateDerivativeTensor[T Numeric](preAct *Tensor[T], activation ActivationType) *Tensor[T]
ActivateDerivativeTensor computes activation derivatives for all elements.
func ActivateTensor ¶ added in v0.0.7
func ActivateTensor[T Numeric](t *Tensor[T], activation ActivationType) *Tensor[T]
ActivateTensor applies activation function to all elements of a tensor.
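Since the constructor signatures are elided in this listing, the sketch below builds a tensor directly from its exported fields and applies an activation (the activation constant is left abstract):

    t := &nn.Tensor[float32]{
        Data:  []float32{-1, 0, 2},
        Shape: []int{3},
    }
    var act nn.ActivationType // choose a variant, e.g. Tanh
    out := nn.ActivateTensor(t, act)
    _ = out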
func ApplySoftmax ¶ added in v0.0.7
ApplySoftmax applies standard softmax for any numeric type.
func ApplySoftmaxGrid ¶ added in v0.0.7
ApplySoftmaxGrid applies independent softmax to each row.
func Conv1DBackward ¶ added in v0.0.7
func Conv1DBackward[T Numeric](
gradOutput, input, preActivation, kernel *Tensor[T],
seqLen, inChannels, kernelSize, stride, padding, filters, batchSize int,
activation ActivationType,
) (gradInput, gradKernel, gradBias *Tensor[T])
Conv1DBackward computes gradients for 1D convolution with any numeric type.
func Conv1DForward ¶ added in v0.0.7
func Conv1DForward[T Numeric](
input, kernel, bias *Tensor[T],
seqLen, inChannels, kernelSize, stride, padding, filters, batchSize int,
activation ActivationType,
) (preAct, postAct *Tensor[T])
Conv1DForward performs 1D convolution for any numeric type.
Input shape: [batch][inChannels][seqLen] (flattened)
Output shape: [batch][filters][outLen] (flattened)
func Conv2DBackward ¶ added in v0.0.7
func Conv2DBackward[T Numeric](
gradOutput, input, preActivation, kernel *Tensor[T],
inH, inW, inC, kSize, stride, padding, filters, outH, outW, batchSize int,
activation ActivationType,
) (gradInput, gradKernel, gradBias *Tensor[T])
Conv2DBackward computes gradients for 2D convolution with any numeric type.
func Conv2DForward ¶ added in v0.0.7
func Conv2DForward[T Numeric](
input, kernel, bias *Tensor[T],
inH, inW, inC, kSize, stride, padding, filters, outH, outW, batchSize int,
activation ActivationType,
) (preAct, postAct *Tensor[T])
Conv2DForward performs 2D convolution for any numeric type.
input shape: [batch][inChannels][height][width] (flattened)
output shape: [batch][filters][outHeight][outWidth] (flattened)
func ConvertTensorFloat32ToT ¶ added in v0.0.7
ConvertTensorFloat32ToT converts a float32 tensor to any Numeric type.
func ConvertTensorTToFloat32 ¶ added in v0.0.7
ConvertTensorTToFloat32 converts any Numeric tensor to float32.
func DenseBackward ¶ added in v0.0.7
func DenseBackward[T Numeric](gradOutput, input, preAct, weights *Tensor[T], inputSize, outputSize, batchSize int, activation ActivationType) (gradInput, gradWeights, gradBias *Tensor[T])
DenseBackward performs backward pass for dense layer with any numeric type.
func DenseForward ¶ added in v0.0.7
func DenseForward[T Numeric](input *Tensor[T], weights, bias *Tensor[T], inputSize, outputSize, batchSize int, activation ActivationType) (preAct, postAct *Tensor[T])
DenseForward performs forward pass for dense layer with any numeric type.
input: [batchSize * inputSize]
weights: [inputSize * outputSize]
output: [batchSize * outputSize]
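A single dense layer through the generic kernel, as a minimal sketch; weights are zero-initialized here, the tensor literals use the exported Tensor fields, and the flattened layout follows the doc comment above:

    in := &nn.Tensor[float32]{Data: make([]float32, 2*4), Shape: []int{2, 4}} // batch=2, inputSize=4
    w := &nn.Tensor[float32]{Data: make([]float32, 4*3), Shape: []int{4, 3}}  // outputSize=3
    b := &nn.Tensor[float32]{Data: make([]float32, 3), Shape: []int{3}}
    var act nn.ActivationType
    pre, post := nn.DenseForward(in, w, b, 4, 3, 2, act)
    _, _ = pre, post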
func EmbeddingBackward ¶ added in v0.0.7
func EmbeddingBackward[T Numeric](
gradOutput, tokenIDs *Tensor[T],
vocabSize, embeddingDim int,
) *Tensor[T]
EmbeddingBackward computes gradients for embedding lookup. Only the embeddings for tokens that were looked up get gradients.
func EmbeddingForward ¶ added in v0.0.7
func EmbeddingForward[T Numeric](
tokenIDs *Tensor[T],
weights *Tensor[T],
vocabSize, embeddingDim int,
) *Tensor[T]
EmbeddingForward performs embedding lookup for any numeric type.
Input: token indices as int values (stored in T, will be cast to int)
Output: [seqLen, embeddingDim]
func GenericBackwardPass ¶ added in v0.0.7
func GenericBackwardPass[T Numeric](
n *Network,
gradOutput *Tensor[T],
activations []*Tensor[T],
backwardContext []any,
) (*Tensor[T], []any, []any, time.Duration)
GenericBackwardPass computes gradients via backpropagation for any numeric type. Returns:
- Gradient with respect to input
- Slice of kernel gradients per layer (type depends on layer)
- Slice of bias gradients per layer (type depends on layer)
- Duration
func GenericTrainStep ¶ added in v0.0.7
func GenericTrainStep[T Numeric](
n *Network,
input *Tensor[T],
target *Tensor[T],
learningRate float64,
backend Backend[T],
) (*Tensor[T], float64, time.Duration)
GenericTrainStep executes a single training step for generic types
func LayerNormBackward ¶ added in v0.0.7
func LayerNormBackward[T Numeric](input, residual, gradOutput, gamma, beta *Tensor[T], normSize, batchSize int, epsilon float64) (gradInput, gradGamma, gradBeta *Tensor[T])
LayerNormBackward computes gradients for Layer normalization.
func LayerNormForward ¶ added in v0.0.7
func LayerNormForward[T Numeric](input, residual, gamma, beta *Tensor[T], normSize, batchSize int, epsilon float64) *Tensor[T]
LayerNormForward performs layer normalization for any numeric type.
input shape: [batchSize][normSize] (flattened)
residual: optional residual connection to add before normalization
func MultiHeadAttentionForward ¶ added in v0.0.7
func MultiHeadAttentionForward[T Numeric](
input *Tensor[T],
weights *AttentionWeights[T],
ropeTheta float64,
) *Tensor[T]
MultiHeadAttentionForward performs multi-head attention for any numeric type.
Input shape: [seqLen, dModel]
Output shape: [seqLen, dModel]
func NewTensor ¶ added in v0.0.7
NewTensor creates a new tensor with the given shape. Data is allocated but not initialized.
func NewTensorFromSlice ¶ added in v0.0.7
NewTensorFromSlice creates a tensor from existing data. The slice is used directly (not copied) for efficiency.
func ParallelBackward ¶ added in v0.0.7
func ParallelBackward[T Numeric](
gradOutput, input *Tensor[T],
branches []*LayerConfig,
branchIntermediates []*Tensor[T],
combineMode string,
) (*Tensor[T], [][]float32)
ParallelBackward computes gradients for parallel layer.
func ParallelBackwardFiltered ¶ added in v0.0.7
func ParallelBackwardFiltered[T Numeric]( gradOutput, input *Tensor[T], branches []*LayerConfig, branchOutputs []*Tensor[T], gateWeights []float32, gateConfig *LayerConfig, ) (*Tensor[T], []float32)
ParallelBackwardFiltered computes gradients for filter combine mode. Each branch receives gradient scaled by its gate weight. Gate gradient is computed based on how much each branch contributed to the loss.
func RMSNormBackward ¶ added in v0.0.7
func RMSNormBackward[T Numeric](input, residual, gradOutput, gamma *Tensor[T], normSize, batchSize int, epsilon float64) (gradInput, gradGamma *Tensor[T])
RMSNormBackward computes gradients for RMS normalization.
func RMSNormForward ¶ added in v0.0.7
func RMSNormForward[T Numeric](input, residual, gamma *Tensor[T], normSize int, epsilon float64) *Tensor[T]
RMSNormForward performs RMS normalization for any numeric type. RMSNorm is simpler than LayerNorm: it uses only gamma (no beta).
Formula: output = input * gamma / sqrt(mean(input^2) + epsilon)
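A hedged sketch following the formula above; nil for the residual and the tensor construction form are assumptions:

    x := nn.NewTensorFromSlice([]float32{1, 2, 3, 4}, 1, 4)
    gamma := nn.NewTensorFromSlice([]float32{1, 1, 1, 1}, 4)
    y := nn.RMSNormForward(x, nil, gamma, 4, 1e-6)
    // mean(x^2) = (1+4+9+16)/4 = 7.5, so y_i = x_i / sqrt(7.5 + 1e-6)
    _ = y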
func RNNBackward ¶ added in v0.0.7
func RNNBackward[T Numeric]( gradOutput, input, hiddenStates *Tensor[T], weightIH, weightHH *Tensor[T], batchSize, seqLength, inputSize, hiddenSize int, ) (gradInput, gradWeightIH, gradWeightHH, gradBiasH *Tensor[T])
RNNBackward performs backward pass for RNN layer using BPTT with any numeric type.
func RNNForward ¶ added in v0.0.7
func RNNForward[T Numeric]( input, weightIH, weightHH, biasH *Tensor[T], batchSize, seqLength, inputSize, hiddenSize int, ) (output, hiddenStates *Tensor[T])
RNNForward performs the forward pass for an RNN layer with any numeric type.
Input shape: [batchSize, seqLength, inputSize]
Output shape: [batchSize, seqLength, hiddenSize]
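A hedged sketch; the [inputSize, hiddenSize] and [hiddenSize, hiddenSize] weight layouts are assumptions, since only the tensor names are documented:

    x := nn.NewTensorFromSlice(make([]float32, 1*5*3), 1, 5, 3) // [batch=1, seq=5, in=3]
    wIH := nn.NewTensorFromSlice(make([]float32, 3*8), 3, 8)    // input-to-hidden (layout assumed)
    wHH := nn.NewTensorFromSlice(make([]float32, 8*8), 8, 8)    // hidden-to-hidden
    bH := nn.NewTensorFromSlice(make([]float32, 8), 8)
    out, hiddenStates := nn.RNNForward(x, wIH, wHH, bH, 1, 5, 3, 8) // out: [1, 5, 8]
    _, _ = out, hiddenStates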
func ResidualBackward ¶ added in v0.0.7
ResidualBackward computes gradients for the Residual layer. The output gradient flows to both the input (the previous layer's output) and the skip connection. Returns:
gradInput: gradient w.r.t. the input (flows to the previous layer's output)
gradSkip: gradient w.r.t. the skip input (flows to the previous layer's input / the skipped layer)
func ResidualForward ¶ added in v0.0.7
ResidualForward adds a residual connection: output = input + previous_input. In this architecture, "LayerResidual" is a distinct layer that adds its input (the output of the previous layer) to the input of the previous layer. This assumes the previous layer preserved its dimensions.
input: current layer input (the output of the previous layer)
skipInput: input to the previous layer (the "skip" connection)
func SequentialBackward ¶ added in v0.0.7
func SequentialBackward[T Numeric]( gradOutput, input *Tensor[T], layers []*LayerConfig, intermediates []*Tensor[T], ) *Tensor[T]
SequentialBackward computes gradients for sequential layer. Iterates backward through layers.
func SoftmaxBackward ¶ added in v0.0.7
func SoftmaxBackward[T Numeric](gradOutput, output *Tensor[T], softmaxRows, softmaxCols int) *Tensor[T]
SoftmaxBackward computes gradients for Softmax layer.
func StepBackwardGeneric ¶ added in v0.0.7
func StepBackwardGeneric[T Numeric]( n *Network, state *GenericStepState[T], gradOutput *Tensor[T], ) (*Tensor[T], []any, []any, time.Duration)
StepBackwardGeneric executes the backward pass for a GenericStepState, returning the input gradient, kernel gradients, bias gradients, and duration. The generic implementation is currently a placeholder.
func SwiGLUBackward ¶ added in v0.0.7
func SwiGLUBackward[T Numeric]( input, gradOutput *Tensor[T], gateWeights, upWeights, downWeights, gateBias, upBias, downBias *Tensor[T], inputSize, intermediateSize, seqLen int, ) (gradInput, gradGateW, gradUpW, gradDownW, gradGateB, gradUpB, gradDownB *Tensor[T])
SwiGLUBackward computes gradients for SwiGLU.
func SwiGLUForward ¶ added in v0.0.7
func SwiGLUForward[T Numeric]( input, gateWeights, upWeights, downWeights, gateBias, upBias, downBias *Tensor[T], inputSize, intermediateSize, seqLen int, ) (output *Tensor[T])
SwiGLUForward performs SwiGLU gated activation for any numeric type. SwiGLU: down_proj(silu(gate_proj(x)) * up_proj(x)) where silu(x) = x * sigmoid(x) = x / (1 + exp(-x))
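A hedged sketch with seqLen=4, inputSize=8, intermediateSize=16; zero-valued bias tensors are passed because nil handling is not documented here, and the tensor construction form is assumed:

    x := nn.NewTensorFromSlice(make([]float32, 4*8), 4, 8)
    gateW := nn.NewTensorFromSlice(make([]float32, 8*16), 8, 16)
    upW := nn.NewTensorFromSlice(make([]float32, 8*16), 8, 16)
    downW := nn.NewTensorFromSlice(make([]float32, 16*8), 16, 8)
    gateB := nn.NewTensorFromSlice(make([]float32, 16), 16)
    upB := nn.NewTensorFromSlice(make([]float32, 16), 16)
    downB := nn.NewTensorFromSlice(make([]float32, 8), 8)
    y := nn.SwiGLUForward(x, gateW, upW, downW, gateB, upB, downB, 8, 16, 4)
    _ = y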
type TensorInfo ¶ added in v0.0.3
type TensorInfo struct {
DType string `json:"dtype"`
Shape []int `json:"shape"`
Offset []int `json:"data_offsets"`
}
TensorInfo describes a tensor's properties
type TensorWithShape ¶ added in v0.0.7
TensorWithShape holds tensor data along with its shape
type TimeWindow ¶ added in v0.0.6
type TimeWindow struct {
WindowIndex int `json:"window_index"`
Duration time.Duration `json:"duration"`
Outputs int `json:"outputs"` // Number of outputs in this window
Correct int `json:"correct"` // Number of correct predictions
Accuracy float64 `json:"accuracy"` // Accuracy percentage
OutputsPerSec int `json:"outputs_per_sec"`
CurrentTask string `json:"current_task"` // Task label for this window
TaskID int `json:"task_id"` // Numeric task identifier
}
TimeWindow captures metrics for a single time window (typically 1 second)
type TrainingBatch ¶
TrainingBatch represents a single training batch
type TrainingConfig ¶
type TrainingConfig struct {
Epochs int
LearningRate float32
UseGPU bool
PrintEveryBatch int // Print progress every N batches (0 = only print epoch summary)
GradientClip float32 // Max gradient norm (0 = no clipping)
LossType string // "mse" or "cross_entropy"
Verbose bool
EvaluateEveryN int // Evaluate on validation set every N epochs (0 = no evaluation)
ValidationInputs [][]float32 // Optional: validation inputs for evaluation
ValidationTargets []float64 // Optional: validation expected outputs
}
TrainingConfig holds configuration for training
func DefaultTrainingConfig ¶
func DefaultTrainingConfig() *TrainingConfig
DefaultTrainingConfig returns sensible defaults
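A short sketch of customizing the defaults, using only the fields documented above:

    cfg := nn.DefaultTrainingConfig()
    cfg.Epochs = 20
    cfg.LearningRate = 0.005
    cfg.LossType = "cross_entropy" // or "mse"
    cfg.GradientClip = 1.0         // 0 disables clipping
    cfg.EvaluateEveryN = 5         // requires ValidationInputs and ValidationTargets to be set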
type TrainingMetrics ¶ added in v0.0.6
type TrainingMetrics struct {
Steps int `json:"steps"` // Total training steps/iterations
Accuracy float64 `json:"accuracy"` // Final accuracy percentage
Loss float32 `json:"loss"` // Final loss value
TimeTotal time.Duration `json:"time_total"` // Total training time
TimeToTarget time.Duration `json:"time_to_target"` // Time to reach target accuracy
MemoryPeakMB float64 `json:"memory_peak_mb"` // Peak memory usage in MB
Milestones map[int]time.Duration `json:"milestones"` // Time to reach 10%, 20%, ... 100% accuracy
}
TrainingMetrics captures performance metrics for a training run
func NewTrainingMetrics ¶ added in v0.0.6
func NewTrainingMetrics() TrainingMetrics
NewTrainingMetrics creates an initialized TrainingMetrics with milestone tracking
func (*TrainingMetrics) UpdateMilestone ¶ added in v0.0.6
func (tm *TrainingMetrics) UpdateMilestone(accuracy float64, elapsed time.Duration)
UpdateMilestone records the time when an accuracy milestone is reached
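A hedged sketch of milestone tracking; accuracy here stands for whatever evaluation result the caller computes:

    tm := nn.NewTrainingMetrics()
    start := time.Now()
    // inside the training loop, after each evaluation:
    tm.UpdateMilestone(accuracy, time.Since(start)) // accuracy: your eval result in percent
    // tm.Milestones[50] later holds the time at which 50% accuracy was first reached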
type TrainingResult ¶
type TrainingResult struct {
FinalLoss float64
BestLoss float64
TotalTime time.Duration
AvgThroughput float64 // samples per second
LossHistory []float64 // loss per epoch
EvalMetrics *DeviationMetrics // Optional: evaluation metrics if validation set provided
}
TrainingResult contains training statistics
type TransformerConfig ¶ added in v0.0.3
type TransformerConfig struct {
ModelType string `json:"model_type"` // "llama", "qwen2", "mistral", etc.
Architectures []string `json:"architectures"` // Model architecture names
HiddenSize int `json:"hidden_size"`
IntermediateSize int `json:"intermediate_size"`
NumLayers int `json:"num_hidden_layers"`
NumHeads int `json:"num_attention_heads"`
NumKVHeads int `json:"num_key_value_heads"`
RMSNormEps float64 `json:"rms_norm_eps"`
VocabSize int `json:"vocab_size"`
}
TransformerConfig represents configuration for Llama-based transformer models. Supports Llama, TinyLlama, Qwen2, Mistral, etc.
type TweenConfig ¶ added in v0.0.6
type TweenConfig struct {
// === TUNABLE LEARNING ===
FrontierEnabled bool // Default: true
FrontierMin int // Minimum layer index to oscillate to
FrontierThreshold float32 // Default: 0.55
FrontierNoise float32 // Default: 0.0
IgnoreThreshold float32 // Default: 0.2
DenseRate float32 // Default: 1.0
RNNRate float32 // Default: 0.5
LSTMRate float32 // Default: 0.5
AttentionRate float32 // Default: 0.2
NormRate float32 // Default: 0.1
SwiGLURate float32 // Default: 0.2
Conv2DRate float32 // Default: 0.1
EmbeddingRate float32 // Default: 0.1
Conv1DRate float32 // Default: 0.1
// === MOMENTUM & UPDATE SCALING ===
Momentum float32 // Default: 0.9
BiasRateMultiplier float32 // Default: 0.1
WeightRateMultiplier float32 // Default: 0.01
// === BACKWARD PASS CLAMPING ===
TanhClampMin float32 // Default: -0.95
TanhClampMax float32 // Default: 0.95
SigmoidClampMin float32 // Default: 0.05
SigmoidClampMax float32 // Default: 0.95
ReLUClampMax float32 // Default: 10.0
// === TRAINING BEHAVIOR ===
EarlyStopThreshold float64 // Default: 95.0
EvalFrequency int // Default: 5
LinkBudgetScale float32 // Default: 0.5
// === DYNAMIC LAYER PRUNING ===
PruneEnabled bool // Default: false
PruneThreshold float32 // Default: 0.1
PrunePatience int // Default: 10
// === BATCH TRAINING ===
BatchSize int // Default: 1
// === CHAIN RULE SUPPORT ===
UseChainRule bool // Default: true
DepthScaleFactor float32 // Default: 1.2
// === NEW CONFIGURABLE CONSTANTS ===
GradientScale float32 // Default: 0.1 (Base scale for gradients)
TotalWeightThreshold float32 // Default: 0.01 (Threshold for weight importance)
ReLUSlope float32 // Default: 1.1 (Slope for positive activation)
LeakyReLUSlope float32 // Default: 0.1 (Slope for negative activation)
DerivativeEpsilon float32 // Default: 0.01 (Stability epsilon)
LSTMGateScale float32 // Default: 0.25 (Scaling for LSTM gates)
AttentionRoutingRate float32 // Default: 0.2 (Scaling for attention routing)
AttentionBiasRate float32 // Default: 0.05 (Scaling for attention bias)
NormBetaRate float32 // Default: 0.1 (Scaling for Norm Beta)
NormGammaRate float32 // Default: 0.01 (Scaling for Norm Gamma)
// === EXPLOSION DETECTION ===
ExplosionDetection bool // Default: false (must be enabled explicitly)
}
TweenConfig holds all tunable parameters for NeuralTween
func DefaultTweenConfig ¶ added in v0.0.6
func DefaultTweenConfig(totalLayers int) *TweenConfig
DefaultTweenConfig returns the standard configuration
type TweenEpochMetrics ¶ added in v0.0.6
type TweenEpochMetrics struct {
Epoch int
AvgLoss float32
Score float64
AvgLinkBudget float32
MinLinkBudget float32
MaxLinkBudget float32
AvgGap float32
MaxGap float32
DepthBarrier float32 // Overall info preservation (product of all budgets)
BottleneckLayer int // Layer with lowest link budget
}
TweenEpochMetrics captures detailed per-epoch information for visualization
type TweenState ¶ added in v0.0.6
type TweenState struct {
// Forward pass: what each layer ACTUALLY produces (top-down)
ForwardActs [][]float32
// Backward pass: what each layer SHOULD produce (bottom-up from expected)
BackwardTargets [][]float32
// Link budget per layer: how much information is preserved (0-1)
// Low budget = high attenuation = need more careful tweening
LinkBudgets []float32
// Gap at each layer: magnitude of difference between forward and backward
Gaps []float32
// Momentum for stable updates
WeightVel [][]float32
BiasVel [][]float32
// Best state tracking
BestScore float64
BestWeights [][][]float32
BestBiases [][][]float32
// Config holds all tunable parameters
Config *TweenConfig
TotalLayers int
TweenSteps int
LossHistory []float32
// Tracks consecutive epochs a layer has been dead for Pruning
DeadEpochs []int
// === BATCH TRAINING ===
BatchGaps [][]float32 // Accumulated gaps per layer [layer][output]
BatchCount int // Current samples in batch
// === VISUALIZATION & DEBUGGING ===
Verbose bool // If true, print training progress to console
// Link budget history: [epoch][layer] = budget value (for heatmap visualization)
LinkBudgetHistory [][]float32
// Gap history: [epoch][layer] = gap value (for tracking convergence per layer)
GapHistory [][]float32
// Depth barrier: cumulative signal preservation from input to each layer
// DepthBarrier[i] = product of LinkBudgets[0..i], shows how much original info survives
DepthBarrier []float32
// Depth barrier history: [epoch] = overall depth barrier (product of all budgets)
DepthBarrierHistory []float32
// Per-epoch metrics for plotting
EpochMetrics []TweenEpochMetrics
// === CHAIN RULE SUPPORT ===
ChainGradients [][]float32 // Gradient at each layer, computed via chain rule
// === GRADIENT EXPLOSION DETECTION ===
PrevAvgGap float32 // Previous epoch's average gap
GapGrowthRate float32 // Rate of gap growth (current/previous)
ExplosionCount int // Consecutive epochs with explosion detected
AdaptiveRate float32 // Current adaptive learning rate multiplier (0-1)
BaselineGap float32 // Baseline gap from first few epochs
GapSamples int // Number of samples for baseline calculation
}
TweenState holds bidirectional analysis state
func NewTweenState ¶ added in v0.0.6
func NewTweenState(n *Network, config *TweenConfig) *TweenState
NewTweenState creates tween state with tunable defaults. Pass nil for config to use defaults.
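A short sketch of both construction paths described above; totalLayers stands for the network's layer count:

    cfg := nn.DefaultTweenConfig(totalLayers)
    cfg.UseChainRule = true
    cfg.BatchSize = 8
    ts := nn.NewTweenState(network, cfg)
    // or accept the defaults directly:
    ts = nn.NewTweenState(network, nil)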
func (*TweenState) BackwardPass ¶ added in v0.0.6
func (ts *TweenState) BackwardPass(n *Network, targetClass int, outputSize int)
BackwardPass: From the expected output, estimate what each layer SHOULD produce. This is the "bottom-up" pass, propagating the target upward through the layers.
func (*TweenState) BackwardPassChainRule ¶ added in v0.0.6
func (ts *TweenState) BackwardPassChainRule(n *Network, targetClass int, outputSize int)
BackwardPassChainRule: Proper chain-rule gradient propagation. Unlike BackwardPass, which uses heuristics, this properly applies:
1. Output error gradient (target - actual)
2. Activation function derivatives at each layer
3. Transpose weight multiplication to propagate gradients
4. Depth scaling to combat vanishing gradients
func (*TweenState) CalculateLinkBudgets ¶ added in v0.0.6
func (ts *TweenState) CalculateLinkBudgets()
CalculateLinkBudgets: Measure information preservation at each layer. High budget = good signal flow; low budget = high attenuation.
func (*TweenState) CalculateLinkBudgetsFromSample ¶ added in v0.0.6
func (ts *TweenState) CalculateLinkBudgetsFromSample(n *Network, input []float32)
func (*TweenState) ForwardPass ¶ added in v0.0.6
func (ts *TweenState) ForwardPass(n *Network, input []float32) []float32
ForwardPass: Push input through untrained network, capture ALL activations
func (*TweenState) GetBudgetSummary ¶ added in v0.0.6
func (ts *TweenState) GetBudgetSummary() (avg, min, max float32)
func (*TweenState) GetGapSummary ¶ added in v0.0.6
func (ts *TweenState) GetGapSummary() (avg, max float32)
func (*TweenState) ResetBatch ¶ added in v0.0.6
func (ts *TweenState) ResetBatch()
ResetBatch clears accumulated batch gaps
func (*TweenState) RestoreBest ¶ added in v0.0.6
func (ts *TweenState) RestoreBest(n *Network)
func (*TweenState) SaveBest ¶ added in v0.0.6
func (ts *TweenState) SaveBest(n *Network)
func (*TweenState) Train ¶ added in v0.0.6
func (ts *TweenState) Train(n *Network, inputs [][]float32, expected []float64, epochs int, rate float32, callback func(epoch int, avgLoss float32, metrics *DeviationMetrics))
Train runs the training loop with early stopping.
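A hedged sketch of the call, following the signature above; inputs and expected are the caller's training data:

    ts.Train(network, inputs, expected, 100, 0.01,
        func(epoch int, avgLoss float32, metrics *nn.DeviationMetrics) {
            fmt.Printf("epoch %d: loss=%.4f\n", epoch, avgLoss)
        })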
func (*TweenState) TweenBatch ¶ added in v0.0.6
func (ts *TweenState) TweenBatch(n *Network, inputs [][]float32, targetClasses []int, outputSize int, rate float32) float32
TweenBatch: Convenience function for batch training (non-stepping mode). Processes all samples in the batch, accumulates gaps, then applies an averaged update.
func (*TweenState) TweenBatchApply ¶ added in v0.0.6
func (ts *TweenState) TweenBatchApply(n *Network, rate float32)
TweenBatchApply: Applies accumulated batch gaps as averaged weight updates
func (*TweenState) TweenStep ¶ added in v0.0.6
func (ts *TweenState) TweenStep(n *Network, input []float32, targetClass int, outputSize int, rate float32) float32
TweenStep: One complete bidirectional iteration with explosion detection
func (*TweenState) TweenStepAccumulate ¶ added in v0.0.6
func (ts *TweenState) TweenStepAccumulate(n *Network, input []float32, targetClass int, outputSize int) float32
TweenStepAccumulate: Accumulates gaps without applying weight updates. Call TweenBatchApply when the batch is complete to apply the averaged updates.
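A sketch of the accumulate/apply pattern the docs above describe; batchInputs and batchTargets stand for the caller's data:

    ts.ResetBatch()
    for i, sample := range batchInputs { // batchInputs [][]float32, batchTargets []int
        ts.TweenStepAccumulate(network, sample, batchTargets[i], outputSize)
    }
    ts.TweenBatchApply(network, 0.01) // one averaged update per batch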
func (*TweenState) TweenWeights ¶ added in v0.0.6
func (ts *TweenState) TweenWeights(n *Network, rate float32)
TweenWeights: Adjust weights to close the gap at each layer. Supports ALL layer types: Dense, Conv2D, Attention, LSTM, LayerNorm, SwiGLU.
func (*TweenState) TweenWeightsChainRule ¶ added in v0.0.6
func (ts *TweenState) TweenWeightsChainRule(n *Network, rate float32)
TweenWeightsChainRule: Use chain-rule gradients directly for weight updates. This is the proper gradient-based approach: dW = input^T * output_gradient.
type WarmupScheduler ¶ added in v0.0.5
type WarmupScheduler struct {
// contains filtered or unexported fields
}
func NewWarmupScheduler ¶ added in v0.0.5
func NewWarmupScheduler(warmupSteps int, warmupLR, baseLR float32, afterScheduler LRScheduler) *WarmupScheduler
func (*WarmupScheduler) GetLR ¶ added in v0.0.5
func (s *WarmupScheduler) GetLR(step int) float32
func (*WarmupScheduler) Name ¶ added in v0.0.5
func (s *WarmupScheduler) Name() string
func (*WarmupScheduler) Reset ¶ added in v0.0.5
func (s *WarmupScheduler) Reset()
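A hedged sketch of warmup scheduling; whether a nil after-scheduler simply holds baseLR after warmup is an assumption, since the fields are unexported:

    sched := nn.NewWarmupScheduler(500, 1e-5, 1e-3, nil) // nil after-scheduler: assumption
    for step := 0; step < 2000; step++ {
        lr := sched.GetLR(step) // ramps from warmupLR toward baseLR over the first 500 steps
        _ = lr
    }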
type WeightsData ¶
type WeightsData struct {
Type string `json:"type"` // "float32", "float64", etc.
DType string `json:"dtype"` // DType enum value as string (for multi-type support)
Layers []LayerWeights `json:"layers"`
}
WeightsData represents the actual weight values. The Type field indicates the numeric type used for the weights.
Source Files
¶
- activations.go
- architecture.go
- attention.go
- attention_gpu.go
- backend.go
- backward.go
- clustering.go
- cnn.go
- conv1d.go
- conv2d_gpu.go
- correlation.go
- dense.go
- embedding.go
- ensemble.go
- evaluation.go
- forward.go
- gpu.go
- grafting.go
- grouping.go
- import_model.go
- introspection.go
- layernorm.go
- load_generic.go
- load_transformer.go
- lstm.go
- nn.go
- observer.go
- optimizer.go
- parallel.go
- registry.go
- residual.go
- rmsnorm.go
- rnn.go
- rope.go
- safetensors.go
- scheduler.go
- sequential.go
- serialization.go
- serialization_multiprecision.go
- softmax.go
- step_backward.go
- step_backward_utils.go
- step_forward.go
- swiglu.go
- telemetry.go
- training.go
- training_utils.go
- tween.go
- types.go
- utils.go