Documentation
¶
Overview ¶
Package serialization provides the native .born format for saving and loading Born ML models.
The .born format is a simple, efficient binary format designed specifically for Born models:
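Format Structure: [4 bytes: Magic "BORN"] [4 bytes: Version (uint32 LE)] [4 bytes: Flags (uint32 LE)] [8 bytes: Header Size (uint64 LE)] [Header: JSON metadata] [Tensor data: raw bytes, 64-byte aligned] Note: this layout has no checksum field; format v2 (FormatVersionV2) uses a 64-byte fixed header with a SHA-256 checksum at offset 0x20.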
The format supports:
- Multiple data types (float32, float64, int32, int64, uint8, bool)
- Arbitrary tensor shapes
- Metadata preservation
- Fast loading with memory mapping support (see MmapReader)
- Optional compression (future)
Example usage:
// Save a model
model := nn.NewLinear(784, 128, backend)
writer, err := serialization.NewBornWriter("model.born")
if err != nil {
log.Fatal(err)
}
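// params is the model's state dict (map[string]*tensor.RawTensor); how it is obtained from the model is not shown here.
if err := writer.WriteStateDict(params, "Linear", map[string]string{"description": "example model"}); err != nil {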
log.Fatal(err)
}
writer.Close()
// Load a model
reader, err := serialization.NewBornReader("model.born")
if err != nil {
log.Fatal(err)
}
stateDict, err := reader.ReadStateDict(backend)
if err != nil {
log.Fatal(err)
}
model.LoadStateDict(stateDict)
reader.Close()
Index ¶
- Constants
- Variables
- func ComputeChecksum(data []byte) [32]byte
- func ComputeChecksumReader(r io.Reader) ([32]byte, error)
- func ValidateChecksum(computed, stored [32]byte) error
- func ValidateHeader(h *Header, dataSize int64, level ValidationLevel) error
- func ValidateTensorName(name string) error
- func ValidateTensorOffsets(tensors []TensorMeta, dataSize int64) error
- func WriteSafeTensors(path string, tensors map[string]*tensor.RawTensor, metadata map[string]string) error
- func WriteTo(writer io.Writer, stateDict map[string]*tensor.RawTensor, modelType string, ...) error
- type BornReader
- func (r *BornReader) Close() error
- func (r *BornReader) Header() Header
- func (r *BornReader) LoadTensor(name string, backend tensor.Backend) (*tensor.RawTensor, error)
- func (r *BornReader) Metadata() map[string]string
- func (r *BornReader) ReadStateDict(backend tensor.Backend) (map[string]*tensor.RawTensor, error)
- func (r *BornReader) ReadTensorData(name string) ([]byte, error)
- func (r *BornReader) TensorInfo(name string) (*TensorMeta, error)
- func (r *BornReader) TensorNames() []string
- type BornWriter
- func (w *BornWriter) Close() error
- func (w *BornWriter) WriteStateDict(stateDict map[string]*tensor.RawTensor, modelType string, ...) error
- func (w *BornWriter) WriteStateDictV2(stateDict map[string]*tensor.RawTensor, modelType string, ...) error
- func (w *BornWriter) WriteStateDictWithHeader(stateDict map[string]*tensor.RawTensor, header Header) error
- func (w *BornWriter) WriteStateDictWithHeaderV2(stateDict map[string]*tensor.RawTensor, header Header) error
- type CheckpointMeta
- type Header
- type MmapReader
- func (r *MmapReader) Checksum() [32]byte
- func (r *MmapReader) Close() error
- func (r *MmapReader) Flags() uint32
- func (r *MmapReader) Header() Header
- func (r *MmapReader) LoadTensor(name string, backend tensor.Backend) (*tensor.RawTensor, error)
- func (r *MmapReader) ReadStateDict(backend tensor.Backend) (map[string]*tensor.RawTensor, error)
- func (r *MmapReader) TensorData(name string) ([]byte, error)
- func (r *MmapReader) TensorDataCopy(name string) ([]byte, error)
- func (r *MmapReader) TensorInfo(name string) (*TensorMeta, error)
- func (r *MmapReader) TensorNames() []string
- func (r *MmapReader) Version() uint32
- type ReaderOptions
- type SafeTensorHeader
- type SafeTensorsWriter
- type TensorMeta
- type ValidationError
- type ValidationLevel
Constants ¶
const ( MagicBytes = "BORN" FormatVersion = 1 // v1: Basic format without checksum FormatVersionV2 = 2 // v2: With SHA-256 checksum HeaderAlignment = 64 // Align tensor data to 64 bytes for optimal performance FixedHeaderSizeV2 = 64 // v2 fixed header size (0x40 bytes) ChecksumSize = 32 // SHA-256 checksum size (32 bytes) ChecksumOffsetV2 = 0x20 // Checksum offset in v2 fixed header )
Format constants.
const ( DTypeFloat32 = "float32" DTypeFloat64 = "float64" DTypeInt32 = "int32" DTypeInt64 = "int64" DTypeUint8 = "uint8" DTypeBool = "bool" )
Data type string constants for serialization.
const ( FlagCompressed uint32 = 1 << 0 // bit 0: gzip compression FlagHasOptimizer uint32 = 1 << 1 // bit 1: optimizer state included FlagHasMetadata uint32 = 1 << 2 // bit 2: custom metadata included )
Flags for the .born format.
const ( MaxHeaderSize = 100 * 1024 * 1024 // 100MB - maximum header size MaxTensorCount = 100_000 // Maximum number of tensors in a file MaxTensorNameLen = 4096 // Maximum tensor name length MaxMetadataSize = 10 * 1024 * 1024 // 10MB - maximum metadata size )
Validation limits for security and resource protection.
Variables ¶
var ( ErrChecksumMismatch = errors.New("checksum mismatch: file may be corrupted") ErrOffsetOverlap = errors.New("tensor offsets overlap") ErrOutOfBounds = errors.New("tensor extends beyond data section") ErrNegativeOffset = errors.New("negative offset or size") ErrTooManyTensors = errors.New("too many tensors in file") ErrTensorNameTooLong = errors.New("tensor name too long") ErrInvalidTensorName = errors.New("invalid tensor name") ErrHeaderTooLarge = errors.New("header exceeds maximum size") ErrInvalidMagic = errors.New("invalid magic bytes") ErrUnsupportedVersion = errors.New("unsupported format version") )
Common errors.
Functions ¶
func ComputeChecksum ¶
ComputeChecksum computes SHA-256 checksum of data.
func ComputeChecksumReader ¶
ComputeChecksumReader computes SHA-256 checksum from an io.Reader. This is useful for computing checksums of large files without loading them entirely into memory.
func ValidateChecksum ¶
ValidateChecksum compares computed checksum against stored checksum. Returns ErrChecksumMismatch if they don't match.
func ValidateHeader ¶
func ValidateHeader(h *Header, dataSize int64, level ValidationLevel) error
ValidateHeader performs comprehensive header validation.
func ValidateTensorName ¶
ValidateTensorName checks tensor names for path traversal attacks and malicious patterns.
func ValidateTensorOffsets ¶
func ValidateTensorOffsets(tensors []TensorMeta, dataSize int64) error
ValidateTensorOffsets checks for overlapping tensor offsets and out-of-bounds access. This is critical for security - malformed files could cause memory corruption or data leakage.
func WriteSafeTensors ¶
func WriteSafeTensors(path string, tensors map[string]*tensor.RawTensor, metadata map[string]string) error
WriteSafeTensors writes tensors to a SafeTensors file.
Format: [8 bytes: header_size (uint64 LE)] [header_size bytes: JSON header] [tensor data: raw bytes]
Tensors are written in alphabetical order by name.
Types ¶
type BornReader ¶
type BornReader struct {
// contains filtered or unexported fields
}
BornReader reads models from .born format.
func NewBornReader ¶
func NewBornReader(path string) (*BornReader, error)
NewBornReader creates a new .born file reader with default options (strict validation).
func NewBornReaderWithOptions ¶
func NewBornReaderWithOptions(path string, opts ReaderOptions) (*BornReader, error)
NewBornReaderWithOptions creates a new .born file reader with custom options.
func (*BornReader) Close ¶
func (r *BornReader) Close() error
Close closes the reader and the underlying file.
func (*BornReader) LoadTensor ¶
LoadTensor loads a single tensor from the file.
func (*BornReader) Metadata ¶
func (r *BornReader) Metadata() map[string]string
Metadata returns the metadata map from the header.
func (*BornReader) ReadStateDict ¶
ReadStateDict reads all tensors into a state dictionary.
func (*BornReader) ReadTensorData ¶
func (r *BornReader) ReadTensorData(name string) ([]byte, error)
ReadTensorData reads raw tensor data for a given tensor name.
func (*BornReader) TensorInfo ¶
func (r *BornReader) TensorInfo(name string) (*TensorMeta, error)
TensorInfo returns information about a specific tensor.
func (*BornReader) TensorNames ¶
func (r *BornReader) TensorNames() []string
TensorNames returns a list of all tensor names in the file.
type BornWriter ¶
type BornWriter struct {
// contains filtered or unexported fields
}
BornWriter writes models in .born format.
func NewBornWriter ¶
func NewBornWriter(path string) (*BornWriter, error)
NewBornWriter creates a new .born file writer.
func (*BornWriter) Close ¶
func (w *BornWriter) Close() error
Close closes the writer and the underlying file.
func (*BornWriter) WriteStateDict ¶
func (w *BornWriter) WriteStateDict(stateDict map[string]*tensor.RawTensor, modelType string, metadata map[string]string) error
WriteStateDict writes a state dictionary to the .born file.
The state dictionary is a map from parameter names to tensors. All tensors must be on the same device.
func (*BornWriter) WriteStateDictV2 ¶
func (w *BornWriter) WriteStateDictV2(stateDict map[string]*tensor.RawTensor, modelType string, metadata map[string]string) error
WriteStateDictV2 writes a state dictionary to the .born file using format v2 with SHA-256 checksum.
Format v2 includes: - 64-byte fixed header with SHA-256 checksum at offset 0x20 - Compatibility: v1 readers will reject v2 files, but v2 readers can still read v1 files.
func (*BornWriter) WriteStateDictWithHeader ¶
func (w *BornWriter) WriteStateDictWithHeader(stateDict map[string]*tensor.RawTensor, header Header) error
WriteStateDictWithHeader writes a state dictionary with custom header to the .born file.
This allows setting CheckpointMeta and other custom header fields.
func (*BornWriter) WriteStateDictWithHeaderV2 ¶
func (w *BornWriter) WriteStateDictWithHeaderV2(stateDict map[string]*tensor.RawTensor, header Header) error
WriteStateDictWithHeaderV2 writes a state dictionary with custom header to the .born file using format v2.
This allows setting CheckpointMeta and other custom header fields.
type CheckpointMeta ¶
type CheckpointMeta struct {
IsCheckpoint bool `json:"is_checkpoint"` // Whether this is a checkpoint file
Epoch int `json:"epoch"` // Training epoch number
Step int64 `json:"step"` // Training step number
Loss float64 `json:"loss"` // Loss value at checkpoint
OptimizerType string `json:"optimizer_type"` // Optimizer type ("SGD", "Adam", etc.)
OptimizerConfig map[string]any `json:"optimizer_config"` // Optimizer hyperparameters
TrainingMeta map[string]any `json:"training_meta"` // Additional training metadata
}
CheckpointMeta contains training state information for checkpoints.
type Header ¶
type Header struct {
FormatVersion int `json:"format_version"` // Version of the .born format
BornVersion string `json:"born_version"` // Version of Born that created this file
ModelType string `json:"model_type"` // Type of model (e.g., "Sequential", "Linear")
CreatedAt time.Time `json:"created_at"` // When the file was created
Tensors []TensorMeta `json:"tensors"` // Tensor metadata
Metadata map[string]string `json:"metadata"` // Custom metadata
CheckpointMeta *CheckpointMeta `json:"checkpoint,omitempty"` // Checkpoint metadata (optional)
}
Header represents the JSON header in a .born file.
type MmapReader ¶
type MmapReader struct {
// contains filtered or unexported fields
}
MmapReader provides memory-mapped access to .born files. This enables efficient loading of large models by only reading the header initially, and accessing tensor data on-demand via OS page cache.
func NewMmapReader ¶
func NewMmapReader(path string) (*MmapReader, error)
NewMmapReader creates a memory-mapped reader for a .born file. The file is opened read-only and mapped into memory. Only the header is parsed initially - tensor data is accessed on-demand.
Important: Always call Close() when done to unmap the file (use defer).
func (*MmapReader) Checksum ¶
func (r *MmapReader) Checksum() [32]byte
Checksum returns the SHA-256 checksum (v2 only, all zeros for v1).
func (*MmapReader) LoadTensor ¶
LoadTensor loads a tensor using the mmap'd data. This is a convenience method that creates a RawTensor and copies data into it.
func (*MmapReader) ReadStateDict ¶
ReadStateDict reads all tensors into a state dictionary.
func (*MmapReader) TensorData ¶
func (r *MmapReader) TensorData(name string) ([]byte, error)
TensorData returns a zero-copy slice to tensor data. The returned slice is valid only while the reader is open. WARNING: The data is read-only - writing to it will cause undefined behavior.
For cases where you need to modify the data, use TensorDataCopy instead.
func (*MmapReader) TensorDataCopy ¶
func (r *MmapReader) TensorDataCopy(name string) ([]byte, error)
TensorDataCopy returns a copy of tensor data (for modification). This allocates a new buffer and copies the data. Use this when you need to modify the tensor data.
func (*MmapReader) TensorInfo ¶
func (r *MmapReader) TensorInfo(name string) (*TensorMeta, error)
TensorInfo returns metadata about a specific tensor.
func (*MmapReader) TensorNames ¶
func (r *MmapReader) TensorNames() []string
TensorNames returns a list of all tensor names in the file.
func (*MmapReader) Version ¶
func (r *MmapReader) Version() uint32
Version returns the format version (1 or 2).
type ReaderOptions ¶
type ReaderOptions struct {
SkipChecksumValidation bool // Skip checksum validation (faster but less safe)
ValidationLevel ValidationLevel // Validation strictness level
}
ReaderOptions configures the behavior of BornReader.
type SafeTensorHeader ¶
type SafeTensorHeader struct {
DType string `json:"dtype"`
Shape []int64 `json:"shape"`
DataOffsets [2]int64 `json:"data_offsets"`
}
SafeTensorHeader represents a tensor in the SafeTensors header.
type SafeTensorsWriter ¶
type SafeTensorsWriter struct {
// contains filtered or unexported fields
}
SafeTensorsWriter writes models in SafeTensors format. SafeTensors is the standard format for HuggingFace models.
func NewSafeTensorsWriter ¶
func NewSafeTensorsWriter(path string) (*SafeTensorsWriter, error)
NewSafeTensorsWriter creates a new SafeTensors file writer.
func (*SafeTensorsWriter) Close ¶
func (w *SafeTensorsWriter) Close() error
Close closes the writer and the underlying file.
func (*SafeTensorsWriter) WriteStateDict ¶
func (w *SafeTensorsWriter) WriteStateDict(stateDict map[string]*tensor.RawTensor, metadata map[string]string) error
WriteStateDict writes a state dictionary to the SafeTensors file.
The state dictionary is a map from parameter names to tensors. Tensors are written in alphabetical order by name (SafeTensors requirement).
type TensorMeta ¶
type TensorMeta struct {
Name string `json:"name"` // Tensor name (e.g., "layer.0.weight")
DType string `json:"dtype"` // Data type (e.g., "float32", "float64")
Shape []int `json:"shape"` // Tensor shape
Offset int64 `json:"offset"` // Offset in the data section (bytes from start of tensor data)
Size int64 `json:"size"` // Size in bytes
}
TensorMeta describes a tensor in the .born file.
type ValidationError ¶
type ValidationError struct {
Type string // Type of error (e.g., "offset_overlap", "out_of_bounds")
Tensor string // Primary tensor name involved
Tensor2 string // Secondary tensor name (for overlap errors)
Details string // Additional details
}
ValidationError provides detailed information about validation failures.
func (*ValidationError) Error ¶
func (e *ValidationError) Error() string
Error implements the error interface.
type ValidationLevel ¶
type ValidationLevel int
ValidationLevel controls the strictness of validation.
const ( // ValidationStrict performs all validation checks (default, recommended for production). ValidationStrict ValidationLevel = iota // ValidationNormal performs basic validation checks only. ValidationNormal // ValidationNone skips validation (dangerous! Use only with trusted input). ValidationNone )