Documentation ¶
    
    
  
    
  
    Index ¶
- func BatchVectorAdd(operations []BatchOperation) error
- func CleanupCUDA()
- func FusedVectorMatrix(op *FusedOperation) error
- func GetDeviceCount() int
- func InitCUDA() error
- func LegacyMatrixMultiply(a, b []float32, width int) ([]float32, error)
- func LegacyVectorAdd(a, b []float32) ([]float32, error)
- func MatrixMultiply(a, b []float32, width int) ([]float32, error)
- func PrintDeviceInfo()
- func VectorAdd(a, b []float32) ([]float32, error)
- type BatchOperation
- type DeviceInfo
  - func GetDeviceInfo(deviceID int) (*DeviceInfo, error)
- type FusedOperation
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func BatchVectorAdd ¶
func BatchVectorAdd(operations []BatchOperation) error
BatchVectorAdd performs multiple vector additions in a single GPU call
func FusedVectorMatrix ¶
func FusedVectorMatrix(op *FusedOperation) error
FusedVectorMatrix performs both vector addition and matrix multiplication in a single kernel
func LegacyMatrixMultiply ¶
func LegacyMatrixMultiply(a, b []float32, width int) ([]float32, error)
LegacyMatrixMultiply performs matrix multiplication using the original implementation
func LegacyVectorAdd ¶
func LegacyVectorAdd(a, b []float32) ([]float32, error)
LegacyVectorAdd performs vector addition using the original implementation
func MatrixMultiply ¶
func MatrixMultiply(a, b []float32, width int) ([]float32, error)
MatrixMultiply performs matrix multiplication using CUDA (uses the original implementation for best performance)
func PrintDeviceInfo ¶
func PrintDeviceInfo()
PrintDeviceInfo prints information about all CUDA devices
Types ¶
type BatchOperation ¶
BatchOperation represents a batch of operations
type DeviceInfo ¶
DeviceInfo represents CUDA device information
func GetDeviceInfo ¶
func GetDeviceInfo(deviceID int) (*DeviceInfo, error)
GetDeviceInfo returns information about a specific CUDA device
 Click to show internal directories. 
   Click to hide internal directories.