sound

package
v0.9.15 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 28, 2021 License: BSD-3-Clause Imports: 15 Imported by: 3

Documentation

Index

Constants

View Source
// Byte-order options for sample data.
const (
	// BigEndian indicates samples are big endian byte order.
	BigEndian = iota
	// LittleEndian indicates samples are little endian byte order.
	LittleEndian
)
View Source
// Sample representation options.
const (
	// Unknown indicates the sample type is not set.
	Unknown = iota
	// SignedInt indicates samples are signed integers.
	SignedInt
	// UnSignedInt is undocumented in the original; presumably samples are
	// unsigned integers -- TODO confirm.
	UnSignedInt
	// Float is undocumented in the original; presumably samples are
	// floating point values -- TODO confirm.
	Float
)

Variables

This section is empty.

Functions

func MSecToSamples added in v0.9.7

func MSecToSamples(ms float32, rate int) int

MSecToSamples converts a duration in milliseconds (ms) to a number of samples at the given sample rate (rate).

func SamplesToMSec added in v0.9.7

func SamplesToMSec(samples int, rate int) float32

SamplesToMSec converts a number of samples to a duration in milliseconds at the given sample rate (rate).

Types

type Endian

// Endian is a named int32 type.
// NOTE(review): presumably intended to carry the BigEndian / LittleEndian
// byte-order values, but those constants are declared untyped -- confirm
// the intended use.
type Endian int32

type Params added in v0.9.7

// Params defines the sound input parameters for auditory processing.
//
// The first group of fields holds the configurable values (several carry a
// def:"..." default in their tag). The second group, after the
// "these are calculated" marker, is tagged inactive:"+" and holds values
// derived from the first group; the code that computes them is not shown here.
//
// NOTE(review): tags displayed as an empty literal followed by an
// "N-byte string literal not displayed" comment were elided by the doc
// renderer; the real tag text (defaults and descriptions) is not visible here.
type Params struct {
	WinMs       float32 `def:"25" desc:"input window -- number of milliseconds worth of sound to filter at a time"`
	StepMs      float32 `` /* 139-byte string literal not displayed */
	SegmentMs   float32 `` /* 265-byte string literal not displayed */
	StrideMs    float32 `def:"100" desc:"how far to move on each trial"`
	BorderSteps int     `def:"6" view:"+" desc:"overlap with previous segment"`
	Channel     int     `` /* 138-byte string literal not displayed */
	PadValue    float32 `desc:"value to use of signal when padding"`

	// these are calculated
	WinSamples        int   `inactive:"+" desc:"number of samples to process each step"`
	StepSamples       int   `inactive:"+" desc:"number of samples to step input by"`
	SegmentSamples    int   `inactive:"+" desc:"number of samples in a segment"`
	// NOTE(review): the desc tag below says "StrideMS" but the field is named StrideMs.
	StrideSamples     int   `inactive:"+" desc:"number of samples converted from StrideMS"`
	SegmentSteps      int   `inactive:"+" desc:"number of steps in a segment"`
	// NOTE(review): "steps border steps" in the desc tag below reads like a duplicated word.
	SegmentStepsTotal int   `inactive:"+" desc:"SegmentSteps plus steps border steps on both sides"`
	Steps             []int `inactive:"+" desc:"pre-calculated start position for each step"`
}

Params defines the sound input parameters for auditory processing

type SndEnv added in v0.9.7

// SndEnv is the sound environment: it bundles the loaded sound, the input
// parameters, and the intermediate and final tensors of the processing chain
// named by its fields (window -> dft power -> mel filter bank -> mfcc dct ->
// gabor -> kwta). The stage order is inferred from the field descriptions,
// not from code visible here -- TODO confirm.
//
// NOTE(review): tags displayed as an empty literal followed by an
// "N-byte string literal not displayed" comment were elided by the doc
// renderer; the real tag text is not visible here.
type SndEnv struct {
	// the environment has the training/test data and the procedures for creating/choosing the input to the model
	// "Segment" in var name indicates that the data or value only applies to a segment of samples rather than the entire signal
	Nm              string `desc:"name of this environment"`
	Dsc             string `desc:"description of this environment"`
	// NOTE(review): "se.tory" in the desc tag below looks like a mangled
	// "sensory" (possibly from a search-and-replace) -- confirm and fix the tag.
	Sound           Wave   `desc:"specifications of the raw se.tory input"`
	Params          Params
	Signal          etensor.Float32 `view:"no-inline" desc:" the full sound input obtained from the sound input"`
	SegCnt          int             `desc:"the number of segments for this sound"`
	Window          etensor.Float32 `inactive:"+" desc:" [Input.WinSamples] the raw sound input, one channel at a time"`
	// NOTE(review): the tag key/value pair inactive:"no-inline" below differs
	// from every sibling field (inactive:"+" or view:"no-inline") -- confirm
	// which was intended. The desc text also has a stray quote ("segment's'").
	Segment         int             `inactive:"no-inline" desc:" the current chunk of samples (a full segment's' worth) - zero is first chunk"`
	Dft             dft.Params
	Power           etensor.Float32   `view:"-" desc:" power of the dft, up to the nyquist limit frequency (1/2 input.WinSamples)"`
	// NOTE(review): "liit" in the desc tag below is a typo for "limit".
	LogPower        etensor.Float32   `view:"-" desc:" log power of the dft, up to the nyquist liit frequency (1/2 input.WinSamples)"`
	PowerSegment    etensor.Float32   `view:"no-inline" desc:" full segment's worth of power of the dft, up to the nyquist limit frequency (1/2 input.win_samples)"`
	LogPowerSegment etensor.Float32   `` /* 128-byte string literal not displayed */
	Mel             mel.Params        `view:"no-inline"`
	MelFBank        etensor.Float32   `` /* 150-byte string literal not displayed */
	MelFBankSegment etensor.Float32   `view:"no-inline" desc:" full segment's worth of mel feature-bank output"`
	MelFilters      etensor.Float32   `view:"no-inline" desc:" the actual filters"`
	MfccDctSegment  etensor.Float32   `` /* 160-byte string literal not displayed */
	MfccDct         etensor.Float32   `` /* 140-byte string literal not displayed */
	// NOTE(review): the desc tag below ends with a stray single quote.
	GaborSpecs      []agabor.Filter   `view:"no-inline" desc:" a set of gabor filter specifications, one spec per filter'"`
	GaborFilters    agabor.FilterSet  `desc:"the actual gabor filters, the first spec determines the size of all filters in the set"`
	GaborTab        etable.Table      `view:"no-inline" desc:"gabor filter table (view only)"`
	// NOTE(review): the Gbor* fields below abbreviate "Gabor" differently
	// from the Gabor* fields above; renaming would change the exported API.
	GborPoolsX      int               `view:"+" desc:" this values is the number of neuron pools along the time dimension in the input layer"`
	GborPoolsY      int               `view:"+" desc:" this values is the number of neuron pools along the freq dimension in the input layer"`
	GborOutput      etensor.Float32   `view:"no-inline" desc:" raw output of Gabor -- full segment's worth of gabor steps"`
	GborKwta        etensor.Float32   `view:"no-inline" desc:" post-kwta output of full segment's worth of gabor steps"`
	Inhibs          fffb.Inhibs       `view:"no-inline" desc:"inhibition values for A1 KWTA"`
	ExtGi           etensor.Float32   `view:"no-inline" desc:"A1 simple extra Gi from neighbor inhibition tensor"`
	NeighInhib      kwta.NeighInhib   `` /* 155-byte string literal not displayed */
	Kwta            kwta.KWTA         `desc:"kwta parameters, using FFFB form"`
	KwtaPool        bool              `desc:"if Kwta.On == true, call KwtaPool (true) or KwtaLayer (false)"`
	FftCoefs        []complex128      `view:"-" desc:" discrete fourier transform (fft) output complex representation"`
	Fft             *fourier.CmplxFFT `view:"-" desc:" struct for fast fourier transform"`

	// internal state - view:"-"
	FirstStep bool `view:"-" desc:" if first frame to process -- turns off prv smoothing of dft power"`
}

func (*SndEnv) ApplyGabor added in v0.9.7

func (se *SndEnv) ApplyGabor() (tsr *etensor.Float32)

ApplyGabor convolves the gabor filters with the mel output

func (*SndEnv) ApplyKwta added in v0.9.7

func (se *SndEnv) ApplyKwta(ch int)

ApplyKwta runs the kwta algorithm on the raw activations

func (*SndEnv) Defaults added in v0.9.7

func (se *SndEnv) Defaults()

Defaults

func (*SndEnv) Desc added in v0.9.7

func (se *SndEnv) Desc() string

func (*SndEnv) Init added in v0.9.7

func (se *SndEnv) Init(msSilenceAdd, msSilenceRmStart, msSilenceRmEnd float64) (err error, segments int)

Init sets various sound processing params based on default params and user overrides. You can pass the milliseconds of silence to remove at the start and the millisecond position at which to cut off the sound (to remove silence at the end). You can also pass milliseconds of silence to prepend to the start of the signal if you want some random amount of silence at the start for variability.

func (*SndEnv) LoadSound added in v0.9.7

func (se *SndEnv) LoadSound() bool

LoadSound

func (*SndEnv) Name added in v0.9.7

func (se *SndEnv) Name() string

func (*SndEnv) Pad added in v0.9.7

func (se *SndEnv) Pad(signal []float32) (padded []float32)

Pad pads the signal so that the length of signal divided by stride has no remainder

func (*SndEnv) ParamDefaults added in v0.9.7

func (se *SndEnv) ParamDefaults()

ParamDefaults initializes the Input

func (*SndEnv) ProcessSegment added in v0.9.7

func (se *SndEnv) ProcessSegment() (moreSegments bool)

ProcessSegment processes the entire segment's input by processing a small overlapping set of samples on each pass

func (*SndEnv) ProcessStep added in v0.9.7

func (se *SndEnv) ProcessStep(ch int, step int) error

ProcessStep processes a step's worth of sound input from the current input_pos, and increments input_pos by input.step_samples. It processes the data by doing a Fourier transform and computing the power spectrum, then applies mel filters to get the frequency bands that mimic the non-linear human perception of sound.

func (*SndEnv) SndToWindow added in v0.9.7

func (se *SndEnv) SndToWindow(stepOffset int, ch int) error

SndToWindow gets sound from the signal (i.e. the slice of input values) at given position and channel, into Window

func (*SndEnv) Tail added in v0.9.7

func (se *SndEnv) Tail(signal []float32) int

Tail returns the number of samples that remain beyond the last full stride

type SoundSampleType

// SoundSampleType is a named int32 type and the return type of
// Wave.SampleType.
// NOTE(review): presumably its values are the Unknown / SignedInt /
// UnSignedInt / Float constants, but those are declared untyped -- confirm.
type SoundSampleType int32

type Wave

// Wave holds sound data as an integer audio buffer.
type Wave struct {
	// Buf is the underlying sample buffer. Presumably it is populated by
	// Load and read by the accessor methods -- TODO confirm, the method
	// bodies are not visible here.
	Buf *audio.IntBuffer
}

func (*Wave) Channels

func (snd *Wave) Channels() int

Channels returns the number of channels in the wav data, or 0 if snd is nil

func (*Wave) GetFloatAtIdx

func (snd *Wave) GetFloatAtIdx(buf *audio.IntBuffer, idx int) float32

GetFloatAtIdx

func (*Wave) Load

func (snd *Wave) Load(fn string) error

Load loads the sound file and decodes it

func (*Wave) SampleRate

func (snd *Wave) SampleRate() int

SampleRate returns the sample rate of the sound, or 0 if snd is nil

func (*Wave) SampleSize

func (snd *Wave) SampleSize() int

SampleSize returns the sample size of the sound, or 0 if snd is nil

func (*Wave) SampleType

func (snd *Wave) SampleType() SoundSampleType

todo: return to this SampleType

func (*Wave) SoundToTensor

func (snd *Wave) SoundToTensor(samples *etensor.Float32, channel int) bool

SoundToTensor converts sound data to a floating point etensor with normalized -1..1 values (unless the sound is stored natively as float, in which case it is not guaranteed to be normalized), for use in signal processing routines. A specific channel can optionally be selected (formats sound_data as a single-dimensional matrix of frames size); -1 gets all available channels (formats sound_data as a two-dimensional matrix with outer dimension as channels and inner dimension as frames).

func (*Wave) WriteWave

func (snd *Wave) WriteWave(fn string) error

WriteWave encodes the signal data and writes it to file using the sample rate and other values of the buf object

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL