bbq

package module
v0.0.0-...-8e6904f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 26, 2024 License: Apache-2.0 Imports: 19 Imported by: 0

README

BBQvec Logo

Status license GoDoc Crates.io Go CI Rust CI

BBQvec is an open-source, embedded vector database index for Go and Rust, providing approximate K-nearest-neighbors (aKNN).

Read more about the algorithm on our blog!

Getting Started

Go

package main

import (
  "fmt"

  bbq "github.com/daxe-ai/bbqvec"
)

// main builds an in-memory BBQvec store, adds 100K random vectors, runs one
// FindNearest query, and prints the first result. Every error returned by the
// library is checked; on failure the example prints the error and stops.
func main() {
  // Declare store parameters
  dimensions := 200
  nBasis := 10

  // Initialize the store
  backend := bbq.NewMemoryBackend(dimensions)
  datastore, err := bbq.NewVectorStore(backend, nBasis)
  if err != nil {
    fmt.Println("creating vector store:", err)
    return
  }

  // Create some test data, 100K random vectors
  vecs := bbq.NewRandVectorSet(100_000, dimensions, nil)
  if err := datastore.AddVectorsWithOffset(0, vecs); err != nil {
    fmt.Println("adding vectors:", err)
    return
  }
  /*
  Equivalent to:
  for i, v := range vecs {
    if err := datastore.AddVector(bbq.ID(i), v); err != nil {
      // handle err
    }
  }
  */

  // Run a query (arguments after the target are k, searchk, spill)
  targetVec := bbq.NewRandVector(dimensions, nil)
  results, err := datastore.FindNearest(targetVec, 10, 1000, 1)
  if err != nil {
    fmt.Println("finding nearest vectors:", err)
    return
  }

  // Inspect the results; guard against an empty set before indexing into it
  if results.Len() == 0 {
    fmt.Println("no results")
    return
  }
  top := results.ToSlice()[0]
  vec, err := backend.GetVector(top.ID)
  if err != nil {
    fmt.Println("fetching vector:", err)
    return
  }
  fmt.Println(top.ID, vec, top.Similarity)
}

Rust

use bbqvec::IndexIDIterator;

// Builds an in-memory store, adds 100K random vectors, runs one nearest-neighbour
// query, and prints the id and similarity of every result.
// NOTE(review): `Result<()>` needs a one-parameter `Result` alias in scope
// (e.g. `anyhow::Result`); the snippet does not show which one — confirm.
fn main() -> Result<()> {
  // Declare store parameters
  let dimensions = 200;
  let n_basis = 10;

  // Initialize the store
  let mem = bbqvec::MemoryBackend::new(dimensions, n_basis)?;
  let mut store = bbqvec::VectorStore::new(mem)?;

  // Create some test data, 100K random vectors
  let vecs = bbqvec::create_vector_set(dimensions, 100000);
  store.add_vector_iter(vecs.enumerate_ids())?;

  // Run a query
  let target = bbqvec::create_random_vector(dimensions);
  let results = store.find_nearest(&target, 10, 1000, 1)?;

  // Inspect the results
  for res in results.iter_results() {
    println!("{} {}", res.id, res.similarity);
  }

  // The function is declared to return a Result, so the success path must
  // end with an explicit Ok value.
  Ok(())
}

TODOs

We're still early: the Go implementation is the more battle-tested of the two and suits the beta use case, while the Rust implementation is a good deal faster. Contributions are welcome.

Go

  • More benchmarks
  • New Quantizations
    • Hamming Distance (single-bit vectors)
    • Novel quantizations

Rust

  • Finish the disk backend to match Go (in progress, coming shortly)
  • New Quantizations
Acknowledgements

Thank you to MariaLetta for the free-gophers-pack and to rustacean.net for the CC0 logo characters.

Documentation

Index

Constants

This section is empty.

Variables

View Source
// ErrAlreadyBuilt is a sentinel error whose message is "Already built the index".
// Compare against it with errors.Is rather than by message text.
var ErrAlreadyBuilt = errors.New("Already built the index")

// ErrIDNotFound is a sentinel error whose message is "ID not found".
// Compare against it with errors.Is rather than by message text.
var ErrIDNotFound = errors.New("ID not found")

Functions

This section is empty.

Types

type BackendInfo

// BackendInfo is the value returned by the Info method of VectorBackend and of
// the concrete backends (MemoryBackend, QuantizedMemoryBackend, DiskBackend).
type BackendInfo struct {
	// NOTE(review): presumably true when the backend already holds saved index data — confirm.
	HasIndexData bool
	// NOTE(review): presumably the vector dimensionality the backend was created with — confirm.
	Dimensions   int
	// NOTE(review): presumably the Name() of the Quantization in use — confirm.
	Quantization string
}

type Basis

// Basis is a slice of Vector values. IndexBackend.SaveBases and LoadBases
// exchange bases as []Basis.
type Basis []Vector

type CountingBitmap

type CountingBitmap struct {
	// contains filtered or unexported fields
}

func NewCountingBitmap

func NewCountingBitmap(maxCount int) *CountingBitmap

func (*CountingBitmap) Or

func (c *CountingBitmap) Or(in *roaring.Bitmap)

func (*CountingBitmap) String

func (c *CountingBitmap) String() string

func (*CountingBitmap) TopK

func (c *CountingBitmap) TopK(k int) *roaring.Bitmap

Note: TopK may return more entries than the requested k.

type DiskBackend

type DiskBackend[V any] struct {
	// contains filtered or unexported fields
}

func NewDiskBackend

func NewDiskBackend[V any](directory string, dimensions int, quantization Quantization[V]) (*DiskBackend[V], error)

func (*DiskBackend[V]) Close

func (d *DiskBackend[V]) Close() error

func (*DiskBackend[V]) ComputeSimilarity

func (d *DiskBackend[V]) ComputeSimilarity(targetVector Vector, targetID ID) (float32, error)

func (*DiskBackend[V]) Exists

func (d *DiskBackend[V]) Exists(id ID) bool

func (*DiskBackend[V]) GetVector

func (d *DiskBackend[V]) GetVector(id ID) (v V, err error)

func (*DiskBackend[V]) Info

func (d *DiskBackend[V]) Info() BackendInfo

func (*DiskBackend[V]) LoadBases

func (d *DiskBackend[V]) LoadBases() ([]Basis, error)

func (*DiskBackend[V]) LoadBitmap

func (d *DiskBackend[V]) LoadBitmap(basis int, index int) (*roaring.Bitmap, error)

func (*DiskBackend[V]) PutVector

func (d *DiskBackend[V]) PutVector(id ID, v Vector) error

func (*DiskBackend[V]) SaveBases

func (d *DiskBackend[V]) SaveBases(bases []Basis, token uint64) (uint64, error)

func (*DiskBackend[V]) SaveBitmap

func (d *DiskBackend[V]) SaveBitmap(basis int, index int, bitmap *roaring.Bitmap) error

func (*DiskBackend[V]) Sync

func (d *DiskBackend[V]) Sync() error

type Float16Quantization

type Float16Quantization struct {
	// contains filtered or unexported fields
}

func (Float16Quantization) Lower

func (q Float16Quantization) Lower(v Vector) (float16Vec, error)

func (Float16Quantization) LowerSize

func (q Float16Quantization) LowerSize(dim int) int

func (Float16Quantization) Marshal

func (q Float16Quantization) Marshal(to []byte, lower float16Vec) error

func (Float16Quantization) Name

func (q Float16Quantization) Name() string

func (Float16Quantization) Similarity

func (q Float16Quantization) Similarity(x, y float16Vec) float32

func (Float16Quantization) Unmarshal

func (q Float16Quantization) Unmarshal(data []byte) (float16Vec, error)

type ID

// ID is the uint64 identifier type used to address a vector; the backends'
// PutVector, GetVector, Exists and ComputeSimilarity methods all take an ID.
type ID uint64

type IndexBackend

// IndexBackend is the interface for saving and loading index data: the bases
// and the roaring bitmaps addressed by a (basis, index) pair.
type IndexBackend interface {
	// SaveBases stores the given bases.
	// NOTE(review): the meaning of token and of the returned uint64 is not
	// visible on this page — confirm against the implementation.
	SaveBases(bases []Basis, token uint64) (uint64, error)
	// LoadBases returns the stored bases.
	LoadBases() ([]Basis, error)

	// SaveBitmap stores bitmap under the given basis and index.
	SaveBitmap(basis int, index int, bitmap *roaring.Bitmap) error
	// LoadBitmap returns the bitmap stored under the given basis and index.
	LoadBitmap(basis, index int) (*roaring.Bitmap, error)
	// Sync presumably flushes pending writes to the underlying storage — TODO confirm.
	Sync() error
}

type MemoryBackend

type MemoryBackend struct {
	// contains filtered or unexported fields
}

func NewMemoryBackend

func NewMemoryBackend(dimensions int) *MemoryBackend

func (*MemoryBackend) Close

func (mem *MemoryBackend) Close() error

func (*MemoryBackend) ComputeSimilarity

func (mem *MemoryBackend) ComputeSimilarity(vector Vector, targetID ID) (float32, error)

func (*MemoryBackend) Exists

func (mem *MemoryBackend) Exists(id ID) bool

func (*MemoryBackend) ForEachVector

func (mem *MemoryBackend) ForEachVector(cb func(ID) error) error

func (*MemoryBackend) GetVector

func (mem *MemoryBackend) GetVector(id ID) (Vector, error)

func (*MemoryBackend) Info

func (mem *MemoryBackend) Info() BackendInfo

func (*MemoryBackend) PutVector

func (mem *MemoryBackend) PutVector(id ID, vector Vector) error

type NoQuantization

// NoQuantization is an empty struct whose methods use Vector itself as the
// lowered type: its Lower, Marshal, Unmarshal and Similarity signatures all
// operate on Vector.
type NoQuantization struct{}

func (NoQuantization) Lower

func (q NoQuantization) Lower(v Vector) (Vector, error)

func (NoQuantization) LowerSize

func (q NoQuantization) LowerSize(dim int) int

func (NoQuantization) Marshal

func (q NoQuantization) Marshal(to []byte, lower Vector) error

func (NoQuantization) Name

func (q NoQuantization) Name() string

func (NoQuantization) Similarity

func (q NoQuantization) Similarity(x, y Vector) float32

func (NoQuantization) Unmarshal

func (q NoQuantization) Unmarshal(data []byte) (Vector, error)

type PrintfFunc

// PrintfFunc is a function that takes a string followed by variadic arguments
// (presumably a Printf-style format and its operands). VectorStore.SetLogger
// accepts a PrintfFunc.
type PrintfFunc func(string, ...any)

type Quantization

// Quantization describes how a Vector is converted to and from a lowered
// representation of type L, and how two lowered values are compared.
type Quantization[L any] interface {
	// Similarity returns a float32 score for two lowered values.
	Similarity(x, y L) float32
	// Lower converts a Vector into its lowered representation.
	Lower(v Vector) (L, error)
	// Marshal writes the lowered value into the caller-supplied byte slice.
	Marshal(to []byte, lower L) error
	// Unmarshal decodes a lowered value from data.
	Unmarshal(data []byte) (L, error)
	// Name returns a string naming the quantization.
	Name() string
	// LowerSize returns an int for the given dimension count
	// (presumably the marshalled size in bytes — TODO confirm).
	LowerSize(dim int) int
}

type QuantizedMemoryBackend

type QuantizedMemoryBackend[V any, Q Quantization[V]] struct {
	// contains filtered or unexported fields
}

func NewQuantizedMemoryBackend

func NewQuantizedMemoryBackend[V any, Q Quantization[V]](dimensions int, quantization Q) *QuantizedMemoryBackend[V, Q]

func (*QuantizedMemoryBackend[V, Q]) Close

func (q *QuantizedMemoryBackend[V, Q]) Close() error

func (*QuantizedMemoryBackend[V, Q]) ComputeSimilarity

func (q *QuantizedMemoryBackend[V, Q]) ComputeSimilarity(vector Vector, targetID ID) (float32, error)

func (*QuantizedMemoryBackend[V, Q]) Exists

func (q *QuantizedMemoryBackend[V, Q]) Exists(id ID) bool

func (*QuantizedMemoryBackend[V, Q]) ForEachVector

func (q *QuantizedMemoryBackend[V, Q]) ForEachVector(cb func(ID) error) error

func (*QuantizedMemoryBackend[V, Q]) GetVector

func (q *QuantizedMemoryBackend[V, Q]) GetVector(id ID) (v V, err error)

func (*QuantizedMemoryBackend[V, Q]) Info

func (q *QuantizedMemoryBackend[V, Q]) Info() BackendInfo

func (*QuantizedMemoryBackend[V, Q]) PutVector

func (q *QuantizedMemoryBackend[V, Q]) PutVector(id ID, vector Vector) error

type Result

// Result pairs a vector ID with a float32 similarity value. ResultSet.ToSlice
// returns results as []*Result.
type Result struct {
	// Similarity is the score recorded for ID.
	Similarity float32
	// ID identifies the vector this result refers to.
	ID         ID
}

func (Result) String

func (r Result) String() string

type ResultSet

type ResultSet struct {
	// contains filtered or unexported fields
}

func FullTableScanSearch

func FullTableScanSearch(be VectorBackend, target Vector, k int) (*ResultSet, error)

func NewResultSet

func NewResultSet(topK int) *ResultSet

func (*ResultSet) AddResult

func (rs *ResultSet) AddResult(id ID, sim float32) bool

func (*ResultSet) ComputeRecall

func (rs *ResultSet) ComputeRecall(baseline *ResultSet, at int) float64

func (*ResultSet) Len

func (rs *ResultSet) Len() int

func (*ResultSet) String

func (rs *ResultSet) String() string

func (*ResultSet) ToSlice

func (rs *ResultSet) ToSlice() []*Result

type Vector

// Vector is a slice of float32 components.
type Vector []float32

func NewRandVector

func NewRandVector(dim int, rng *rand.Rand) Vector

func NewRandVectorSet

func NewRandVectorSet(n int, dim int, rng *rand.Rand) []Vector

func (Vector) Clone

func (v Vector) Clone() Vector

func (Vector) CosineSimilarity

func (v Vector) CosineSimilarity(other Vector) float32

func (Vector) Dimensions

func (v Vector) Dimensions() int

func (Vector) Normalize

func (v Vector) Normalize()

type VectorBackend

// VectorBackend is the storage interface that NewVectorStore and
// FullTableScanSearch accept.
type VectorBackend interface {
	// PutVector stores v under id.
	PutVector(id ID, v Vector) error
	// ComputeSimilarity returns a float32 score between targetVector and the
	// vector identified by targetID.
	ComputeSimilarity(targetVector Vector, targetID ID) (float32, error)
	// Info returns the backend's BackendInfo.
	Info() BackendInfo
	// Exists reports whether id is present in the backend.
	Exists(id ID) bool
	// Close presumably releases the backend's resources — TODO confirm.
	Close() error
}

type VectorGetter

// VectorGetter is implemented by anything that can return a value of type T
// for an ID.
type VectorGetter[T any] interface {
	// GetVector returns the value stored for id, or an error.
	GetVector(id ID) (T, error)
}

type VectorStore

type VectorStore struct {
	// contains filtered or unexported fields
}

func NewVectorStore

func NewVectorStore(backend VectorBackend, nBasis int, opts ...VectorStoreOption) (*VectorStore, error)

func (*VectorStore) AddVector

func (vs *VectorStore) AddVector(id ID, v Vector) error

func (*VectorStore) AddVectorsWithIDs

func (vs *VectorStore) AddVectorsWithIDs(ids []ID, vecs []Vector) error

func (*VectorStore) AddVectorsWithOffset

func (vs *VectorStore) AddVectorsWithOffset(offset ID, vecs []Vector) error

func (*VectorStore) Close

func (vs *VectorStore) Close() error

func (*VectorStore) FindNearest

func (vs *VectorStore) FindNearest(vector Vector, k int, searchk int, spill int) (*ResultSet, error)

func (*VectorStore) SetLogger

func (vs *VectorStore) SetLogger(printf PrintfFunc)

func (*VectorStore) Sync

func (vs *VectorStore) Sync() error

type VectorStoreOption

// VectorStoreOption is a function that receives a *VectorStore and may return
// an error. NewVectorStore accepts any number of them as its variadic opts
// parameter, and WithPrespill returns one.
type VectorStoreOption func(vs *VectorStore) error

func WithPrespill

func WithPrespill(prespill int) VectorStoreOption

Directories

Path Synopsis
cmd

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL