query

package
v1.28.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 11, 2024 License: BSD-3-Clause Imports: 34 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// CachePrefix is the literal string "weaviate-cache".
	// NOTE(review): presumably used as a prefix/namespace for cache entries;
	// its use is not visible here — confirm.
	CachePrefix = "weaviate-cache"
)
View Source
const (
	// TenantOffLoadingStatus is the tenant status string "FROZEN".
	// NOTE(review): presumably the status a tenant must have to count as
	// offloaded and be served by the querier — confirm against the caller.
	TenantOffLoadingStatus = "FROZEN"
)

Variables

View Source
var (
	// ErrInvalidTenant is the sentinel error for an invalid tenant status.
	ErrInvalidTenant          = errors.New("invalid tenant status")
	// ErrTenantBelongsToNoNodes is the sentinel error for a tenant that
	// belongs to no nodes.
	ErrTenantBelongsToNoNodes = errors.New("tenant belongs to no nodes")
)
View Source
var (
	// ErrTenantNotFound is the cache sentinel error for a tenant that was
	// not found.
	ErrTenantNotFound       = errors.New("cache: tenant not found")
	// ErrTenantDirectoryFound is the cache sentinel error for a tenant
	// directory that was not found.
	// NOTE(review): the identifier omits "Not" although the message says
	// "not found"; renaming would break the exported API, so it is only
	// flagged here.
	ErrTenantDirectoryFound = errors.New("cache: tenant directory not found")
)

Functions

This section is empty.

Types

type API

type API struct {
	// contains filtered or unexported fields
}

API is the core query API that is transport agnostic (http, grpc, etc).

func NewAPI

func NewAPI(
	schema SchemaQuerier,
	lsm *LSMFetcher,
	vectorizer text2vecbase.TextVectorizer[[]float32],
	stopwords *stopwords.Detector,
	config *Config,
	log logrus.FieldLogger,
) *API

func (*API) Search

func (a *API) Search(ctx context.Context, req *SearchRequest) (*SearchResponse, error)

Search serves vector search over the offloaded tenant on object storage.

type CacheMetrics added in v1.28.0

type CacheMetrics struct {
	// OpsDuration tracks the overall duration of each cache operation.
	OpsDuration *prometheus.HistogramVec
	// CacheMiss is a counter vector.
	// NOTE(review): presumably incremented on cache misses; the label set is
	// not visible here — confirm.
	CacheMiss   *prometheus.CounterVec

	// UsageCalcDuration tracks the duration of `usage()` calls, which sit on
	// both the read and the write path.
	UsageCalcDuration prometheus.Histogram
}

CacheMetrics exposes some insights about cache operations.

func NewCacheMetrics added in v1.28.0

func NewCacheMetrics(namespace string, reg prometheus.Registerer) *CacheMetrics

type Config

type Config struct {
	// GRPCListenAddr is the gRPC address the query node listens at.
	GRPCListenAddr string `long:"grpc.listen" description:"gRPC address that query node listens at" default:"0.0.0.0:7071"`
	// SchemaAddr is the address used to get schema information.
	SchemaAddr     string `long:"schema.addr" description:"address to get schema information" default:"http://0.0.0.0:8080"`
	// S3URL is the s3 URL used to query offloaded tenants (e.g. s3://<url>). It has no default.
	S3URL          string `long:"s3.url" description:"s3 URL to query offloaded tenants (e.g: s3://<url>)"`
	// S3Endpoint is the s3 endpoint to use when mocking s3 (e.g. via minio). It has no default.
	S3Endpoint     string `long:"s3.endpoint" description:"s3 endpoint to if mocking s3 (e.g: via minio)"`

	// NOTE(kavi): This `DataPath` makes the `querier` a statefulset: it depends on local disk to serve query requests.
	// The main rationale is that we first download the objects from the object store and put them on local disk.
	// We need this to make it work with the existing query helpers, which assume queries are served from
	// local disk. This will go away eventually, once we start reading from the object store into memory directly.
	DataPath string `long:"datapath" description:"place to look for tenant data after downloading it from object storage" default:"/tmp/weaviate"`

	// VectorizerAddr is the address of the vectorizer used to vectorize near-text queries.
	VectorizerAddr string `long:"vectorize-addr" description:"vectorizer address to be used to vectorize near-text query" default:"0.0.0.0:9999"`

	// MetadataGRPCAddress is the host which will be used to connect to the metadata node's gRPC server.
	// Note that this should be replaced later to avoid having a single metadata node as a single point of failure.
	// If MetadataGRPCAddress is empty, the querier will connect to localhost.
	MetadataGRPCAddress string `long:"metadata.grpc.address" description:"metadata grpc address" default:":9050"`

	// NoCache disables the disk-based cache on the query path.
	NoCache        bool  `long:"no-cache" description:"disable disk based cache on query path"`
	// CacheMaxSizeGB is the maximum size allocated for the cache, in GB; beyond this the cache starts evicting.
	CacheMaxSizeGB int64 `long:"cache-max-size" description:"max size allocated for cache in GBs. More than this start evicting the cache" default:"20"`
}

Config holds the configuration options and flags that control the querier.

type DiskCache added in v1.28.0

type DiskCache struct {
	// contains filtered or unexported fields
}

DiskCache is an LRU disk-based cache for tenants' on-disk data.

func NewDiskCache added in v1.28.0

func NewDiskCache(basePath string, maxCap int64, metrics *CacheMetrics) *DiskCache

func (*DiskCache) AddTenant added in v1.28.0

func (d *DiskCache) AddTenant(collection, tenantID string, version int64) error

AddTenant is called after the files have been placed in the right directory. AddTenant checks if the file is present in that directory, whose path is deterministically generated from basePath, `collection`, `tenant`, and `version`.

func (*DiskCache) BasePath added in v1.28.0

func (d *DiskCache) BasePath() string

BasePath returns base directory of cache data. Useful for client to know where to copy the files from upstream when using the cache.

func (*DiskCache) Tenant added in v1.28.0

func (d *DiskCache) Tenant(collection, tenantID string) (*TenantCache, error)

Tenant returns cached tenant info for the given collection and tenantID. The caller can use the returned data to get the absolute path to access tenant data.

func (*DiskCache) TenantKey added in v1.28.0

func (d *DiskCache) TenantKey(collection, tenantID string) string

type GRPC

type GRPC struct {

	// TODO(kavi): This should go away once we split v1.WeaviateServer into composable v1.Searcher
	protocol.UnimplementedWeaviateServer
	// contains filtered or unexported fields
}

GRPC transport on top of the query.API.

func NewGRPC

func NewGRPC(api *API, schema SchemaQuerier, log logrus.FieldLogger) *GRPC

func (*GRPC) Search

type LSMCache added in v1.28.0

type LSMCache interface {
	// Tenant gets the cached tenant entry for the given collection and
	// tenantID, or an error.
	Tenant(collection, tenantID string) (*TenantCache, error)

	// AddTenant puts the tenant identified by collection and tenantID into
	// the cache at the given version.
	AddTenant(collection, tenantID string, version int64) error

	// BasePath returns a path string.
	// NOTE(review): presumably the base directory of the cache data, as
	// documented for DiskCache.BasePath — confirm for other implementations.
	BasePath() string
}

LSMCache provides caching of tenant's LSM data.

type LSMDownloader added in v1.28.0

type LSMDownloader interface {
	// DownloadToPath takes a context, a collection, a tenant, a node name and
	// a destination path, and reports failure via the returned error.
	// NOTE(review): presumably downloads the tenant's data as stored for
	// nodeName into path — exact source layout is not visible here; confirm.
	DownloadToPath(ctx context.Context, collection, tenant, nodeName, path string) error
}

LSMDownloader is usually some component that is used to download a tenant's data.

type LSMFetcher added in v1.28.0

type LSMFetcher struct {
	// contains filtered or unexported fields
}

LSMFetcher fetches the tenant's LSM data using the given `upstream`, with or without a local cache.

func NewLSMFetcher added in v1.28.0

func NewLSMFetcher(basePath string, upstream LSMDownloader, log logrus.FieldLogger) *LSMFetcher

NewLSMFetcher creates LSMFetcher without cache. Every call is forwarded to given `upstream` to fetch the LSM data for the tenant.

func NewLSMFetcherWithCache added in v1.28.0

func NewLSMFetcherWithCache(basePath string, upstream LSMDownloader, cache LSMCache, log logrus.FieldLogger) *LSMFetcher

NewLSMFetcherWithCache creates LSMFetcher with cache.

func (*LSMFetcher) Fetch added in v1.28.0

func (l *LSMFetcher) Fetch(ctx context.Context, node, collection, tenant string, version int64) (*lsmkv.Store, string, error)

Fetch tries to fetch LSM data for the given (collection, tenant). It works in the following modes: 1. If cache is nil, fetch from upstream. 2. If cache is enabled and the tenant is not found in the cache, fetch from upstream. 3. If cache is enabled and the tenant is found in the cache but the local version is outdated, fetch from upstream and remove the old version. 4. If cache is enabled, the tenant is found in the cache, and the local version is up to date, return from the cache.

type Result

type Result struct {
	// Obj is the stored object for this result.
	Obj       *storobj.Object
	// Certainty is the certainty value attached to this result.
	// NOTE(review): presumably on the same scale as SearchRequest.Certainty
	// — confirm.
	Certainty float64
}

type SchemaQuerier

type SchemaQuerier interface {
	// TenantStatus returns the (STATUS, BELONGSTONODES, VERSION) tuple for the
	// given collection and tenant, plus an error.
	TenantStatus(ctx context.Context, collection, tenant string) (string, []string, int64, error)
	// Collection returns the class (*models.Class) for the given collection
	// name, or an error.
	Collection(ctx context.Context, collection string) (*models.Class, error)
}

type SearchRequest

type SearchRequest struct {
	// Collection is the collection this request targets.
	Collection string
	// If Class is not nil, try to avoid getting the class again from the schema.
	Class     *models.Class
	// Tenant is the tenant this request targets.
	Tenant    string
	NearText  []string // near-text inputs for vector search
	Certainty float64  // threshold to match against the certainty of vector matches
	// NOTE(review): presumably the maximum number of results to return;
	// zero-value handling is not visible here — confirm.
	Limit     int
	// NOTE(review): presumably an optional filter (nil meaning none) — confirm.
	Filters   *filters.LocalFilter
}

type SearchResponse

type SearchResponse struct {
	// Results holds the results of the search.
	// NOTE(review): ordering is not visible here — confirm before relying on it.
	Results []Result
}

type TenantCache added in v1.28.0

type TenantCache struct {
	// Collection and TenantID identify the cached tenant.
	Collection   string
	TenantID     string
	// Version is the version of the cached tenant data.
	Version      int64
	// LastAccessed is a timestamp for this entry.
	// NOTE(review): presumably the last access time used for LRU eviction in
	// DiskCache — confirm.
	LastAccessed time.Time
	// contains filtered or unexported fields
}

func (*TenantCache) AbsolutePath added in v1.28.0

func (tc *TenantCache) AbsolutePath() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL