Documentation
Constants ¶
const (
CachePrefix = "weaviate-cache"
)
const (
TenantOffLoadingStatus = "FROZEN"
)
Variables ¶
var (
	ErrInvalidTenant          = errors.New("invalid tenant status")
	ErrTenantBelongsToNoNodes = errors.New("tenant belongs to no nodes")
)
var (
	ErrTenantNotFound       = errors.New("cache: tenant not found")
	ErrTenantDirectoryFound = errors.New("cache: tenant directory not found")
)
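The cache errors are sentinel values intended for errors.Is checks by callers. A minimal sketch, assuming the snippet lives alongside this package; the helper function itself is hypothetical and not part of the API:
// lookupTenant is a hypothetical helper: it treats ErrTenantNotFound as a
// cache miss and propagates every other error unchanged.
func lookupTenant(cache LSMCache, collection, tenantID string) (*TenantCache, bool, error) {
	tc, err := cache.Tenant(collection, tenantID)
	if errors.Is(err, ErrTenantNotFound) {
		return nil, false, nil // miss: caller should fetch from upstream
	}
	if err != nil {
		return nil, false, err
	}
	return tc, true, nil
}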
Functions ¶
This section is empty.
Types ¶
type API ¶
type API struct {
// contains filtered or unexported fields
}
API is the core query API that is transport agnostic (http, grpc, etc).
func NewAPI ¶
func NewAPI(
	schema SchemaQuerier,
	lsm *LSMFetcher,
	vectorizer text2vecbase.TextVectorizer[[]float32],
	stopwords *stopwords.Detector,
	config *Config,
	log logrus.FieldLogger,
) *API
func (*API) Search ¶
func (a *API) Search(ctx context.Context, req *SearchRequest) (*SearchResponse, error)
Search serves vector search over the offloaded tenant on object storage.
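A minimal sketch of wiring the API and running a search. The dependencies are assumed to be constructed elsewhere, and the SearchRequest fields are not documented on this page, so a zero-valued request stands in as a placeholder:
// runSearch is a hypothetical helper showing the wiring of NewAPI and Search.
func runSearch(ctx context.Context, schema SchemaQuerier, lsm *LSMFetcher,
	vectorizer text2vecbase.TextVectorizer[[]float32], sw *stopwords.Detector,
	cfg *Config, log logrus.FieldLogger,
) (*SearchResponse, error) {
	api := NewAPI(schema, lsm, vectorizer, sw, cfg, log)

	// Placeholder request: populate the real SearchRequest fields here.
	req := &SearchRequest{}
	return api.Search(ctx, req)
}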
type CacheMetrics ¶ added in v1.28.0
type CacheMetrics struct {
// OpsDuration tracks overall duration of each cache operation.
OpsDuration *prometheus.HistogramVec
CacheMiss *prometheus.CounterVec
// UsageCalcDuration tracks `usage()` calls, which sit on both the read and write paths.
UsageCalcDuration prometheus.Histogram
}
CacheMetrics exposes insights into how the cache operations behave.
func NewCacheMetrics ¶ added in v1.28.0
func NewCacheMetrics(namespace string, reg prometheus.Registerer) *CacheMetrics
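A sketch of registering the cache metrics; the namespace string here is an arbitrary example, and prometheus.DefaultRegisterer is only one possible registerer:
// newMetrics is a hypothetical helper that builds the cache metrics.
func newMetrics(reg prometheus.Registerer) *CacheMetrics {
	// "weaviate_querier" is an example namespace, not a value mandated by the package.
	return NewCacheMetrics("weaviate_querier", reg)
}
The resulting *CacheMetrics is then handed to NewDiskCache (see DiskCache below).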
type Config ¶
type Config struct {
GRPCListenAddr string `long:"grpc.listen" description:"gRPC address that query node listens at" default:"0.0.0.0:7071"`
SchemaAddr string `long:"schema.addr" description:"address to get schema information" default:"http://0.0.0.0:8080"`
S3URL string `long:"s3.url" description:"s3 URL to query offloaded tenants (e.g. s3://<url>)"`
S3Endpoint string `long:"s3.endpoint" description:"s3 endpoint to use if mocking s3 (e.g. via minio)"`
// NOTE(kavi): This `DataPath` makes the `querier` a statefulset, since it depends on local disk to serve query requests.
// The main rationale is that we first download the objects from the object store and put them on local disk.
// We need this to make it work with the existing query helpers, which assume queries are served from
// local disk. This will go away eventually once we start reading from the object store into memory directly.
DataPath string `long:"datapath" description:"place to look for tenant data after downloading it from object storage" default:"/tmp/weaviate"`
VectorizerAddr string `long:"vectorize-addr" description:"vectorizer address used to vectorize near-text queries" default:"0.0.0.0:9999"`
// MetadataGRPCAddress is the host which will be used to connect to the metadata node's gRPC server.
// Note that this should be replaced later to avoid having a single metadata node as a single point of failure.
// If MetadataGRPCAddress is empty, the querier will connect to localhost.
MetadataGRPCAddress string `long:"metadata.grpc.address" description:"metadata grpc address" default:":9050"`
NoCache bool `long:"no-cache" description:"disable disk based cache on query path"`
CacheMaxSizeGB int64 `long:"cache-max-size" description:"max size allocated for the cache in GB; exceeding this starts evicting cache entries" default:"20"`
}
Config holds the configuration and flags that control the querier.
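A sketch of a programmatically constructed Config; the values simply mirror the defaults from the struct tags above:
// exampleConfig mirrors the documented flag defaults.
var exampleConfig = &Config{
	GRPCListenAddr:      "0.0.0.0:7071",
	SchemaAddr:          "http://0.0.0.0:8080",
	DataPath:            "/tmp/weaviate",
	VectorizerAddr:      "0.0.0.0:9999",
	MetadataGRPCAddress: ":9050",
	CacheMaxSizeGB:      20,
}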
type DiskCache ¶ added in v1.28.0
type DiskCache struct {
// contains filtered or unexported fields
}
DiskCache is an LRU disk-based cache for tenants' on-disk data.
func NewDiskCache ¶ added in v1.28.0
func NewDiskCache(basePath string, maxCap int64, metrics *CacheMetrics) *DiskCache
func (*DiskCache) AddTenant ¶ added in v1.28.0
func (d *DiskCache) AddTenant(collection, tenantID string, version int64) error
AddTenant is called after the tenant's files are already in the right directory. It checks whether the files are present in the directory deterministically generated from `collection`, `tenant`, and `version` under basePath.
func (*DiskCache) BasePath ¶ added in v1.28.0
func (d *DiskCache) BasePath() string
BasePath returns the base directory of the cache data. It is useful for clients to know where to copy files from upstream when using the cache.
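A sketch of the intended call order: build the cache, copy the tenant's files under BasePath, then register the tenant with AddTenant. The collection, tenant, and version values are placeholders, and the maxCap argument is assumed to carry the CacheMaxSizeGB flag value (its exact unit is not stated on this page):
// warmTenant is a hypothetical helper showing the DiskCache call order.
func warmTenant(cfg *Config, metrics *CacheMetrics) error {
	cache := NewDiskCache(cfg.DataPath, cfg.CacheMaxSizeGB, metrics)

	// Hypothetical step: the caller copies the tenant's LSM files into the
	// directory layout rooted at cache.BasePath() before registering it.
	_ = cache.BasePath()

	// Register the tenant so later lookups via Tenant() can hit the cache.
	return cache.AddTenant("Articles", "tenant-1", 1)
}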
type GRPC ¶
type GRPC struct {
// TODO(kavi): This should go away once we split v1.WeaviateServer into composable v1.Searcher
protocol.UnimplementedWeaviateServer
// contains filtered or unexported fields
}
GRPC transport on top of the query.API.
func NewGRPC ¶
func NewGRPC(api *API, schema SchemaQuerier, log logrus.FieldLogger) *GRPC
func (*GRPC) Search ¶
func (g *GRPC) Search(ctx context.Context, req *protocol.SearchRequest) (*protocol.SearchReply, error)
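A sketch of serving the gRPC transport on the configured listen address. RegisterWeaviateServer follows the usual protoc-gen-go-grpc naming and is assumed here from the embedded protocol.UnimplementedWeaviateServer; it is not confirmed by this page:
// serveGRPC is a hypothetical helper wiring query.API into a gRPC server.
func serveGRPC(api *API, schema SchemaQuerier, cfg *Config, log logrus.FieldLogger) error {
	lis, err := net.Listen("tcp", cfg.GRPCListenAddr)
	if err != nil {
		return err
	}
	srv := grpc.NewServer()
	// Assumed generated registration helper for the Weaviate gRPC service.
	protocol.RegisterWeaviateServer(srv, NewGRPC(api, schema, log))
	return srv.Serve(lis)
}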
type LSMCache ¶ added in v1.28.0
type LSMCache interface {
// get tenant
Tenant(collection, tenantID string) (*TenantCache, error)
// put tenant
AddTenant(collection, tenantID string, version int64) error
BasePath() string
}
LSMCache provides caching of a tenant's LSM data.
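DiskCache appears to be the provided implementation of this interface; a compile-time assertion sketch under that assumption:
// Compile-time check that *DiskCache satisfies LSMCache.
var _ LSMCache = (*DiskCache)(nil)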
type LSMDownloader ¶ added in v1.28.0
type LSMDownloader interface {
DownloadToPath(ctx context.Context, collection, tenant, nodeName, path string) error
}
LSMDownloader is the component used to download a tenant's data.
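Any type with a matching DownloadToPath method can act as the upstream. A hypothetical no-op stub, useful as a test double; a real implementation would copy the tenant's files from object storage into path:
// noopDownloader satisfies LSMDownloader but downloads nothing.
type noopDownloader struct{}

func (noopDownloader) DownloadToPath(ctx context.Context, collection, tenant, nodeName, path string) error {
	return nil
}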
type LSMFetcher ¶ added in v1.28.0
type LSMFetcher struct {
// contains filtered or unexported fields
}
LSMFetcher fetches the tenant's LSM data using the given `upstream`, with or without a local cache.
func NewLSMFetcher ¶ added in v1.28.0
func NewLSMFetcher(basePath string, upstream LSMDownloader, log logrus.FieldLogger) *LSMFetcher
NewLSMFetcher creates an LSMFetcher without a cache. Every call is forwarded to the given `upstream` to fetch the LSM data for the tenant.
func NewLSMFetcherWithCache ¶ added in v1.28.0
func NewLSMFetcherWithCache(basePath string, upstream LSMDownloader, cache LSMCache, log logrus.FieldLogger) *LSMFetcher
NewLSMFetcherWithCache creates an LSMFetcher with a cache.
func (*LSMFetcher) Fetch ¶ added in v1.28.0
func (l *LSMFetcher) Fetch(ctx context.Context, node, collection, tenant string, version int64) (*lsmkv.Store, string, error)
Fetch tries to fetch the LSM data for the given (collection, tenant). It works in the following modes:
1. If the cache is nil, fetch from upstream.
2. If the cache is enabled and the tenant is not found in the cache, fetch from upstream.
3. If the cache is enabled and the tenant is found in the cache but the local version is outdated, fetch from upstream and remove the old version.
4. If the cache is enabled, the tenant is found in the cache, and the local version is up to date, return from the cache.
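A sketch of fetching a tenant's store through the cache-backed fetcher; the node, collection, tenant, and version values are placeholders:
// fetchTenant is a hypothetical helper combining the fetcher constructors with Fetch.
func fetchTenant(ctx context.Context, downloader LSMDownloader, cache LSMCache,
	cfg *Config, log logrus.FieldLogger,
) (*lsmkv.Store, error) {
	fetcher := NewLSMFetcherWithCache(cfg.DataPath, downloader, cache, log)

	// Placeholder identifiers; version selects which snapshot must be present locally.
	store, localPath, err := fetcher.Fetch(ctx, "node-1", "Articles", "tenant-1", 1)
	if err != nil {
		return nil, err
	}
	_ = localPath // directory the tenant's data was materialized into
	return store, nil
}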
type SchemaQuerier ¶
type SearchRequest ¶
type SearchResponse ¶
type SearchResponse struct {
Results []Result
}
type TenantCache ¶ added in v1.28.0
type TenantCache struct {
Collection string
TenantID string
Version int64
LastAccessed time.Time
// contains filtered or unexported fields
}
func (*TenantCache) AbsolutePath ¶ added in v1.28.0
func (tc *TenantCache) AbsolutePath() string
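A small sketch of reading a cache entry after a hit; error handling mirrors the sentinel errors listed at the top of this page, and the helper itself is hypothetical:
// printTenantPath looks up a tenant in the cache and prints where its data lives on disk.
func printTenantPath(cache LSMCache, collection, tenantID string) error {
	tc, err := cache.Tenant(collection, tenantID)
	if err != nil {
		return err // e.g. ErrTenantNotFound on a miss
	}
	fmt.Printf("version %d at %s (last accessed %s)\n",
		tc.Version, tc.AbsolutePath(), tc.LastAccessed)
	return nil
}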