Documentation
¶
Index ¶
- Variables
- func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func DedupeUpsert(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, ...) (bool, error)
- func GetDefaultDSNs(indexDSN, vectorDSN string) (string, string, bool, error)
- func LogEmbeddingFunc(embeddingFunc vs.EmbeddingFunc) vs.EmbeddingFunc
- type Datastore
- func (s *Datastore) Close() error
- func (s *Datastore) CreateDataset(ctx context.Context, dataset types.Dataset, opts *types.DatasetCreateOpts) error
- func (s *Datastore) DeleteDataset(ctx context.Context, datasetID string) error
- func (s *Datastore) DeleteDocument(ctx context.Context, documentID, datasetID string) error
- func (s *Datastore) DeleteFile(ctx context.Context, datasetID, fileID string) error
- func (s *Datastore) ExportDatasetsToFile(ctx context.Context, path string, datasets ...string) error
- func (s *Datastore) FindFile(ctx context.Context, searchFile types.File) (*types.File, error)
- func (s *Datastore) GetDataset(ctx context.Context, datasetID string, opts *types.DatasetGetOpts) (*types.Dataset, error)
- func (s *Datastore) GetDatasetForDocument(ctx context.Context, documentID string) (*types.Dataset, error)
- func (s *Datastore) GetDocuments(ctx context.Context, datasetID string, where map[string]string, ...) ([]types.Document, error)
- func (s *Datastore) ImportDatasetsFromFile(ctx context.Context, path string, datasets ...string) error
- func (s *Datastore) Ingest(ctx context.Context, datasetID string, filename string, content []byte, ...) ([]string, error)
- func (s *Datastore) ListDatasets(ctx context.Context) ([]types.Dataset, error)
- func (s *Datastore) PruneFiles(ctx context.Context, datasetID string, pathPrefix string, keep []string) ([]types.File, error)
- func (s *Datastore) Retrieve(ctx context.Context, datasetIDs []string, query string, opts RetrieveOpts) (*types.RetrievalResponse, error)
- func (s *Datastore) SimilaritySearch(ctx context.Context, query string, numDocuments int, datasetID string, ...) ([]types2.Document, error)
- func (s *Datastore) UpdateDataset(ctx context.Context, updatedDataset types.Dataset, opts *UpdateDatasetOpts) (*types.Dataset, error)
- type IngestOpts
- type IsDuplicateFunc
- type RetrieveOpts
- type UpdateDatasetOpts
Constants ¶
This section is empty.
Variables ¶
var ErrDBFileNotFound = errors.New("file not found in database")
ErrDBFileNotFound is returned when a file is not found.
var IsDuplicateFuncs = map[string]IsDuplicateFunc{ "file_metadata": DedupeByFileMetadata, "dummy": DummyDedupe, "none": DummyDedupe, "ignore": DummyDedupe, "upsert": DedupeUpsert, }
IsDuplicateFuncs is a map of deduplication functions by name.
Functions ¶
func DedupeByFileMetadata ¶
func DedupeByFileMetadata(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DedupeByFileMetadata is a deduplication function that checks if the document is a duplicate based on the file metadata.
func DedupeUpsert ¶
func DummyDedupe ¶
func DummyDedupe(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
DummyDedupe is a dummy deduplication function that always returns false (i.e. "No Duplicate").
func GetDefaultDSNs ¶
GetDefaultDSNs returns the paths for the datastore and vectorstore databases. In addition, it returns a boolean indicating whether the datastore is an archive.
func LogEmbeddingFunc ¶
func LogEmbeddingFunc(embeddingFunc vs.EmbeddingFunc) vs.EmbeddingFunc
Types ¶
type Datastore ¶
type Datastore struct {
Index index.Index
Vectorstore vectorstore.VectorStore
EmbeddingConfig config.EmbeddingsConfig
EmbeddingModelProvider etypes.EmbeddingModelProvider
}
func NewDatastore ¶
func (*Datastore) CreateDataset ¶
func (*Datastore) DeleteDataset ¶
func (*Datastore) DeleteDocument ¶
func (*Datastore) DeleteFile ¶
func (*Datastore) ExportDatasetsToFile ¶
func (*Datastore) GetDataset ¶
func (*Datastore) GetDatasetForDocument ¶
func (*Datastore) GetDocuments ¶
func (*Datastore) ImportDatasetsFromFile ¶
func (*Datastore) Ingest ¶
func (s *Datastore) Ingest(ctx context.Context, datasetID string, filename string, content []byte, opts IngestOpts) ([]string, error)
Ingest takes the raw content of a document (passed as a byte slice, together with its filename) and adds it to the dataset.
func (*Datastore) ListDatasets ¶
func (*Datastore) PruneFiles ¶
func (*Datastore) Retrieve ¶
func (s *Datastore) Retrieve(ctx context.Context, datasetIDs []string, query string, opts RetrieveOpts) (*types.RetrievalResponse, error)
func (*Datastore) SimilaritySearch ¶
type IngestOpts ¶
type IngestOpts struct {
FileMetadata *types.FileMetadata
IsDuplicateFuncName string
IsDuplicateFunc IsDuplicateFunc
IngestionFlows []flows.IngestionFlow
ExtraMetadata map[string]any
ReuseEmbeddings bool
ReuseFiles bool
}
type IsDuplicateFunc ¶
type IsDuplicateFunc func(ctx context.Context, d *Datastore, datasetID string, content []byte, opts IngestOpts) (bool, error)
IsDuplicateFunc is a function that determines whether a document is a duplicate or should be ingested. It returns true if the document is a duplicate (and thus should not be ingested) and false otherwise.
type RetrieveOpts ¶
type RetrieveOpts struct {
TopK int
Keywords []string
RetrievalFlow *flows.RetrievalFlow
}
type UpdateDatasetOpts ¶
type UpdateDatasetOpts struct {
ReplaceMedata bool
}
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
| lib | |
| | Package postprocessors is basically the same as package transformers, but used at a different stage of the RAG pipeline. |