Documentation
¶
Overview ¶
Package suggest provides fuzzy search and autocomplete functionality
Example ¶
This example demonstrates how to use this package.
package main
import (
"fmt"
"log"
"github.com/finalrep/suggest/pkg/dictionary"
"github.com/finalrep/suggest/pkg/metric"
"github.com/finalrep/suggest/pkg/suggest"
)
// main builds an in-memory "cars" search index and prints the values
// suggested for the fuzzy query "niss ma".
func main() {
	// Any dictionary implementation can be used here (for example
	// SqlDictionary, CDBDictionary and so on); this example uses
	// InMemoryDictionary.
	cars := []string{
		"Nissan March",
		"Nissan Juke",
		"Nissan Maxima",
		"Nissan Murano",
		"Nissan Note",
		"Toyota Mark II",
		"Toyota Corolla",
		"Toyota Corona",
	}
	dict := dictionary.NewInMemoryDictionary(cars)

	// Index configuration.
	description := suggest.IndexDescription{
		// name of the dictionary
		Name: "cars",
		// size of the nGram
		NGramSize: 3,
		// wrap symbols (front and rear)
		Wrap: [2]string{"$", "$"},
		// pad symbol that replaces forbidden chars
		Pad: "$",
		// alphabet of allowed chars (other chars will be replaced with pad symbol)
		Alphabet: []string{"english", "$"},
	}

	// Runtime search index builder.
	ramBuilder, err := suggest.NewRAMBuilder(dict, description)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	// Ask the service to add a new search index with the given configuration.
	svc := suggest.NewService()
	err = svc.AddIndex(description.Name, dict, ramBuilder)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	// Search configuration: query, topK elements, type of metric, min similarity.
	conf, err := suggest.NewSearchConfig("niss ma", 5, metric.CosineMetric(), 0.4)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	items, err := svc.Suggest("cars", conf)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	// Keep only the string value of every result item, in result order.
	values := make([]string, len(items))
	for i := range items {
		values[i] = items[i].Value
	}
	fmt.Println(values)
}
Output: [Nissan Maxima Nissan March]
Index ¶
- func Index(directory store.Directory, dict dictionary.Dictionary, ...) error
- func NewAutocompleteTokenizer(d IndexDescription) analysis.Tokenizer
- func NewSuggestTokenizer(d IndexDescription) analysis.Tokenizer
- type Autocomplete
- type Builder
- type Candidate
- type Collector
- type CollectorManager
- type CollectorManagerFactory
- type Driver
- type FirstKCollectorManager
- type FuzzyCollectorManager
- type IndexDescription
- type NGramIndex
- type ResultItem
- type Scorer
- type SearchConfig
- type Service
- func (s *Service) AddIndex(name string, dict dictionary.Dictionary, builder Builder) error
- func (s *Service) AddIndexByDescription(description IndexDescription) error
- func (s *Service) AddOnDiscIndex(description IndexDescription) error
- func (s *Service) AddRunTimeIndex(description IndexDescription) error
- func (s *Service) Autocomplete(dictName string, query string, limit int) ([]ResultItem, error)
- func (s *Service) GetDictionaries() []string
- func (s *Service) Suggest(dictName string, config SearchConfig) ([]ResultItem, error)
- type Suggester
- type TopKQueue
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func Index ¶
func Index( directory store.Directory, dict dictionary.Dictionary, config index.WriterConfig, tokenizer analysis.Tokenizer, ) error
Index builds a search index by using the given config and the dictionary and persists it to the directory
func NewAutocompleteTokenizer ¶
func NewAutocompleteTokenizer(d IndexDescription) analysis.Tokenizer
NewAutocompleteTokenizer creates a tokenizer for autocomplete service
func NewSuggestTokenizer ¶
func NewSuggestTokenizer(d IndexDescription) analysis.Tokenizer
NewSuggestTokenizer creates a tokenizer for suggester service
Types ¶
type Autocomplete ¶
type Autocomplete interface {
// Autocomplete returns the candidates that contain the query string as a substring.
// The factory creates the CollectorManager (presumably the one used to collect the candidates — TODO confirm).
// NOTE(review): Service.Autocomplete is documented as matching on a prefix, not a substring — confirm which one applies.
Autocomplete(query string, factory CollectorManagerFactory) ([]Candidate, error)
}
Autocomplete provides autocomplete functionality for candidates search
func NewAutocomplete ¶
func NewAutocomplete( indices index.InvertedIndexIndices, searcher index.Searcher, tokenizer analysis.Tokenizer, ) Autocomplete
NewAutocomplete creates a new instance of Autocomplete
type Builder ¶
type Builder interface {
// Build configures a new instance of NGramIndex and returns it together with an error value
Build() (NGramIndex, error)
}
Builder is the entity that is responsible for tuning and creating an NGramIndex
func NewBuilder ¶
func NewBuilder(directory store.Directory, description IndexDescription) (Builder, error)
NewBuilder works with already indexed data
func NewFSBuilder ¶
func NewFSBuilder(description IndexDescription) (Builder, error)
NewFSBuilder works with already indexed data
func NewRAMBuilder ¶
func NewRAMBuilder(dict dictionary.Dictionary, description IndexDescription) (Builder, error)
NewRAMBuilder creates a search index by using the given dictionary and the index description in a RAMDriver directory
type Candidate ¶
type Candidate struct {
// Key is the position (docId) of the document in the posting list
Key index.Position
// Score is a float64 number that represents the score of the document
Score float64
}
Candidate is an item of Collector
type Collector ¶
type Collector interface {
// Collector of the merger package is embedded, so its methods are part of this interface
merger.Collector
// SetScorer sets the scorer; it is called before collection starts
SetScorer(scorer Scorer)
}
Collector collects the doc stream that satisfies the search criteria
type CollectorManager ¶
type CollectorManager interface {
// Create creates a new collector that will be used for a search segment
Create() Collector
// Collect hands the given collectors back to the manager, which is responsible
// for reducing them into the result set.
Collect(collectors ...Collector) error
// GetCandidates returns the candidates collected so far.
GetCandidates() []Candidate
}
CollectorManager is responsible for creating collectors and reducing them into the result set
type CollectorManagerFactory ¶
type CollectorManagerFactory func() CollectorManager
CollectorManagerFactory is a factory method for creating a new instance of CollectorManager.
type FirstKCollectorManager ¶
type FirstKCollectorManager struct {
// contains filtered or unexported fields
}
FirstKCollectorManager represents first k collector manager.
func NewFirstKCollectorManager ¶
func NewFirstKCollectorManager(limit int, queue TopKQueue) *FirstKCollectorManager
NewFirstKCollectorManager creates a new instance of CollectorManager with firstK collectors
func (*FirstKCollectorManager) Collect ¶
func (m *FirstKCollectorManager) Collect(collectors ...Collector) error
Collect hands the given collectors back to the manager.
func (*FirstKCollectorManager) Create ¶
func (m *FirstKCollectorManager) Create() Collector
Create creates a new collector that will be used for a search segment
func (*FirstKCollectorManager) GetCandidates ¶
func (m *FirstKCollectorManager) GetCandidates() []Candidate
GetCandidates returns currently collected candidates.
type FuzzyCollectorManager ¶
type FuzzyCollectorManager struct {
// contains filtered or unexported fields
}
FuzzyCollectorManager represents fuzzy collector manager.
func NewFuzzyCollectorManager ¶
func NewFuzzyCollectorManager(queueFactory func() TopKQueue) *FuzzyCollectorManager
NewFuzzyCollectorManager creates a new instance of FuzzyCollectorManager.
func (*FuzzyCollectorManager) Collect ¶
func (m *FuzzyCollectorManager) Collect(collectors ...Collector) error
Collect hands the given collectors back to the manager.
func (*FuzzyCollectorManager) Create ¶
func (m *FuzzyCollectorManager) Create() Collector
Create creates a new collector that will be used for a search segment
func (*FuzzyCollectorManager) GetCandidates ¶
func (m *FuzzyCollectorManager) GetCandidates() []Candidate
GetCandidates returns currently collected candidates.
func (*FuzzyCollectorManager) GetLowestScore ¶
func (m *FuzzyCollectorManager) GetLowestScore() float64
GetLowestScore returns the lowest collected score.
type IndexDescription ¶
type IndexDescription struct {
// Driver is the driver of the index.
// NOTE(review): presumably selects between a RAM and an on-disc index (cf. AddRunTimeIndex / AddOnDiscIndex) — confirm against the Driver type.
Driver Driver `json:"driver"`
// Name is the name of the dictionary
Name string `json:"name"`
// NGramSize is the size of the nGram
NGramSize int `json:"nGramSize"`
// SourcePath is the source path of the index description (see GetSourcePath)
SourcePath string `json:"source"`
// OutputPath is presumably the output path of the built index (see GetIndexPath) — TODO confirm
OutputPath string `json:"output"`
// Alphabet is the alphabet of allowed chars (other chars will be replaced with the pad symbol)
Alphabet []string `json:"alphabet"`
// Pad is the pad symbol that replaces forbidden chars
Pad string `json:"pad"`
// Wrap holds the wrap symbols (front and rear)
Wrap [2]string `json:"wrap"`
// contains filtered or unexported fields
}
IndexDescription is a config for the NGramIndex structure
func ReadConfigs ¶
func ReadConfigs(configPath string) ([]IndexDescription, error)
ReadConfigs reads and returns a list of IndexDescription from the given config path
func (*IndexDescription) GetDictionaryFile ¶
func (d *IndexDescription) GetDictionaryFile() string
GetDictionaryFile returns a path to a dictionary file from the configuration
func (*IndexDescription) GetIndexPath ¶
func (d *IndexDescription) GetIndexPath() string
GetIndexPath returns an output path of the built index
func (*IndexDescription) GetIndexTokenizer ¶
func (d *IndexDescription) GetIndexTokenizer() analysis.Tokenizer
GetIndexTokenizer returns a tokenizer for indexing
func (*IndexDescription) GetSourcePath ¶
func (d *IndexDescription) GetSourcePath() string
GetSourcePath returns a source path of the index description
func (*IndexDescription) GetWriterConfig ¶
func (d *IndexDescription) GetWriterConfig() index.WriterConfig
GetWriterConfig creates and returns IndexWriter config from the given index description
type NGramIndex ¶
type NGramIndex interface {
// Suggester provides the access to approximate string search
Suggester
// Autocomplete provides autocomplete functionality for candidates search
Autocomplete
}
NGramIndex is the interface that provides the access to approximate string search and autocomplete
func NewNGramIndex ¶
func NewNGramIndex(suggester Suggester, autocomplete Autocomplete) NGramIndex
NewNGramIndex creates a new instance of NGramIndex
type ResultItem ¶
type ResultItem struct {
// Score is the float64 score value of the candidate
Score float64
// Value is the string value of the candidate
Value string
}
ResultItem represents an element of the top-k similar strings in the dictionary for the given query
type Scorer ¶
type Scorer interface {
// Score returns the score of the given merge candidate
Score(position merger.MergeCandidate) float64
}
Scorer is responsible for scoring an index position
type SearchConfig ¶
type SearchConfig struct {
// contains filtered or unexported fields
}
SearchConfig is a config for NGramIndex Suggest method
func NewSearchConfig ¶
func NewSearchConfig(query string, topK int, metric metric.Metric, similarity float64) (SearchConfig, error)
NewSearchConfig returns a new instance of SearchConfig
type Service ¶
Service provides methods for autocomplete and topK approximate string search
func (*Service) AddIndex ¶
func (s *Service) AddIndex(name string, dict dictionary.Dictionary, builder Builder) error
AddIndex adds an index with the given name, dictionary and builder
func (*Service) AddIndexByDescription ¶
func (s *Service) AddIndexByDescription(description IndexDescription) error
AddIndexByDescription adds a new search index with the given description
func (*Service) AddOnDiscIndex ¶
func (s *Service) AddOnDiscIndex(description IndexDescription) error
AddOnDiscIndex adds a new DISC search index with the given description
func (*Service) AddRunTimeIndex ¶
func (s *Service) AddRunTimeIndex(description IndexDescription) error
AddRunTimeIndex adds a new RAM search index with the given description
func (*Service) Autocomplete ¶
Autocomplete returns limit candidates where the query string is a prefix of each candidate
func (*Service) GetDictionaries ¶
GetDictionaries returns the managed list of dictionaries
func (*Service) Suggest ¶
func (s *Service) Suggest(dictName string, config SearchConfig) ([]ResultItem, error)
Suggest returns Top-k approximate strings for the given query in the dict
type Suggester ¶
type Suggester interface {
// Suggest returns the top-k candidates similar to the query for the given similarity value and metric.
// The factory creates the CollectorManager (presumably the one used to collect the candidates — TODO confirm).
Suggest(query string, similarity float64, metric metric.Metric, factory CollectorManagerFactory) ([]Candidate, error)
}
Suggester is the interface that provides the access to approximate string search
func NewSuggester ¶
func NewSuggester( indices index.InvertedIndexIndices, searcher index.Searcher, tokenizer analysis.Tokenizer, ) Suggester
NewSuggester returns a new Suggester instance
type TopKQueue ¶
type TopKQueue interface {
// Add adds the candidate with the given position and score to the collection if it belongs to the `top k items`
Add(candidate index.Position, score float64)
// GetLowestScore returns the lowest score of the collected candidates. If the collection is empty, 0 will be returned
GetLowestScore() float64
// CanTakeWithScore returns true if a candidate with the given score can be accepted
CanTakeWithScore(score float64) bool
// IsFull tells whether the queue has collected `top k elements`
IsFull() bool
// GetCandidates returns the `top k items`
GetCandidates() []Candidate
// Merge merges the given queue with the current one
Merge(other TopKQueue)
// Reset resets the queue with the provided topK
Reset(topK int)
}
TopKQueue is an accumulator that selects the "top k" elements added to it
func NewTopKQueue ¶
NewTopKQueue returns an instance of TopKQueue