Documentation
¶
Overview ¶
Example (Basic) ¶
Example shows basic usage of the classifier
// Create classifier - no clients provided, rely on defaults with environment variables
clf, err := classifier.NewClassifier(classifier.Config{})
if err != nil {
log.Fatal(err)
}
// Classify some text
result, err := clf.Classify(context.Background(), "Thanks for the help!")
if err != nil {
log.Fatal(err)
}
fmt.Printf("Label: %s\n", result.Label)
fmt.Printf("Cache Hit: %v\n", result.CacheHit)
fmt.Printf("Latency: %v\n", result.UserFacingLatency)
// Gracefully shut down and save DSU state
if err := clf.Close(); err != nil {
log.Fatal(err)
}
Example (CustomConfig) ¶
Example shows customizing the configuration
// Create clients
embeddingClient, err := adapters.NewVoyageEmbeddingAdapter(nil)
if err != nil {
log.Fatal(err)
}
vectorClientLabel, err := adapters.NewPineconeVectorAdapter(nil, nil, "my_namespace_label")
if err != nil {
log.Fatal(err)
}
vectorClientContent, err := adapters.NewPineconeVectorAdapter(nil, nil, "my_namespace_content")
if err != nil {
log.Fatal(err)
}
llmClient, err := adapters.NewDefaultLLMClient(nil, "", "", "", nil)
if err != nil {
log.Fatal(err)
}
// Customize configuration with higher similarity threshold
clf, err := classifier.NewClassifier(classifier.Config{
EmbeddingClient: embeddingClient,
VectorClientLabel: vectorClientLabel,
VectorClientContent: vectorClientContent,
LLMClient: llmClient,
MinSimilarityContent: 0.90, // Higher threshold for cache hits
MinSimilarityLabel: 0.75, // Lower threshold for label similarity matching
DSUPersistence: classifier.NewFileDSUPersistence("./my_labels.bin"),
})
if err != nil {
log.Fatal(err)
}
// Classify text
result, err := clf.Classify(context.Background(), "How do I install this package?")
if err != nil {
log.Fatal(err)
}
fmt.Printf("Label: %s\n", result.Label)
// Get metrics
metrics := clf.GetMetrics()
fmt.Printf("Unique Labels: %d\n", metrics.UniqueLabels)
fmt.Printf("Converged Labels: %d\n", metrics.ConvergedLabels)
fmt.Printf("Cache Hit Rate: %.2f%%\n", metrics.CacheHitRate)
// Gracefully shut down and save state
if err := clf.Close(); err != nil {
log.Fatal(err)
}
Index ¶
Examples ¶
Constants ¶
const (
// DefaultMinSimilarity is the default threshold for vector similarity matching
DefaultMinSimilarity = 0.80
// DefaultDSUFilePath is the default location for DSU state persistence
DefaultDSUFilePath = "./dsu_state.bin"
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Classifier ¶
type Classifier struct {
// contains filtered or unexported fields
}
Classifier performs text classification with vector caching and label clustering
func NewClassifier ¶
func NewClassifier(cfg Config) (*Classifier, error)
NewClassifier creates a new Classifier with the given configuration
func (*Classifier) Classify ¶
Classify classifies the given text and returns the classification result
func (*Classifier) Close ¶
func (c *Classifier) Close() error
Close gracefully shuts down the classifier, waiting for background tasks to complete and saving the DSU state. It's safe to call Close multiple times.
func (*Classifier) GetMetrics ¶
func (c *Classifier) GetMetrics() Metrics
GetMetrics returns current classification metrics
func (*Classifier) SaveDSU ¶
func (c *Classifier) SaveDSU() error
SaveDSU saves the current DSU state to persistent storage. This method is thread-safe and waits for any pending background tasks to complete.
type Config ¶
type Config struct {
// EmbeddingClient generates embeddings for text. If nil, uses the default (Voyage AI).
EmbeddingClient EmbeddingClient
// VectorClientLabel and VectorClientContent perform vector search and storage. If nil, each uses the default (Pinecone).
VectorClientLabel VectorClient
VectorClientContent VectorClient
// LLMClient performs text classification. If nil, uses the default (OpenAI).
LLMClient LLMClient
Model string
BaseUrl string
Temperature *float32 // Optional temperature for LLM. If nil, uses model default.
// DSUPersistence handles loading/saving the label clustering state. If nil, uses file-based persistence at ./dsu_state.bin
DSUPersistence DisjointSetPersistence
// MinSimilarityContent and MinSimilarityLabel are the thresholds for vector similarity matching (0.0 to 1.0). If 0, uses DefaultMinSimilarity.
MinSimilarityContent float32
MinSimilarityLabel float32
}
Config holds configuration for the Classifier
type DisjointSetPersistence ¶
type DisjointSetPersistence interface {
Load() (*disjoint_set.DSU, error)
Save(dsu *disjoint_set.DSU) error
}
DisjointSetPersistence handles loading and saving the Disjoint Set Union structure
type EmbeddingClient ¶
type EmbeddingClient interface {
GenerateEmbedding(ctx context.Context, text string) ([]float32, error)
}
EmbeddingClient generates vector embeddings for text
type FileDSUPersistence ¶
type FileDSUPersistence struct {
// contains filtered or unexported fields
}
FileDSUPersistence implements DisjointSetPersistence using file-based storage
func NewFileDSUPersistence ¶
func NewFileDSUPersistence(filepath string) *FileDSUPersistence
NewFileDSUPersistence creates a new file-based DSU persistence handler
func (*FileDSUPersistence) Load ¶
func (f *FileDSUPersistence) Load() (*disjoint_set.DSU, error)
Load loads the DSU from the file. If the file doesn't exist, returns a new empty DSU.
func (*FileDSUPersistence) Save ¶
func (f *FileDSUPersistence) Save(dsu *disjoint_set.DSU) error
Save saves the DSU to the file
type Metrics ¶
type Metrics struct {
// UniqueLabels is the total number of unique labels seen
UniqueLabels int
// ConvergedLabels is the number of distinct label clusters after DSU merging
ConvergedLabels int
// CacheHitRate is the percentage of classifications served from cache
CacheHitRate float32
}
Metrics provides statistics about the classifier's state
type Result ¶
type Result struct {
// Label is the classification category assigned to the text
Label string
// CacheHit indicates whether the classification was retrieved from the vector cache
CacheHit bool
// Confidence is the similarity score if cache hit, 0 otherwise
Confidence float32
// UserFacingLatency is the time the user waited for the classification
UserFacingLatency time.Duration
// BackgroundLatency is the time spent on background tasks (clustering, vector upserts)
// This is 0 if cache hit, since no background work is needed
BackgroundLatency time.Duration
}
Result represents the classification result