classifier

package
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 10, 2025 License: MIT Imports: 11 Imported by: 0

Documentation

Overview

Example (Basic)

Example shows basic usage of the classifier

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/FrenchMajesty/consistent-classifier/pkg/classifier"
)

// main demonstrates the minimal flow: build a classifier, classify one
// string, print the outcome, then shut the classifier down.
func main() {
	ctx := context.Background()

	// An empty Config supplies no clients, so the defaults configured
	// through environment variables are used.
	clf, err := classifier.NewClassifier(classifier.Config{})
	if err != nil {
		log.Fatal(err)
	}

	// Run a single classification.
	res, err := clf.Classify(ctx, "Thanks for the help!")
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Label: %s\n", res.Label)
	fmt.Printf("Cache Hit: %v\n", res.CacheHit)
	fmt.Printf("Latency: %v\n", res.UserFacingLatency)

	// Close shuts down gracefully and saves the DSU state.
	err = clf.Close()
	if err != nil {
		log.Fatal(err)
	}
}
Example (CustomConfig)

Example shows customizing the configuration

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/FrenchMajesty/consistent-classifier/pkg/adapters"
	"github.com/FrenchMajesty/consistent-classifier/pkg/classifier"
)

// main demonstrates a classifier built from explicitly constructed clients,
// custom similarity thresholds, and a custom DSU persistence file.
func main() {
	// Build each dependency explicitly instead of relying on the defaults.
	embedder, err := adapters.NewVoyageEmbeddingAdapter(nil)
	if err != nil {
		log.Fatal(err)
	}

	labelStore, err := adapters.NewPineconeVectorAdapter(nil, nil, "my_namespace_label")
	if err != nil {
		log.Fatal(err)
	}

	contentStore, err := adapters.NewPineconeVectorAdapter(nil, nil, "my_namespace_content")
	if err != nil {
		log.Fatal(err)
	}

	llm, err := adapters.NewDefaultLLMClient(nil, "", "", "")
	if err != nil {
		log.Fatal(err)
	}

	// Assemble the configuration: custom clients, tuned thresholds, and a
	// dedicated file for the DSU state.
	cfg := classifier.Config{
		EmbeddingClient:      embedder,
		VectorClientLabel:    labelStore,
		VectorClientContent:  contentStore,
		LLMClient:            llm,
		MinSimilarityContent: 0.90, // Stricter than the 0.80 default
		MinSimilarityLabel:   0.75, // Looser than the 0.80 default
		DSUPersistence:       classifier.NewFileDSUPersistence("./my_labels.bin"),
	}

	clf, err := classifier.NewClassifier(cfg)
	if err != nil {
		log.Fatal(err)
	}

	// Run a single classification.
	res, err := clf.Classify(context.Background(), "How do I install this package?")
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Label: %s\n", res.Label)

	// Report the classifier's current metrics.
	stats := clf.GetMetrics()
	fmt.Printf("Unique Labels: %d\n", stats.UniqueLabels)
	fmt.Printf("Converged Labels: %d\n", stats.ConvergedLabels)
	fmt.Printf("Cache Hit Rate: %.2f%%\n", stats.CacheHitRate)

	// Close shuts down gracefully and saves state.
	err = clf.Close()
	if err != nil {
		log.Fatal(err)
	}
}

Index

Examples

Constants

View Source
const (
	// DefaultMinSimilarity is the default threshold for vector similarity matching.
	// Config documents it as the value used when a similarity threshold is left at 0.
	DefaultMinSimilarity = 0.80

	// DefaultDSUFilePath is the default location for DSU state persistence.
	// It matches the path Config documents as the file-based fallback used
	// when DSUPersistence is nil.
	DefaultDSUFilePath = "./dsu_state.bin"
)

Variables

This section is empty.

Functions

This section is empty.

Types

type Classifier

type Classifier struct {
	// contains filtered or unexported fields
}

Classifier performs text classification with vector caching and label clustering

func NewClassifier

func NewClassifier(cfg Config) (*Classifier, error)

NewClassifier creates a new Classifier with the given configuration

func (*Classifier) Classify

func (c *Classifier) Classify(ctx context.Context, text string) (*Result, error)

Classify classifies the given text and returns the classification result

func (*Classifier) Close

func (c *Classifier) Close() error

Close gracefully shuts down the classifier, waiting for background tasks to complete and saving the DSU state. It's safe to call Close multiple times.

func (*Classifier) GetMetrics

func (c *Classifier) GetMetrics() Metrics

GetMetrics returns current classification metrics

func (*Classifier) SaveDSU

func (c *Classifier) SaveDSU() error

SaveDSU saves the current DSU state to persistent storage. This method is thread-safe and waits for any pending background tasks to complete.

type Config

// Config holds configuration for the Classifier. Per the field comments,
// nil clients and zero thresholds fall back to defaults.
type Config struct {
	// EmbeddingClient generates embeddings for text. If nil, uses the default (Voyage AI).
	EmbeddingClient EmbeddingClient

	// VectorClientLabel and VectorClientContent perform vector search and storage.
	// If nil, uses the default (Pinecone).
	// NOTE(review): presumably one client indexes labels and the other indexes
	// classified content, as the names suggest — confirm against the implementation.
	VectorClientLabel   VectorClient
	VectorClientContent VectorClient

	// LLMClient performs text classification. If nil, uses the default (OpenAI).
	// NOTE(review): Model and BaseUrl are undocumented; presumably they configure
	// the default LLM client used when LLMClient is nil — confirm.
	LLMClient LLMClient
	Model     string
	BaseUrl   string

	// DSUPersistence handles loading/saving the label clustering state. If nil, uses file-based persistence at ./dsu_state.bin
	DSUPersistence DisjointSetPersistence

	// MinSimilarityContent and MinSimilarityLabel are the thresholds for vector
	// similarity matching (0.0 to 1.0). If 0, uses DefaultMinSimilarity.
	// NOTE(review): the original comment named a single "MinSimilarity"; it is
	// assumed to describe both fields — confirm.
	MinSimilarityContent float32
	MinSimilarityLabel   float32
}

Config holds configuration for the Classifier

type DisjointSetPersistence

// DisjointSetPersistence handles loading and saving the Disjoint Set Union
// (DSU) structure.
type DisjointSetPersistence interface {
	// Load returns the stored DSU, or an error.
	Load() (*disjoint_set.DSU, error)
	// Save stores the given DSU and returns an error on failure.
	Save(dsu *disjoint_set.DSU) error
}

DisjointSetPersistence handles loading and saving the Disjoint Set Union structure

type EmbeddingClient

// EmbeddingClient generates vector embeddings for text.
type EmbeddingClient interface {
	// GenerateEmbedding returns the embedding of text as a []float32, or an error.
	GenerateEmbedding(ctx context.Context, text string) ([]float32, error)
}

EmbeddingClient generates vector embeddings for text

type FileDSUPersistence

type FileDSUPersistence struct {
	// contains filtered or unexported fields
}

FileDSUPersistence implements DisjointSetPersistence using file-based storage.

func NewFileDSUPersistence

func NewFileDSUPersistence(filepath string) *FileDSUPersistence

NewFileDSUPersistence creates a new file-based DSU persistence handler

func (*FileDSUPersistence) Load

func (f *FileDSUPersistence) Load() (*disjoint_set.DSU, error)

Load loads the DSU from the file. If the file doesn't exist, returns a new empty DSU.

func (*FileDSUPersistence) Save

func (f *FileDSUPersistence) Save(dsu *disjoint_set.DSU) error

Save saves the DSU to the file

type LLMClient

// LLMClient classifies text into category labels.
type LLMClient interface {
	// Classify returns the category label for text as a string, or an error.
	Classify(ctx context.Context, text string) (string, error)
}

LLMClient classifies text into category labels

type Metrics

// Metrics provides statistics about the classifier's state.
type Metrics struct {
	// UniqueLabels is the total number of unique labels seen
	UniqueLabels int

	// ConvergedLabels is the number of distinct label clusters after DSU merging
	ConvergedLabels int

	// CacheHitRate is the percentage of classifications served from cache
	// NOTE(review): presumably on a 0-100 scale rather than 0-1 — confirm.
	CacheHitRate float32
}

Metrics provides statistics about the classifier's state

type Result

// Result represents the classification result for a single input text.
type Result struct {
	// Label is the classification category assigned to the text
	Label string

	// CacheHit indicates whether the classification was retrieved from the vector cache
	CacheHit bool

	// Confidence is the similarity score if cache hit, 0 otherwise
	Confidence float32

	// UserFacingLatency is the time the user waited for the classification
	UserFacingLatency time.Duration

	// BackgroundLatency is the time spent on background tasks (clustering, vector upserts)
	// This is 0 if cache hit, since no background work is needed
	BackgroundLatency time.Duration
}

Result represents the classification result

type VectorClient

// VectorClient performs vector similarity search and storage operations.
type VectorClient interface {
	// Search looks up matches for vector and returns them, or an error.
	// NOTE(review): topK presumably caps the number of matches returned — confirm.
	Search(ctx context.Context, vector []float32, topK int) ([]types.VectorMatch, error)
	// Upsert stores vector under id together with metadata and returns an
	// error on failure.
	Upsert(ctx context.Context, id string, vector []float32, metadata map[string]any) error
}

VectorClient performs vector similarity search and storage operations

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL