Documentation ¶
Index ¶
- type Chunk
- type ChunkImage
- type ChunkMatch
- type Community
- type DBStats
- type Document
- type Entity
- type QueryLog
- type Relationship
- type RetrievalResult
- type Store
- func (s *Store) AllEntities(ctx context.Context) ([]Entity, error)
- func (s *Store) AllRelationships(ctx context.Context) ([]Relationship, error)
- func (s *Store) ChunkHasEmbedding(ctx context.Context, chunkID int64) (bool, error)
- func (s *Store) ClearCommunities(ctx context.Context) error
- func (s *Store) Close() error
- func (s *Store) DB() *sql.DB
- func (s *Store) DBStats(ctx context.Context) (*DBStats, error)
- func (s *Store) DeleteDocument(ctx context.Context, id int64) error
- func (s *Store) DeleteDocumentData(ctx context.Context, docID int64) error
- func (s *Store) EmbeddingDim() int
- func (s *Store) FTSSearch(ctx context.Context, query string, limit int) ([]RetrievalResult, error)
- func (s *Store) GetChunksByDocument(ctx context.Context, docID int64) ([]Chunk, error)
- func (s *Store) GetCommunities(ctx context.Context, level int) ([]Community, error)
- func (s *Store) GetCorpusLanguages(ctx context.Context) ([]string, error)
- func (s *Store) GetDocument(ctx context.Context, id int64) (*Document, error)
- func (s *Store) GetDocumentByPath(ctx context.Context, path string) (*Document, error)
- func (s *Store) GetEntitiesByNames(ctx context.Context, names []string) ([]Entity, error)
- func (s *Store) GetImagesByChunkIDs(ctx context.Context, chunkIDs []int64, includeData bool) (map[int64][]ChunkImage, error)
- func (s *Store) GetRelatedEntities(ctx context.Context, entityIDs []int64, limit int) ([]Entity, error)
- func (s *Store) GraphSearch(ctx context.Context, entityIDs []int64, limit int) ([]RetrievalResult, error)
- func (s *Store) InsertChunkImages(ctx context.Context, images []ChunkImage) error
- func (s *Store) InsertChunks(ctx context.Context, chunks []Chunk) ([]int64, error)
- func (s *Store) InsertCommunity(ctx context.Context, c Community) (int64, error)
- func (s *Store) InsertEmbedding(ctx context.Context, chunkID int64, embedding []float32) error
- func (s *Store) InsertRelationship(ctx context.Context, r Relationship) (int64, error)
- func (s *Store) LinkEntityChunk(ctx context.Context, entityID, chunkID int64) error
- func (s *Store) ListDocuments(ctx context.Context) ([]Document, error)
- func (s *Store) LogQuery(ctx context.Context, q QueryLog) error
- func (s *Store) Migrate(ctx context.Context) error
- func (s *Store) SampleChunks(ctx context.Context, n int) ([]Chunk, error)
- func (s *Store) SearchChunksByContent(ctx context.Context, substring string) ([]ChunkMatch, error)
- func (s *Store) SearchEntitiesByNameEN(ctx context.Context, terms []string, limit int) ([]Entity, error)
- func (s *Store) SearchEntitiesByTerms(ctx context.Context, terms []string, limit int) ([]Entity, error)
- func (s *Store) UpdateDocumentLanguage(ctx context.Context, docID int64, language string) error
- func (s *Store) UpdateDocumentParseMethod(ctx context.Context, id int64, method string) error
- func (s *Store) UpdateDocumentStatus(ctx context.Context, id int64, status string) error
- func (s *Store) UpsertDocument(ctx context.Context, doc Document) (int64, error)
- func (s *Store) UpsertEntity(ctx context.Context, e Entity) (int64, error)
- func (s *Store) UpsertEntityAndLink(ctx context.Context, e Entity, chunkID int64) (int64, error)
- func (s *Store) VectorSearch(ctx context.Context, queryEmbedding []float32, k int) ([]RetrievalResult, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Chunk ¶
// Chunk represents a row in the chunks table.
type Chunk struct {
ID int64 `json:"id"`
// DocumentID is the ID of the document the chunk belongs to
// (GetChunksByDocument selects chunks by it).
DocumentID int64 `json:"document_id"`
// ParentChunkID is optional: nil means no parent, and the field is then
// omitted from JSON. InsertChunks remaps it from the chunker's temporary
// position-based ID to the real database ID.
ParentChunkID *int64 `json:"parent_chunk_id,omitempty"`
Content string `json:"content"`
ChunkType string `json:"chunk_type"`
Heading string `json:"heading"`
PageNumber int `json:"page_number"`
PositionInDoc int `json:"position_in_doc"`
TokenCount int `json:"token_count"`
// Metadata is omitted from JSON when empty.
// NOTE(review): encoding is not visible here - presumably a JSON string; confirm.
Metadata string `json:"metadata,omitempty"`
// NOTE(review): presumably a hash of Content; the algorithm is not shown here.
ContentHash string `json:"content_hash"`
}
Chunk represents a row in the chunks table.
type ChunkImage ¶ added in v0.2.0
// ChunkImage represents an image associated with a chunk.
type ChunkImage struct {
ID int64 `json:"id"`
// ChunkID is the chunk the image is attached to; GetImagesByChunkIDs
// groups its results by this value.
ChunkID int64 `json:"chunk_id"`
DocumentID int64 `json:"document_id"`
// Caption is omitted from JSON when empty.
Caption string `json:"caption,omitempty"`
MIMEType string `json:"mime_type"`
// NOTE(review): Width and Height units are not stated here - presumably pixels; confirm.
Width int `json:"width"`
Height int `json:"height"`
PageNumber int `json:"page_number"`
// Data holds the image bytes and is omitted from JSON when empty.
// GetImagesByChunkIDs leaves it empty when includeData is false to avoid
// loading BLOBs.
Data []byte `json:"data,omitempty"`
}
ChunkImage represents an image associated with a chunk.
type ChunkMatch ¶
// ChunkMatch holds the result of a content substring search
// (see SearchChunksByContent). It carries only identifying fields of the
// matching chunk, not the chunk content itself.
type ChunkMatch struct {
ChunkID int64 `json:"chunk_id"`
Heading string `json:"heading"`
PageNumber int `json:"page_number"`
}
ChunkMatch holds the result of a content substring search.
type Community ¶
// Community represents a row in the communities table.
type Community struct {
ID int64 `json:"id"`
// Level is the value GetCommunities selects communities by.
Level int `json:"level"`
Summary string `json:"summary"`
// EntityIDs is carried as a string holding a JSON array, not as a Go slice.
EntityIDs string `json:"entity_ids"` // JSON array
}
Community represents a row in the communities table.
type DBStats ¶
// DBStats holds counts of key database objects, as returned by
// (*Store).DBStats. Each field is the count of the object kind it is named after.
type DBStats struct {
Chunks int `json:"chunks"`
Embeddings int `json:"embeddings"`
Entities int `json:"entities"`
Relationships int `json:"relationships"`
Communities int `json:"communities"`
Documents int `json:"documents"`
}
DBStats holds counts of key database objects.
type Document ¶
// Document represents a row in the documents table.
//
// NOTE(review): UpdateDocumentLanguage sets a detected language for a
// document, but no language field is exposed on this struct.
type Document struct {
ID int64 `json:"id"`
// Path is the file path; GetDocumentByPath looks documents up by it.
Path string `json:"path"`
Filename string `json:"filename"`
Format string `json:"format"`
ContentHash string `json:"content_hash"`
// ParseMethod can be changed on its own via UpdateDocumentParseMethod.
ParseMethod string `json:"parse_method"`
// Status can be changed on its own via UpdateDocumentStatus.
// NOTE(review): the set of valid status values is not visible here.
Status string `json:"status"`
// Metadata is omitted from JSON when empty.
Metadata string `json:"metadata,omitempty"`
// CreatedAt and UpdatedAt are carried as strings.
// NOTE(review): the timestamp format is not visible here; confirm before parsing.
CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"`
}
Document represents a row in the documents table.
type Entity ¶
// Entity represents a row in the entities table.
type Entity struct {
ID int64 `json:"id"`
// Name is the entity name matched by GetEntitiesByNames and
// SearchEntitiesByTerms.
Name string `json:"name"`
// NameEN is the English canonical name (name_en column), used by
// SearchEntitiesByNameEN for cross-language entity matching.
NameEN string `json:"name_en"`
EntityType string `json:"entity_type"`
Description string `json:"description"`
// EmbeddingID is optional: nil means unset, and the field is then omitted from JSON.
EmbeddingID *int64 `json:"embedding_id,omitempty"`
// Metadata is omitted from JSON when empty.
Metadata string `json:"metadata,omitempty"`
}
Entity represents a row in the entities table.
type QueryLog ¶
// QueryLog represents a row in the query_log table; LogQuery takes one as input.
type QueryLog struct {
Query string `json:"query"`
Answer string `json:"answer"`
Confidence float64 `json:"confidence"`
// Sources accepts any value.
// NOTE(review): how it is serialized for storage is not visible here; confirm
// that callers pass something the store can encode.
Sources interface{} `json:"sources"`
RetrievalMethod string `json:"retrieval_method"`
ModelUsed string `json:"model_used"`
Rounds int `json:"rounds"`
// Token counters for the query.
// NOTE(review): presumably TotalTokens == PromptTokens + CompletionTokens;
// nothing visible here enforces it.
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
QueryLog represents a row in the query_log table.
type Relationship ¶
// Relationship represents a row in the relationships table, connecting two
// entities (see InsertRelationship).
type Relationship struct {
ID int64 `json:"id"`
// SourceEntityID and TargetEntityID are the IDs of the two connected entities.
SourceEntityID int64 `json:"source_entity_id"`
TargetEntityID int64 `json:"target_entity_id"`
RelationType string `json:"relation_type"`
// NOTE(review): the scale and meaning of Weight are not visible here.
Weight float64 `json:"weight"`
Description string `json:"description"`
// SourceChunkID is optional: nil means unset, and the field is then omitted from JSON.
SourceChunkID *int64 `json:"source_chunk_id,omitempty"`
// Metadata is omitted from JSON when empty.
Metadata string `json:"metadata,omitempty"`
}
Relationship represents a row in the relationships table.
type RetrievalResult ¶
// RetrievalResult holds a chunk with its retrieval score and document info.
// It is the element type returned by FTSSearch, GraphSearch and VectorSearch.
type RetrievalResult struct {
ChunkID int64 `json:"chunk_id"`
DocumentID int64 `json:"document_id"`
Content string `json:"content"`
Heading string `json:"heading"`
ChunkType string `json:"chunk_type"`
PageNumber int `json:"page_number"`
PositionInDoc int `json:"position_in_doc"`
// Filename and Path describe the document the chunk came from.
Filename string `json:"filename"`
Path string `json:"path"`
// NOTE(review): the scale and direction of Score (higher-is-better vs.
// distance) are not visible here and may differ per search method; confirm
// before comparing scores across methods.
Score float64 `json:"score"`
// ChunkMeta and DocMeta are serialized as chunk_metadata and doc_metadata
// and are omitted from JSON when empty.
ChunkMeta string `json:"chunk_metadata,omitempty"`
DocMeta string `json:"doc_metadata,omitempty"`
}
RetrievalResult holds a chunk with its retrieval score and document info.
type Store ¶
// Store wraps the SQLite database for all goreason persistence.
// Its fields are unexported; the underlying handle is available via DB().
type Store struct {
// contains filtered or unexported fields
}
Store wraps the SQLite database for all goreason persistence.
func New ¶
New opens (or creates) a SQLite database at the given path and initialises the schema including sqlite-vec and FTS5 virtual tables.
func (*Store) AllEntities ¶
AllEntities returns every entity in the database.
func (*Store) AllRelationships ¶
func (s *Store) AllRelationships(ctx context.Context) ([]Relationship, error)
AllRelationships returns every relationship in the database.
func (*Store) ChunkHasEmbedding ¶
ChunkHasEmbedding checks if a specific chunk has a vector embedding.
func (*Store) ClearCommunities ¶
ClearCommunities removes all community data.
func (*Store) DBStats ¶
DBStats returns counts of chunks, embeddings, entities, relationships, communities, and documents.
func (*Store) DeleteDocument ¶
DeleteDocument removes a document and cascades to all related data.
func (*Store) DeleteDocumentData ¶
DeleteDocumentData removes all chunks, embeddings, images, and entity data for a document but keeps the document record itself.
func (*Store) EmbeddingDim ¶
EmbeddingDim returns the configured embedding dimension.
func (*Store) GetChunksByDocument ¶
GetChunksByDocument returns all chunks for a given document.
func (*Store) GetCommunities ¶
GetCommunities returns all communities at a given level.
func (*Store) GetCorpusLanguages ¶
GetCorpusLanguages returns the distinct non-null languages across all documents.
func (*Store) GetDocument ¶
GetDocument retrieves a document by ID.
func (*Store) GetDocumentByPath ¶
GetDocumentByPath retrieves a document by its file path.
func (*Store) GetEntitiesByNames ¶
GetEntitiesByNames returns entities matching any of the given names.
func (*Store) GetImagesByChunkIDs ¶ added in v0.2.0
func (s *Store) GetImagesByChunkIDs(ctx context.Context, chunkIDs []int64, includeData bool) (map[int64][]ChunkImage, error)
GetImagesByChunkIDs returns images grouped by chunk ID. When includeData is false, the Data field is left empty to avoid loading BLOBs.
func (*Store) GetRelatedEntities ¶
func (s *Store) GetRelatedEntities(ctx context.Context, entityIDs []int64, limit int) ([]Entity, error)
GetRelatedEntities performs a 1-hop expansion from the given seed entity IDs via the relationships table, returning entities that are directly connected but not already in the seed set. Used by synthesis-mode retrieval to discover semantically distant entities (e.g., from "seguridad y normativa" → "ip54").
func (*Store) GraphSearch ¶
func (s *Store) GraphSearch(ctx context.Context, entityIDs []int64, limit int) ([]RetrievalResult, error)
GraphSearch finds chunks reachable via entity relationships.
func (*Store) InsertChunkImages ¶ added in v0.2.0
func (s *Store) InsertChunkImages(ctx context.Context, images []ChunkImage) error
InsertChunkImages batch-inserts images associated with chunks.
func (*Store) InsertChunks ¶
InsertChunks inserts a batch of chunks and returns their IDs. The chunker assigns temporary position-based IDs; this method remaps ParentChunkID values to the real database IDs as chunks are inserted.
func (*Store) InsertCommunity ¶
InsertCommunity stores a community detection result.
func (*Store) InsertEmbedding ¶
InsertEmbedding stores a vector embedding for a chunk.
func (*Store) InsertRelationship ¶
InsertRelationship creates a relationship between two entities.
func (*Store) LinkEntityChunk ¶
LinkEntityChunk creates a provenance link between an entity and a chunk.
func (*Store) ListDocuments ¶
ListDocuments returns all documents ordered by creation time.
func (*Store) SampleChunks ¶
SampleChunks returns up to n chunks sampled from the database. Used for language detection and other heuristics.
func (*Store) SearchChunksByContent ¶
SearchChunksByContent searches all chunks for those whose content contains the given substring, ignoring case.
func (*Store) SearchEntitiesByNameEN ¶
func (s *Store) SearchEntitiesByNameEN(ctx context.Context, terms []string, limit int) ([]Entity, error)
SearchEntitiesByNameEN finds entities whose English canonical name contains any of the given terms as substrings. Same pattern as SearchEntitiesByTerms but operates on the name_en column for cross-language entity matching.
func (*Store) SearchEntitiesByTerms ¶
func (s *Store) SearchEntitiesByTerms(ctx context.Context, terms []string, limit int) ([]Entity, error)
SearchEntitiesByTerms finds entities whose names contain any of the given terms as substrings. This enables graph search to work when query terms are single words (e.g. "rechazador") and entity names are multi-word phrases (e.g. "rechazador de envases"). The number of results is capped by limit to avoid noise from very short or generic terms.
func (*Store) UpdateDocumentLanguage ¶
UpdateDocumentLanguage sets the detected language for a document.
func (*Store) UpdateDocumentParseMethod ¶
UpdateDocumentParseMethod updates just the parse_method field.
func (*Store) UpdateDocumentStatus ¶
UpdateDocumentStatus updates just the status field.
func (*Store) UpsertDocument ¶
UpsertDocument inserts or updates a document record. Returns the document ID.
func (*Store) UpsertEntity ¶
UpsertEntity inserts or updates an entity. Returns the entity ID.
func (*Store) UpsertEntityAndLink ¶
UpsertEntityAndLink atomically upserts an entity and links it to a chunk in a single transaction, preventing FOREIGN KEY failures from concurrent access.
func (*Store) VectorSearch ¶
func (s *Store) VectorSearch(ctx context.Context, queryEmbedding []float32, k int) ([]RetrievalResult, error)
VectorSearch performs a KNN search returning the top-k nearest chunks.