schema

package
v0.31.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 4, 2026 License: MIT Imports: 4 Imported by: 0

Documentation

Overview

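Package schema defines core data structures and interfaces used throughout the goframe library. It provides types for documents, sparse vectors, chat messages, LLM responses, and code chunks with their file metadata, along with interfaces for retrievers, rerankers, output parsers, and language-specific parser plugins.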

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ChatMessageType

// ChatMessageType represents the role of a message in a conversation.
// It is a string type; the predefined roles are the ChatMessageType*
// constants ("system", "human", "ai", "generic", "tool").
type ChatMessageType string

ChatMessageType represents the role of a message in a conversation.

// Chat message type constants. Each constant's underlying string value is
// the lowercase role name shown on its line.
const (
	// ChatMessageTypeSystem represents a system message that sets behavior.
	ChatMessageTypeSystem ChatMessageType = "system"
	// ChatMessageTypeHuman represents a user message.
	ChatMessageTypeHuman ChatMessageType = "human"
	// ChatMessageTypeAI represents an assistant/AI message.
	ChatMessageTypeAI ChatMessageType = "ai"
	// ChatMessageTypeGeneric represents a generic message type.
	// NOTE(review): when a message should be "generic" rather than one of
	// the specific roles is not documented here — confirm with callers.
	ChatMessageTypeGeneric ChatMessageType = "generic"
	// ChatMessageTypeTool represents a tool result message.
	ChatMessageTypeTool ChatMessageType = "tool"
)

Chat message type constants.

type CodeChunk

// CodeChunk represents a chunk of code with metadata.
//
// JSON naming note: most fields serialize with camelCase keys (e.g.
// "lineStart", "tokenCount"), but IsDefinition and SymbolType use snake_case
// keys ("is_definition", "symbol_type"). The keys are part of the serialized
// format, so they are documented here rather than normalized.
type CodeChunk struct {
	// Content is the code content of the chunk.
	Content string `json:"content"`
	// LineStart is the starting line number in the source file.
	// NOTE(review): 0- vs 1-based numbering and inclusivity are not stated — confirm.
	LineStart int `json:"lineStart"`
	// LineEnd is the ending line number in the source file.
	LineEnd int `json:"lineEnd"`
	// Type is the chunk type (e.g., "function", "struct", "import").
	Type string `json:"type"`
	// Identifier is the primary identifier of the chunk (e.g., function name).
	Identifier string `json:"identifier"`
	// Annotations contains additional annotations for the chunk.
	Annotations map[string]string `json:"annotations"`
	// TokenCount is the estimated number of tokens in the chunk.
	TokenCount int `json:"tokenCount"`
	// EnrichedContent contains the content with added context.
	EnrichedContent string `json:"enrichedContent"`
	// ParentContext contains context from the parent structure.
	ParentContext string `json:"parentContext"`
	// ContextLevel indicates the nesting level of the context.
	ContextLevel int `json:"contextLevel"`
	// Sparse is an optional sparse vector for hybrid search.
	// Omitted from JSON output when nil.
	Sparse *SparseVector `json:"sparse,omitempty"`
	// ParentID uniquely identifies the parent code structure (function/class) this chunk belongs to.
	// Empty for top-level chunks that are not split; omitted from JSON output when empty.
	ParentID string `json:"parentID,omitempty"`
	// FullParentText contains the complete text of the parent structure.
	// WARNING: This can be large. Logic should truncate this before storage.
	// Omitted from JSON output when empty.
	FullParentText string `json:"fullParentText,omitempty"`
	// IsDefinition is true if this chunk represents the primary source-of-truth definition of a symbol.
	// Serialized under the snake_case key "is_definition" (see the type comment).
	IsDefinition bool `json:"is_definition"`
	// SymbolType is the category of the symbol (e.g., struct, interface, function).
	// Serialized under the snake_case key "symbol_type" (see the type comment).
	SymbolType string `json:"symbol_type"`
}

CodeChunk represents a chunk of code with metadata.

type CodeChunkingOptions

// CodeChunkingOptions configures how code is chunked.
// Note that the limits use three different units: tokens (ChunkSize,
// OverlapTokens), lines (MaxLinesPerChunk), and characters (MinCharsPerChunk).
// NOTE(review): how zero values are interpreted (disabled vs. default) is not
// visible here — confirm against the chunker implementations.
type CodeChunkingOptions struct {
	// ChunkSize is the target size in tokens for each chunk.
	ChunkSize int
	// OverlapTokens is the number of overlapping tokens between chunks.
	OverlapTokens int
	// PreserveStructure attempts to keep related code together.
	PreserveStructure bool
	// LanguageHints provides hints about the language for better chunking.
	LanguageHints []string
	// MaxLinesPerChunk limits the maximum lines per chunk.
	MaxLinesPerChunk int
	// MinCharsPerChunk is the minimum characters required for a valid chunk.
	MinCharsPerChunk int
}

CodeChunkingOptions configures how code is chunked.

type CodeEntityDefinition

// CodeEntityDefinition represents a code entity definition (function, struct, etc.).
// All fields serialize to JSON under snake_case keys.
type CodeEntityDefinition struct {
	// Type is the entity type (e.g., "function", "struct", "interface").
	Type string `json:"type"`
	// Name is the entity name.
	Name string `json:"name"`
	// LineStart is the starting line number.
	LineStart int `json:"line_start"`
	// LineEnd is the ending line number.
	LineEnd int `json:"line_end"`
	// Visibility is the export visibility (e.g., "public", "private").
	Visibility string `json:"visibility"`
	// Signature is the function/method signature.
	// NOTE(review): presumably empty for non-callable entities — confirm.
	Signature string `json:"signature"`
	// Documentation is the doc comment for the entity.
	Documentation string `json:"documentation"`
}

CodeEntityDefinition represents a code entity definition (function, struct, etc.).

type CodeSymbol

// CodeSymbol represents a symbol found in code.
// All fields serialize to JSON under snake_case keys.
type CodeSymbol struct {
	// Name is the symbol name.
	Name string `json:"name"`
	// Type is the symbol type (e.g., "function", "variable", "type").
	Type string `json:"type"`
	// LineStart is the starting line number.
	LineStart int `json:"line_start"`
	// LineEnd is the ending line number.
	LineEnd int `json:"line_end"`
	// IsExport indicates if the symbol is exported.
	IsExport bool `json:"is_export"`
}

CodeSymbol represents a symbol found in code.

type ContentChoice added in v0.2.0

// ContentChoice represents a single generated response choice.
type ContentChoice struct {
	// Content is the generated text content.
	Content string
	// StopReason is the reason generation stopped (e.g., "stop", "length").
	StopReason string
	// GenerationInfo contains metadata about the generation.
	// NOTE(review): the set of keys is not defined here and presumably
	// varies by provider — confirm before relying on specific entries.
	GenerationInfo map[string]any
	// ReasoningContent contains chain-of-thought reasoning (for models that support it).
	ReasoningContent string
}

ContentChoice represents a single generated response choice.

type ContentPart

// ContentPart represents a part of a message content. Content parts can be
// text, images, or other multimodal content. In this package TextContent,
// ImageContent, and ToolResultContent each provide a String method.
type ContentPart interface {
	// String returns a textual form of the part: the text itself for
	// TextContent, a placeholder for ImageContent, and the tool result
	// content for ToolResultContent.
	String() string
	// contains filtered or unexported methods
	// NOTE(review): if any of these methods are unexported, types outside
	// this package cannot implement ContentPart — confirm against the source.
}

ContentPart represents a part of a message content. Content parts can be text, images, or other multimodal content.

type ContentResponse added in v0.2.0

// ContentResponse represents the response from an LLM generation call.
type ContentResponse struct {
	// Choices contains the generated content choices.
	// Elements are pointers, so callers should guard against nil entries
	// and an empty slice before indexing.
	Choices []*ContentChoice
}

ContentResponse represents the response from an LLM generation call.

type Document

// Document represents a text document with its content and metadata. It is
// the primary data structure for RAG operations, containing the text content,
// associated metadata, and optional sparse vector.
//
// A zero-value Document has a nil Metadata map; writing to it panics.
// NewDocument is documented to create an empty map when given nil metadata.
type Document struct {
	// PageContent is the text content of the document.
	PageContent string
	// Metadata contains arbitrary key-value pairs associated with the document.
	Metadata map[string]any
	// Sparse is an optional sparse vector for hybrid search.
	// Nil when no sparse vector is attached.
	Sparse *SparseVector
}

Document represents a text document with its content and metadata. It is the primary data structure for RAG operations, containing the text content, associated metadata, and optional sparse vector.

func NewDocument

func NewDocument(content string, metadata map[string]any) Document

NewDocument creates a new Document with the given content and metadata. If metadata is nil, an empty map is created.

func (Document) String

func (d Document) String() string

String returns the page content of the document.

type FileMetadata

// FileMetadata contains metadata extracted from a source file.
// It is the result type of ParserPlugin.ExtractMetadata. All fields serialize
// to JSON under snake_case keys.
type FileMetadata struct {
	// FilePath is the path to the file.
	FilePath string `json:"file_path"`
	// Language is the programming language (e.g., "go", "typescript").
	Language string `json:"language"`
	// PackageName is the name of the package/module.
	PackageName string `json:"package_name"`
	// Imports is the list of imported packages/modules.
	Imports []string `json:"imports"`
	// Definitions contains the top-level definitions in the file.
	Definitions []CodeEntityDefinition `json:"definitions"`
	// Symbols contains all symbols found in the file.
	Symbols []CodeSymbol `json:"symbols"`
	// Properties contains additional file properties.
	Properties map[string]string `json:"properties"`
}

FileMetadata contains metadata extracted from a source file.

type ImageContent added in v0.15.0

// ImageContent represents image content in a message (base64-encoded).
type ImageContent struct {
	// Data is the base64-encoded image data.
	// NOTE(review): whether a "data:" URI prefix is allowed is not stated — confirm.
	Data string
	// MimeType is the MIME type of the image (e.g., "image/png", "image/jpeg").
	MimeType string
}

ImageContent represents image content in a message (base64-encoded).

func (ImageContent) String added in v0.15.0

func (ic ImageContent) String() string

String returns a placeholder for image content.

type MessageContent

// MessageContent represents a message in a conversation with a role and
// content parts.
type MessageContent struct {
	// Role is the role of the message sender.
	Role ChatMessageType
	// Parts contains the content parts of the message.
	// A single message may mix part kinds (e.g. text plus an image).
	Parts []ContentPart
}

MessageContent represents a message in a conversation with a role and content parts.

func NewAIMessage

func NewAIMessage(text string) MessageContent

NewAIMessage creates a new AI/assistant message with the given text.

func NewHumanMessage

func NewHumanMessage(text string) MessageContent

NewHumanMessage creates a new human/user message with the given text.

func NewHumanMessageWithImage added in v0.15.0

func NewHumanMessageWithImage(text string, imageData, mimeType string) MessageContent

NewHumanMessageWithImage creates a human message with text and an image.

func NewSystemMessage

func NewSystemMessage(text string) MessageContent

NewSystemMessage creates a new system message with the given text.

func NewTextMessage

func NewTextMessage(role ChatMessageType, text string) MessageContent

NewTextMessage creates a new message with text content.

func NewToolResultMessage added in v0.15.0

func NewToolResultMessage(toolName, content string) MessageContent

NewToolResultMessage creates a tool result message with the given tool name and content.

func (MessageContent) GetImages added in v0.15.0

func (mc MessageContent) GetImages() []ImageContent

GetImages extracts all images from the message parts.

func (MessageContent) GetTextContent

func (mc MessageContent) GetTextContent() string

GetTextContent returns the text content of the message.

func (MessageContent) String

func (mc MessageContent) String() string

String returns the concatenated text of all content parts.

type ModelDetails

// ModelDetails contains information about an LLM model.
type ModelDetails struct {
	// Family is the model family (e.g., "llama", "gemma").
	Family string
	// ParameterSize is the parameter count (e.g., "7B", "13B").
	// It is a display string, not a numeric value.
	ParameterSize string
	// Quantization is the quantization level (e.g., "q4_0", "f16").
	Quantization string
	// Dimension is the embedding dimension of the model.
	Dimension int64
}

ModelDetails contains information about an LLM model.

func (ModelDetails) String

func (md ModelDetails) String() string

String returns a human-readable representation of the model details.

type NoOpReranker added in v0.15.0

// NoOpReranker is a reranker that returns documents in their original order
// with a constant high score. Use this when reranking is not needed.
// It carries no state, so the zero value is ready to use.
type NoOpReranker struct{}

NoOpReranker is a reranker that returns documents in their original order with a constant high score. Use this when reranking is not needed.

func (NoOpReranker) Rerank added in v0.15.0

func (n NoOpReranker) Rerank(_ context.Context, _ string, docs []Document) ([]ScoredDocument, error)

Rerank implements Reranker by returning documents with a constant score of 10.0.

type OutputParser added in v0.15.0

// OutputParser converts raw LLM text output into a structured type.
// Implementations can parse JSON, CSV, or custom formats into typed values.
// T is the type produced by Parse.
type OutputParser[T any] interface {
	// Parse converts the raw text output into type T.
	// It returns an error when the text cannot be converted.
	Parse(ctx context.Context, text string) (T, error)
}

OutputParser converts raw LLM text output into a structured type. Implementations can parse JSON, CSV, or custom formats into typed values.

type ParserPlugin

// ParserPlugin defines the interface for language-specific code parsing.
// Implementations handle parsing, chunking, and metadata extraction for
// specific programming languages or file types.
type ParserPlugin interface {
	// Name returns the name of the parser (e.g., "go", "typescript").
	Name() string
	// Extensions returns the file extensions this parser handles (e.g., ".go", ".ts").
	Extensions() []string
	// CanHandle returns true if this parser can handle the given file.
	// NOTE(review): whether info may be nil is not visible here — confirm.
	CanHandle(path string, info fs.FileInfo) bool
	// Chunk splits the content into semantic chunks with metadata.
	// NOTE(review): opts is a pointer; whether nil selects defaults is not
	// visible here — confirm against implementations.
	Chunk(content string, path string, opts *CodeChunkingOptions) ([]CodeChunk, error)
	// ExtractMetadata extracts file-level metadata like package name and imports.
	ExtractMetadata(content string, path string) (FileMetadata, error)
	// IsGenerated returns true if the file appears to be auto-generated.
	IsGenerated(content string, path string) bool
	// ExtractUsedSymbols identifies potential external types/functions
	// being used in the code that might need a definition lookup.
	ExtractUsedSymbols(content string) []string
}

ParserPlugin defines the interface for language-specific code parsing. Implementations handle parsing, chunking, and metadata extraction for specific programming languages or file types.

type Reranker added in v0.15.0

// Reranker is the interface for reranking documents. Implementations reorder
// documents by relevance to a query, typically using an LLM or cross-encoder
// model.
type Reranker interface {
	// Rerank reorders documents by relevance to the query, returning each
	// document paired with a relevance score and, when the implementation
	// provides one, an explanation (ScoredDocument.Reason).
	Rerank(ctx context.Context, query string, docs []Document) ([]ScoredDocument, error)
}

Reranker is the interface for reranking documents. Implementations reorder documents by relevance to a query, typically using an LLM or cross-encoder model.

type Retriever

// Retriever is the interface for document retrieval. Implementations return
// documents relevant to a query from a corpus.
type Retriever interface {
	// GetRelevantDocuments returns documents relevant to the query.
	// NOTE(review): result ordering and maximum count are not specified by
	// this interface — confirm per implementation.
	GetRelevantDocuments(ctx context.Context, query string) ([]Document, error)
}

Retriever is the interface for document retrieval. Implementations return documents relevant to a query from a corpus.

type ScoredDocument added in v0.15.0

// ScoredDocument represents a document with an associated relevance score
// and optional reasoning for the score, typically produced by a reranker.
type ScoredDocument struct {
	// Document is the scored document. It is embedded, so its fields
	// (PageContent, Metadata, Sparse) and its String method are promoted
	// onto ScoredDocument.
	Document
	// Score is the relevance score, typically between 0 and 10.
	Score float64
	// Reason contains an explanation for the score, if available.
	Reason string
}

ScoredDocument represents a document with an associated relevance score and optional reasoning for the score, typically produced by a reranker.

type SparseVector added in v0.15.0

// SparseVector represents a sparse vector with indices and values. Sparse
// vectors are used for hybrid search combining dense embeddings with exact
// term matching for improved retrieval accuracy.
//
// NOTE(review): Indices and Values are presumably parallel slices of equal
// length, with Values[i] being the weight at dimension Indices[i] — nothing
// in this type enforces that; confirm with producers and consumers.
type SparseVector struct {
	// Indices holds the vector's index entries; serialized as "indices".
	Indices []uint32  `json:"indices"`
	// Values holds the vector's value entries; serialized as "values".
	Values  []float32 `json:"values"`
}

SparseVector represents a sparse vector with indices and values. Sparse vectors are used for hybrid search combining dense embeddings with exact term matching for improved retrieval accuracy.

type StringParser added in v0.15.0

// StringParser is an identity parser that returns the raw LLM output as-is.
// Use StringParser when no parsing is needed and the raw string is sufficient.
// Its Parse method has the shape required by OutputParser[string].
type StringParser struct{}

StringParser is an identity parser that returns the raw LLM output as-is. Use StringParser when no parsing is needed and the raw string is sufficient.

func (StringParser) Parse added in v0.15.0

func (StringParser) Parse(_ context.Context, text string) (string, error)

Parse returns the input text unchanged.

type TextContent

// TextContent represents text content in a message.
type TextContent struct {
	// Text is the text content.
	Text string
}

TextContent represents text content in a message.

func (TextContent) String

func (tc TextContent) String() string

String returns the text content.

type ToolResultContent added in v0.15.0

// ToolResultContent represents a tool execution result in a message.
type ToolResultContent struct {
	// ToolName is the name of the tool that was executed.
	ToolName string
	// Content is the result of the tool execution.
	Content string
}

ToolResultContent represents a tool execution result in a message.

func (ToolResultContent) String added in v0.15.0

func (trc ToolResultContent) String() string

String returns the tool result content.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL