Documentation
¶
Index ¶
- func AddPageContextToPrompt(filename, originalPrompt string) string
- func BuildPrompt(base, appendPrompt, filename, format string) string
- func DefaultMessageFormatter(prefix string) func(inProgress, completed, failed, total int64) string
- func NewExpandSinglePdfStage(service *ProviderService) fluxus.StageFunc[*PipelineItem, []*PipelineItem]
- func NewGrayScaleStage(processor image.GrayScaleProcessor) fluxus.StageFunc[*PipelineItem, *PipelineItem]
- func NewLastStageProgressStage(progress *ProgressTracker) fluxus.StageFunc[*PipelineItem, *PipelineItem]
- func NewSingleInputOCRStageWithProgress(ocrFunc ocrFunc, progress *ProgressTracker) fluxus.StageFunc[*PipelineItem, *BatchResult]
- func NewTextCorrectionStage(textCorrectionFunc textCorrectionFunc, progress *ProgressTracker) fluxus.StageFunc[*OCRResult, *OCRResult]
- func StartOCRPipeline(ops []imageio.ImageIO, service *ProviderService) error
- func WithDoclingBaseURL(baseURL string) func(*DoclingClient)
- type BatchResult
- type Config
- type Configurable
- type DoclingCliClient
- type DoclingClient
- type DoclingConvertDocumentResponse
- type DoclingDocumentResponse
- type DoclingHealthResponse
- type DoclingOptions
- type DoclingProcessPayload
- type DoclingProvider
- func (p *DoclingProvider) ApplyOptions() (map[string]string, error)
- func (p *DoclingProvider) GetConfig() Config
- func (p *DoclingProvider) InputToMessages(input OCRInput, ctx context.Context) (*DoclingProcessPayload, error)
- func (p *DoclingProvider) OCR(ctx context.Context, input OCRInput) (*OCRResult, error)
- func (p *DoclingProvider) SupportsPDF() bool
- func (p *DoclingProvider) WithImage(ctx context.Context, input OCRInput) (*DoclingProcessPayload, error)
- func (p *DoclingProvider) WithPDF(ctx context.Context, input OCRInput) (*DoclingProcessPayload, error)
- type GeminiProvider
- type InputType
- type MistralOcrDimensions
- type MistralOcrDocument
- type MistralOcrImage
- type MistralOcrPage
- type MistralOcrResponse
- type MistralOcrResquest
- type MistralProvider
- type OCRImage
- type OCRInput
- type OCRProvider
- func NewDoclingProvider(config Config) (OCRProvider, error)
- func NewGeminiProvider(config Config) (OCRProvider, error)
- func NewMistralProvider(config Config) (OCRProvider, error)
- func NewOCRProvider(config Config) (OCRProvider, error)
- func NewOllamaProvider(config Config) (OCRProvider, error)
- func NewOpenAIProvider(config Config) (OCRProvider, error)
- func NewProvider(providerName, model string, config Config) (OCRProvider, error)
- func NewTesseractProvider(config Config) (OCRProvider, error)
- type OCRResult
- type OllamaMessage
- type OllamaOptions
- type OllamaProvider
- func (o *OllamaProvider) ApplyOptions() (*OllamaOptions, error)
- func (o *OllamaProvider) Complete(ctx context.Context, text string) (string, error)
- func (o *OllamaProvider) GetConfig() Config
- func (o *OllamaProvider) OCR(ctx context.Context, input OCRInput) (*OCRResult, error)
- func (o *OllamaProvider) SupportsPDF() bool
- type OllamaRequest
- type OllamaResponse
- type OpenAIOptions
- type OpenAIProvider
- func (o *OpenAIProvider) ApplyOptions() (*OpenAIOptions, error)
- func (o *OpenAIProvider) Complete(ctx context.Context, text string) (string, error)
- func (o *OpenAIProvider) GetConfig() Config
- func (o *OpenAIProvider) InputToMessages(input OCRInput) ([]openai.ChatCompletionMessageParamUnion, error)
- func (o *OpenAIProvider) OCR(ctx context.Context, input OCRInput) (*OCRResult, error)
- func (o *OpenAIProvider) SupportsPDF() bool
- func (o *OpenAIProvider) WithImage(base64Image string, prompt string) openai.ChatCompletionMessageParamUnion
- func (o *OpenAIProvider) WithPDF(base64PDF string, prompt string) openai.ChatCompletionMessageParamUnion
- type PDFCapable
- type PipelineConfig
- type PipelineItem
- type ProgressTracker
- func NewProgressTracker(config ProgressTrackerConfig) *ProgressTracker
- func WithCustomProgress(total int, formatter func(inProgress, completed, failed, total int64) string) *ProgressTracker
- func WithGenericProgress(total int) *ProgressTracker
- func WithPrefixProgress(total int, prefix string) *ProgressTracker
- func (pt *ProgressTracker) AppendToPrefix(suffix string)
- func (pt *ProgressTracker) GetCounters() (inProgress, completed, failed, total int64)
- func (pt *ProgressTracker) GetPrefix() string
- func (pt *ProgressTracker) IncrementCompleted()
- func (pt *ProgressTracker) IncrementFailed()
- func (pt *ProgressTracker) IncrementInProgress()
- func (pt *ProgressTracker) IsComplete() bool
- func (pt *ProgressTracker) SetPrefix(newPrefix string)
- func (pt *ProgressTracker) SetTotal(newTotal int64)
- func (pt *ProgressTracker) Start()
- func (pt *ProgressTracker) Stop(message string)
- type ProgressTrackerConfig
- type ProviderConfig
- type ProviderOptionsInterface
- type ProviderService
- type RateLimitConfig
- type RateLimiter
- type TesseractClient
- func (c *TesseractClient) ConstructTesseractCommand(ctx context.Context, args []string) *exec.Cmd
- func (c *TesseractClient) IsTesseractInstalled() bool
- func (c *TesseractClient) OCRImageCmd(ctx context.Context, image image.Image, lang string) (*OCRResult, error)
- func (c *TesseractClient) RunTesseractCommand(ctx context.Context, cmd *exec.Cmd, input *bytes.Buffer) (string, error)
- type TesseractProvider
- type TextCorrectionConfig
- type TextProcessor
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func AddPageContextToPrompt ¶
AddPageContextToPrompt enhances the prompt with page-specific context for multi-page documents. It extracts page information from filenames with format "document.pdf-page-2-of-5" or "document.pdf-page-2" and adds appropriate context to help the OCR provider understand the document structure and place headings.
func BuildPrompt ¶
func DefaultMessageFormatter ¶
DefaultMessageFormatter creates a default message formatter with the given prefix
func NewExpandSinglePdfStage ¶
func NewExpandSinglePdfStage(service *ProviderService) fluxus.StageFunc[*PipelineItem, []*PipelineItem]
NewExpandSinglePdfStage creates a PDF expansion stage with the given provider
func NewGrayScaleStage ¶
func NewGrayScaleStage(processor image.GrayScaleProcessor) fluxus.StageFunc[*PipelineItem, *PipelineItem]
NewGrayScaleStage creates a grayscale processing stage with the given processor
func NewLastStageProgressStage ¶
func NewLastStageProgressStage(progress *ProgressTracker) fluxus.StageFunc[*PipelineItem, *PipelineItem]
func NewSingleInputOCRStageWithProgress ¶
func NewSingleInputOCRStageWithProgress(ocrFunc ocrFunc, progress *ProgressTracker) fluxus.StageFunc[*PipelineItem, *BatchResult]
func NewTextCorrectionStage ¶
func NewTextCorrectionStage(textCorrectionFunc textCorrectionFunc, progress *ProgressTracker) fluxus.StageFunc[*OCRResult, *OCRResult]
NewTextCorrectionStage creates a text correction stage for post-processing OCR results
func StartOCRPipeline ¶
func StartOCRPipeline(ops []imageio.ImageIO, service *ProviderService) error
StartOCRPipeline orchestrates the OCR workflow. It accepts an OCR provider service and a list of imageIO operations.
func WithDoclingBaseURL ¶
func WithDoclingBaseURL(baseURL string) func(*DoclingClient)
Types ¶
type BatchResult ¶
type BatchResult struct {
Item *PipelineItem
Result *OCRResult
Error error
}
BatchResult holds the processed result and its corresponding input item, used for stitching results back together in case of PDF files.
func ProcessBatch ¶
func ProcessBatch(ctx context.Context, items []*PipelineItem, ocrFunc ocrFunc, concurrency int, progress *ProgressTracker) ([]*BatchResult, error)
ProcessBatch processes a collection of pipeline items concurrently using a go-fluxus pipeline, with progress tracking built in. The 'concurrency' arg controls the throughput of the worker pool; the 'limiter' arg is used to rate limit the OCR calls; the 'ocrFunc' arg is the function to execute for each item. (Note: no 'limiter' parameter appears in the signature shown above.)
type Config ¶
type Config struct {
OCR ProviderConfig `yaml:"ocr"`
Pipeline PipelineConfig `yaml:"pipeline"`
RateLimit RateLimitConfig `yaml:"rate_limit"`
TextCorrection TextCorrectionConfig `yaml:"text_correction"`
// Provider-specific options that implement the ProviderOptionsInterface
DoclingOptions *DoclingOptions `yaml:"docling_options,omitempty"`
OpenAIOptions *OpenAIOptions `yaml:"openai_options,omitempty"`
OllamaOptions *OllamaOptions `yaml:"ollama_options,omitempty"`
}
type Configurable ¶
type Configurable interface {
GetConfig() Config
}
type DoclingCliClient ¶
DoclingCliClient holds the docling CLI information
func NewDoclingCliClient ¶
func NewDoclingCliClient() *DoclingCliClient
NewDoclingCliClient creates a new CLI client and checks if docling CLI is available
func (*DoclingCliClient) IsAvailable ¶
func (c *DoclingCliClient) IsAvailable() bool
func (*DoclingCliClient) ProcessFile ¶
func (c *DoclingCliClient) ProcessFile(ctx context.Context, fileBytes []byte, filename string, options map[string]string, outputDir string) (*DoclingConvertDocumentResponse, error)
ProcessFile processes a file using the docling CLI
type DoclingClient ¶
func NewDoclingClient ¶
func NewDoclingClient(opts ...func(*DoclingClient)) *DoclingClient
func (*DoclingClient) HealthCheck ¶
func (d *DoclingClient) HealthCheck(ctx context.Context) error
func (*DoclingClient) ProcessFile ¶
func (d *DoclingClient) ProcessFile(ctx context.Context, imageBytes []byte, filename string, options map[string]string) (*DoclingConvertDocumentResponse, error)
type DoclingDocumentResponse ¶
type DoclingHealthResponse ¶
type DoclingHealthResponse struct {
Status string `json:"status"`
}
type DoclingOptions ¶
type DoclingOptions struct {
OCR *bool `yaml:"do_ocr,omitempty"`
Force_OCR *bool `yaml:"force_ocr,omitempty"`
Pipeline string `yaml:"pipeline,omitempty"`
PDF_Backend string `yaml:"pdf_backend,omitempty"`
Abort_On_Error *bool `yaml:"abort_on_error,omitempty"`
Document_Timeout string `yaml:"document_timeout,omitempty"`
Num_Threads string `yaml:"num_threads,omitempty"`
Device string `yaml:"device,omitempty"`
Verbose string `yaml:"verbose,omitempty"`
}
DoclingOptions uses three-state boolean fields (*bool) to work around Go's inability to distinguish an unset bool from one explicitly set to false.
type DoclingProcessPayload ¶
type DoclingProvider ¶
type DoclingProvider struct {
Client *DoclingClient
CliClient *DoclingCliClient
Config Config
}
DoclingProvider implements the OCRProvider interface
func (*DoclingProvider) ApplyOptions ¶
func (p *DoclingProvider) ApplyOptions() (map[string]string, error)
ApplyOptions merges the schema.yml docling specific options with the default options
func (*DoclingProvider) GetConfig ¶
func (p *DoclingProvider) GetConfig() Config
func (*DoclingProvider) InputToMessages ¶
func (p *DoclingProvider) InputToMessages(input OCRInput, ctx context.Context) (*DoclingProcessPayload, error)
func (*DoclingProvider) SupportsPDF ¶
func (p *DoclingProvider) SupportsPDF() bool
func (*DoclingProvider) WithImage ¶
func (p *DoclingProvider) WithImage(ctx context.Context, input OCRInput) (*DoclingProcessPayload, error)
func (*DoclingProvider) WithPDF ¶
func (p *DoclingProvider) WithPDF(ctx context.Context, input OCRInput) (*DoclingProcessPayload, error)
type GeminiProvider ¶
type GeminiProvider struct {
// contains filtered or unexported fields
}
func (*GeminiProvider) GetConfig ¶
func (g *GeminiProvider) GetConfig() Config
func (*GeminiProvider) InputToMessages ¶
func (g *GeminiProvider) InputToMessages(input OCRInput) ([]*genai.Part, error)
type MistralOcrDimensions ¶
type MistralOcrDocument ¶
type MistralOcrImage ¶
type MistralOcrPage ¶
type MistralOcrPage struct {
Index int `json:"index"`
Markdown string `json:"markdown"`
Images []MistralOcrImage `json:"images"`
Dimensions MistralOcrDimensions `json:"dimensions"`
}
type MistralOcrResponse ¶
type MistralOcrResponse struct {
Pages []MistralOcrPage `json:"pages"`
Model string `json:"model"`
UsageInfo map[string]any `json:"usage_info"`
}
type MistralOcrResquest ¶
type MistralOcrResquest struct {
Model string `json:"model"`
ID string `json:"id,omitempty"`
Document MistralOcrDocument `json:"document"`
Pages []int `json:"pages,omitempty"`
IncludeImageBase64 bool `json:"include_image_base64"`
ImageLimit int `json:"image_limit,omitempty"`
ImageMinSize int `json:"image_min_size,omitempty"`
}
MistralOcrResquest is a Mistral single-image OCR request.
type MistralProvider ¶
type MistralProvider struct {
// contains filtered or unexported fields
}
MistralProvider implements the Provider Interface
func (*MistralProvider) GetConfig ¶
func (m *MistralProvider) GetConfig() Config
func (*MistralProvider) InputToMessages ¶
func (m *MistralProvider) InputToMessages(input OCRInput) (MistralOcrResquest, error)
func (*MistralProvider) SupportsPDF ¶
func (m *MistralProvider) SupportsPDF() bool
type OCRImage ¶
type OCRImage struct {
MistralImages []MistralOcrImage
}
type OCRProvider ¶
func NewDoclingProvider ¶
func NewDoclingProvider(config Config) (OCRProvider, error)
func NewGeminiProvider ¶
func NewGeminiProvider(config Config) (OCRProvider, error)
func NewMistralProvider ¶
func NewMistralProvider(config Config) (OCRProvider, error)
func NewOCRProvider ¶
func NewOCRProvider(config Config) (OCRProvider, error)
func NewOllamaProvider ¶
func NewOllamaProvider(config Config) (OCRProvider, error)
func NewOpenAIProvider ¶
func NewOpenAIProvider(config Config) (OCRProvider, error)
NewOpenAIProvider creates a new OpenAI provider
func NewProvider ¶
func NewProvider(providerName, model string, config Config) (OCRProvider, error)
func NewTesseractProvider ¶
func NewTesseractProvider(config Config) (OCRProvider, error)
type OCRResult ¶
func MistralToOCRResult ¶
func MistralToOCRResult(res *MistralOcrResponse) (*OCRResult, error)
MistralToOCRResult maps the external MistralOcrResponse into our internal OCRResult
type OllamaMessage ¶
type OllamaOptions ¶
type OllamaProvider ¶
type OllamaProvider struct {
// contains filtered or unexported fields
}
OllamaProvider implements the Provider Interface
func (*OllamaProvider) ApplyOptions ¶
func (o *OllamaProvider) ApplyOptions() (*OllamaOptions, error)
ApplyOptions merges the schema.yml ollama specific options with the default options
func (*OllamaProvider) GetConfig ¶
func (o *OllamaProvider) GetConfig() Config
func (*OllamaProvider) SupportsPDF ¶
func (o *OllamaProvider) SupportsPDF() bool
type OllamaRequest ¶
type OllamaRequest struct {
Model string `json:"model"`
Messages []OllamaMessage `json:"messages"`
Stream bool `json:"stream"`
Options map[string]any `json:"options"`
}
type OllamaResponse ¶
type OpenAIOptions ¶
type OpenAIProvider ¶
type OpenAIProvider struct {
// contains filtered or unexported fields
}
OpenAIProvider implements the OCRProvider interface
func (*OpenAIProvider) ApplyOptions ¶
func (o *OpenAIProvider) ApplyOptions() (*OpenAIOptions, error)
ApplyOptions merges the schema.yml openai specific options with the default options
func (*OpenAIProvider) GetConfig ¶
func (o *OpenAIProvider) GetConfig() Config
func (*OpenAIProvider) InputToMessages ¶
func (o *OpenAIProvider) InputToMessages(input OCRInput) ([]openai.ChatCompletionMessageParamUnion, error)
func (*OpenAIProvider) SupportsPDF ¶
func (o *OpenAIProvider) SupportsPDF() bool
func (*OpenAIProvider) WithImage ¶
func (o *OpenAIProvider) WithImage(base64Image string, prompt string) openai.ChatCompletionMessageParamUnion
func (*OpenAIProvider) WithPDF ¶
func (o *OpenAIProvider) WithPDF(base64PDF string, prompt string) openai.ChatCompletionMessageParamUnion
type PDFCapable ¶
type PDFCapable interface {
SupportsPDF() bool
}
type PipelineConfig ¶
type PipelineItem ¶
type PipelineItem struct {
Input *OCRInput
OriginalIndex int
PageIndex int // -1 for non-PDF pages or single-page docs
}
PipelineItem tracks an item through the processing pipeline. It maps expanded items (like PDF pages) back to their original source file.
type ProgressTracker ¶
type ProgressTracker struct {
// contains filtered or unexported fields
}
ProgressTracker handles progress tracking with atomic counters and periodic updates
func NewProgressTracker ¶
func NewProgressTracker(config ProgressTrackerConfig) *ProgressTracker
NewProgressTracker creates a new progress tracker
func WithCustomProgress ¶
func WithCustomProgress(total int, formatter func(inProgress, completed, failed, total int64) string) *ProgressTracker
WithCustomProgress creates a progress tracker with custom message formatter
func WithGenericProgress ¶
func WithGenericProgress(total int) *ProgressTracker
WithGenericProgress creates a progress tracker with default settings
func WithPrefixProgress ¶
func WithPrefixProgress(total int, prefix string) *ProgressTracker
WithPrefixProgress creates a progress tracker with a custom prefix
func (*ProgressTracker) AppendToPrefix ¶
func (pt *ProgressTracker) AppendToPrefix(suffix string)
AppendToPrefix concatenates a string to the current prefix (only works with default formatter)
func (*ProgressTracker) GetCounters ¶
func (pt *ProgressTracker) GetCounters() (inProgress, completed, failed, total int64)
func (*ProgressTracker) GetPrefix ¶
func (pt *ProgressTracker) GetPrefix() string
GetPrefix returns the current prefix (only meaningful with default formatter)
func (*ProgressTracker) IncrementCompleted ¶
func (pt *ProgressTracker) IncrementCompleted()
func (*ProgressTracker) IncrementFailed ¶
func (pt *ProgressTracker) IncrementFailed()
func (*ProgressTracker) IncrementInProgress ¶
func (pt *ProgressTracker) IncrementInProgress()
func (*ProgressTracker) IsComplete ¶
func (pt *ProgressTracker) IsComplete() bool
func (*ProgressTracker) SetPrefix ¶
func (pt *ProgressTracker) SetPrefix(newPrefix string)
SetPrefix updates the prefix for the progress tracker (only works with default formatter)
func (*ProgressTracker) SetTotal ¶
func (pt *ProgressTracker) SetTotal(newTotal int64)
func (*ProgressTracker) Start ¶
func (pt *ProgressTracker) Start()
Start begins the progress tracking with periodic updates
func (*ProgressTracker) Stop ¶
func (pt *ProgressTracker) Stop(message string)
type ProgressTrackerConfig ¶
type ProviderConfig ¶
type ProviderOptionsInterface ¶
type ProviderOptionsInterface interface {
// Apply merges default options with schema options and returns a config type the provider can use.
Apply(defaults any, config Config) (any, error)
}
ProviderOptionsInterface allows each provider to define their own options struct
type ProviderService ¶
type ProviderService struct {
// contains filtered or unexported fields
}
ProviderService composes providers with their dependencies
func NewProviderService ¶
func NewProviderService(provider OCRProvider, config Config) *ProviderService
func (*ProviderService) GetConfig ¶
func (s *ProviderService) GetConfig() Config
func (*ProviderService) SupportsPDF ¶
func (s *ProviderService) SupportsPDF() bool
type RateLimitConfig ¶
type RateLimiter ¶
type RateLimiter struct {
// contains filtered or unexported fields
}
func NewRateLimiter ¶
func NewRateLimiter(config RateLimitConfig) *RateLimiter
func (*RateLimiter) IsEnabled ¶
func (r *RateLimiter) IsEnabled() bool
type TesseractClient ¶
type TesseractClient struct{}
func (*TesseractClient) ConstructTesseractCommand ¶
func (*TesseractClient) IsTesseractInstalled ¶
func (c *TesseractClient) IsTesseractInstalled() bool
IsTesseractInstalled checks if tesseract is installed in $PATH
func (*TesseractClient) OCRImageCmd ¶
type TesseractProvider ¶
type TesseractProvider struct {
// contains filtered or unexported fields
}
TesseractProvider implements the Provider interface
func (*TesseractProvider) GetConfig ¶
func (p *TesseractProvider) GetConfig() Config
type TextCorrectionConfig ¶
type TextCorrectionConfig struct {
Enabled bool `yaml:"enabled"`
Provider ProviderConfig `yaml:"provider"`
RateLimit RateLimitConfig `yaml:"rate_limit"`
}
type TextProcessor ¶
func NewTextCorrectionProvider ¶
func NewTextCorrectionProvider(config Config) (TextProcessor, error)