docprocessing

package
v0.33.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 26, 2025 License: Apache-2.0 Imports: 25 Imported by: 0

Documentation

Index

Constants

View Source
// Default resource limits for document processing, plus the names of the
// environment variables associated with them. The code that reads these
// variables is not shown here.
const (
	// Document processing security limits
	DefaultMaxMemoryLimit             = int64(5 * 1024 * 1024 * 1024) // 5 GiB (5 * 1024^3 bytes) default memory limit, typed int64
	DefaultMaxFileSizeMB              = 100                           // Default file size in MB
	DocProcessingMaxMemoryLimitEnvVar = "DOCLING_MAX_MEMORY_LIMIT"    // Env var name for the memory limit; presumably overrides DefaultMaxMemoryLimit (unit not shown here, confirm)
	DocProcessingMaxFileSizeEnvVar    = "DOCLING_MAX_FILE_SIZE"       // Env var name for the file size limit; presumably overrides DefaultMaxFileSizeMB (confirm)
)
View Source
// Environment variable names used for LLM integration.
//
// NOTE: EnvOpenAIAPIBase, EnvOpenAIModel and EnvOpenAIAPIKey carry the same
// string values as EnvVLMAPIURL, EnvVLMModel and EnvVLMAPIKey, so each pair
// names one and the same environment variable.
const (
	EnvOpenAIAPIBase  = "DOCLING_VLM_API_URL"     // e.g., "https://api.openai.com/v1"
	EnvOpenAIModel    = "DOCLING_VLM_MODEL"       // e.g., "gpt-4-vision-preview"
	EnvOpenAIAPIKey   = "DOCLING_VLM_API_KEY"     // API key for the provider (consistent with VLM naming)
	EnvLLMMaxTokens   = "DOCLING_LLM_MAX_TOKENS"  // Maximum tokens for LLM response (default: 16384)
	EnvLLMTemperature = "DOCLING_LLM_TEMPERATURE" // Temperature for LLM inference (default: 0.1)
	EnvLLMTimeout     = "DOCLING_LLM_TIMEOUT"     // Timeout for LLM requests in seconds (default: 240)

	// Prompt configuration environment variables
	EnvPromptBase         = "DOCLING_LLM_PROMPT_BASE"         // Base prompt for diagram analysis
	EnvPromptFlowchart    = "DOCLING_LLM_PROMPT_FLOWCHART"    // Flowchart-specific prompt
	EnvPromptArchitecture = "DOCLING_LLM_PROMPT_ARCHITECTURE" // Architecture diagram prompt
	EnvPromptChart        = "DOCLING_LLM_PROMPT_CHART"        // Chart analysis prompt
	EnvPromptGeneric      = "DOCLING_LLM_PROMPT_GENERIC"      // Generic diagram prompt
)

Environment variable constants for LLM integration

View Source
// Fallback LLM settings. The values match the defaults quoted in the
// comments on EnvLLMMaxTokens, EnvLLMTemperature and EnvLLMTimeout.
const (
	DefaultMaxTokens   = 16384 // Maximum tokens for an LLM response
	DefaultTemperature = 0.1   // Sampling temperature (untyped floating-point constant)
	DefaultTimeout     = 240   // Request timeout; seconds, per the EnvLLMTimeout comment
)

Default LLM configuration values

View Source
// Environment variable names for the VLM pipeline, image processing and
// performance tuning. The defaults quoted in the trailing comments are applied
// by code that is not shown here.
//
// NOTE: EnvVLMAPIURL, EnvVLMModel and EnvVLMAPIKey have the same string values
// as EnvOpenAIAPIBase, EnvOpenAIModel and EnvOpenAIAPIKey.
const (
	// VLM Pipeline Configuration
	EnvVLMAPIURL        = "DOCLING_VLM_API_URL"        // User-provided API endpoint URL (e.g., "http://localhost:1234/v1")
	EnvVLMModel         = "DOCLING_VLM_MODEL"          // Model name/ID (e.g., "gpt-4-vision-preview", "SmolVLM-Instruct")
	EnvVLMAPIKey        = "DOCLING_VLM_API_KEY"        // Authentication key for external APIs
	EnvVLMTimeout       = "DOCLING_VLM_TIMEOUT"        // Request timeout in seconds (default: 240)
	EnvVLMFallbackLocal = "DOCLING_VLM_FALLBACK_LOCAL" // Enable local model fallback (default: true)

	// Image Processing Configuration
	EnvImageScale = "DOCLING_IMAGE_SCALE" // Image resolution scale factor (default: 3.0, range: 1.0-4.0)

	// Performance Optimisation Configuration
	EnvDisablePictureClassification = "DOCLING_DISABLE_PICTURE_CLASSIFICATION" // Disable picture classification to speed up processing (default: false)
	EnvDisablePictureDescription    = "DOCLING_DISABLE_PICTURE_DESCRIPTION"    // Disable picture description to speed up processing (default: false)
	EnvAcceleratorProcesses         = "DOCLING_ACCELERATOR_PROCESSES"          // Number of accelerator processes (default: CPU cores - 1)
)

Environment variable constants for VLM Pipeline integration and image processing

View Source
// Default prompt text.
const (
	// DefaultDiagramPrompt appears as an empty literal here only because the
	// documentation renderer elides long strings; per the marker below, the
	// real value is a 514-byte string.
	DefaultDiagramPrompt = `` /* 514-byte string literal not displayed */

)

Default prompts

Variables

View Source
// SupportedFileTypes maps each file extension (lower case, leading dot
// included) that document processing accepts to true. Extensions that are not
// listed read back as false.
var SupportedFileTypes = func() map[string]bool {
	extensions := []string{
		// Office and text documents
		".pdf", ".docx", ".doc", ".xlsx", ".xls",
		".pptx", ".ppt", ".txt", ".md", ".rtf",
		// Web and tabular data
		".html", ".htm", ".csv",
		// Raster images
		".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".tif",
	}

	supported := make(map[string]bool, len(extensions))
	for _, ext := range extensions {
		supported[ext] = true
	}
	return supported
}()

Supported file types for document processing

Functions

func CleanupEmbeddedScripts

func CleanupEmbeddedScripts() error

CleanupEmbeddedScripts removes the temporary directory containing the extracted scripts. It should be called during graceful shutdown, although the OS will eventually clean up temporary files anyway.

func GetEmbeddedScriptPath

func GetEmbeddedScriptPath() (string, error)

GetEmbeddedScriptPath extracts the embedded Python scripts to a temporary directory and returns the path to the main docling_processor.py script. This is thread-safe and only extracts once per process.

func IsEmbeddedScriptsAvailable

func IsEmbeddedScriptsAvailable() bool

IsEmbeddedScriptsAvailable checks if the embedded Python scripts are available

func IsLLMConfigured

func IsLLMConfigured() bool

IsLLMConfigured checks if the required environment variables are set

func ReadEmbeddedFile

func ReadEmbeddedFile(path string) ([]byte, error)

ReadEmbeddedFile reads an embedded file and returns its content

Types

type BatchProcessingRequest

// BatchProcessingRequest applies one set of processing options to several
// sources. Only Sources is always serialised; every other field is omitted
// from JSON when it holds its zero value (nil for the pointer fields).
type BatchProcessingRequest struct {
	Sources        []string       `json:"sources"`                   // Multiple document sources
	ProcessingMode ProcessingMode `json:"processing_mode,omitempty"` // Processing mode for all documents
	OutputFormat   OutputFormat   `json:"output_format,omitempty"`   // Output format for all documents
	EnableOCR      bool           `json:"enable_ocr,omitempty"`      // Enable OCR for all documents
	OCRLanguages   []string       `json:"ocr_languages,omitempty"`   // OCR languages for all documents
	PreserveImages bool           `json:"preserve_images,omitempty"` // Extract images from all documents
	// Pointer: nil (unset) is distinct from an explicit false.
	CacheEnabled   *bool          `json:"cache_enabled,omitempty"`   // Cache setting for all documents
	// Pointer: nil means unset. Unit is presumably seconds, as for
	// DocumentProcessingRequest.Timeout (confirm).
	Timeout        *int           `json:"timeout,omitempty"`         // Timeout for each document
	MaxConcurrency int            `json:"max_concurrency,omitempty"` // Maximum concurrent processing
}

BatchProcessingRequest represents a request to process multiple documents

type BatchProcessingResponse

// BatchProcessingResponse bundles the per-document results of a batch run with
// aggregate statistics. No field uses omitempty, so all four keys are always
// present in the JSON encoding.
type BatchProcessingResponse struct {
	Results   []DocumentProcessingResponse `json:"results"`    // Individual processing results
	Summary   BatchSummary                 `json:"summary"`    // Batch processing summary
	// encoding/json writes a time.Duration as an integer number of nanoseconds.
	TotalTime time.Duration                `json:"total_time"` // Total processing time
	Timestamp time.Time                    `json:"timestamp"`  // Batch processing timestamp
}

BatchProcessingResponse represents the response from batch processing

type BatchSummary

// BatchSummary holds aggregate counters for one batch run. None of the fields
// use omitempty, so zero counts are still written to JSON.
type BatchSummary struct {
	TotalDocuments  int `json:"total_documents"`  // Total number of documents
	SuccessfulCount int `json:"successful_count"` // Number of successfully processed documents
	FailedCount     int `json:"failed_count"`     // Number of failed documents
	CacheHitCount   int `json:"cache_hit_count"`  // Number of cache hits
	TotalPages      int `json:"total_pages"`      // Total pages processed
	TotalWords      int `json:"total_words"`      // Total words processed
	TotalImages     int `json:"total_images"`     // Total images extracted
	TotalTables     int `json:"total_tables"`     // Total tables extracted
}

BatchSummary provides summary statistics for batch processing

type BoundingBox

// BoundingBox is a rectangle given by its left/top corner and its size.
// NOTE(review): the unit (points, pixels, ...) and the coordinate origin are
// not stated here; confirm against the producer before doing arithmetic.
type BoundingBox struct {
	X      float64 `json:"x"`      // X coordinate (left)
	Y      float64 `json:"y"`      // Y coordinate (top)
	Width  float64 `json:"width"`  // Width
	Height float64 `json:"height"` // Height
}

BoundingBox represents the position and size of an element on a page

type CacheManager

// CacheManager exposes no fields. Per the signatures on this page it is
// created with NewCacheManager and used only through its methods.
type CacheManager struct {
	// contains filtered or unexported fields
}

CacheManager handles caching of document processing results

func NewCacheManager

func NewCacheManager(config *Config) *CacheManager

NewCacheManager creates a new cache manager

func (*CacheManager) CleanExpired

func (cm *CacheManager) CleanExpired() error

CleanExpired removes expired cache entries

func (*CacheManager) CleanOldFiles

func (cm *CacheManager) CleanOldFiles(maxAge time.Duration) error

CleanOldFiles removes cache files older than the specified duration. This is useful for cleaning up files that may not have proper TTL metadata.

func (*CacheManager) Clear

func (cm *CacheManager) Clear() error

Clear removes all cached results

func (*CacheManager) ClearFileCache

func (cm *CacheManager) ClearFileCache(source string) error

ClearFileCache removes all cache entries for a specific source file

func (*CacheManager) Delete

func (cm *CacheManager) Delete(cacheKey string) error

Delete removes a cached result

func (*CacheManager) GenerateCacheKey

func (cm *CacheManager) GenerateCacheKey(req *DocumentProcessingRequest) string

GenerateCacheKey generates a cache key for the given request

func (*CacheManager) Get

func (cm *CacheManager) Get(cacheKey string) (*DocumentProcessingResponse, bool)

Get retrieves a cached result if it exists and is valid

func (*CacheManager) GetCacheFilePath

func (cm *CacheManager) GetCacheFilePath(cacheKey string) string

GetCacheFilePath returns the file path for a cache key

func (*CacheManager) GetStats

func (cm *CacheManager) GetStats() (*CacheStats, error)

GetStats returns cache statistics

func (*CacheManager) PerformMaintenance

func (cm *CacheManager) PerformMaintenance(maxAge time.Duration) error

PerformMaintenance performs routine cache maintenance: it removes expired entries and removes old files (those older than maxAge).

func (*CacheManager) Set

func (cm *CacheManager) Set(cacheKey string, response *DocumentProcessingResponse) error

Set stores a result in the cache

type CacheStats

// CacheStats is a snapshot of cache state, as returned by
// CacheManager.GetStats. Every field is always written to JSON (no omitempty).
type CacheStats struct {
	Enabled      bool   `json:"enabled"`       // Presumably whether caching is switched on (confirm)
	Directory    string `json:"directory"`     // Presumably the cache directory path (confirm)
	TotalFiles   int    `json:"total_files"`   // Presumably the number of files in the cache (confirm)
	TotalSize    int64  `json:"total_size"`    // Size in bytes
	ExpiredFiles int    `json:"expired_files"` // Number of expired files
}

CacheStats provides statistics about the cache

type CachedResponse

// CachedResponse wraps a DocumentProcessingResponse (held by value, not by
// pointer) together with its cache key, a timestamp and a TTL.
type CachedResponse struct {
	Response  DocumentProcessingResponse `json:"response"`
	CacheKey  string                     `json:"cache_key"`
	Timestamp time.Time                  `json:"timestamp"` // Presumably when the entry was stored (confirm)
	// encoding/json writes a time.Duration as an integer number of nanoseconds.
	TTL       time.Duration              `json:"ttl"` // Time to live
}

CachedResponse represents a cached document processing response

type Config

// Config groups the document-processing settings. The fields carry no struct
// tags. Per the signatures on this page, instances come from DefaultConfig or
// LoadConfig (environment variables).
type Config struct {
	// Python Configuration
	PythonPath string // Path to Python executable with Docling installed

	// Cache Configuration
	CacheDir     string // Directory for caching processed documents
	CacheEnabled bool   // Enable/disable caching

	// Hardware Configuration
	HardwareAcceleration HardwareAcceleration // Hardware acceleration mode

	// Processing Configuration
	// Note the mixed units: seconds, megabytes and bytes respectively.
	Timeout        int   // Processing timeout in seconds
	MaxFileSize    int   // Maximum file size in MB
	MaxMemoryLimit int64 // Maximum memory limit in bytes

	// OCR Configuration
	OCRLanguages []string // Default OCR languages

	// Vision Model Configuration
	VisionModel string // Vision model to use

	// Certificate Configuration
	ExtraCACerts string // Path to additional CA certificates file or directory
}

Config holds the configuration for document processing

func DefaultConfig

func DefaultConfig() *Config

DefaultConfig returns the default configuration

func LoadConfig

func LoadConfig() *Config

LoadConfig loads configuration from environment variables

func (*Config) CleanupTemporaryFiles added in v0.21.2

func (c *Config) CleanupTemporaryFiles() error

CleanupTemporaryFiles performs cleanup of temporary files and directories

func (*Config) EnsureCacheDir

func (c *Config) EnsureCacheDir() error

EnsureCacheDir creates the cache directory if it doesn't exist

func (*Config) GetCertificateEnvironment

func (c *Config) GetCertificateEnvironment() []string

GetCertificateEnvironment returns environment variables for certificate configuration

func (*Config) GetMaxMemoryLimit added in v0.21.2

func (c *Config) GetMaxMemoryLimit() int64

GetMaxMemoryLimit returns the configured maximum memory limit in bytes

func (*Config) GetScriptPath

func (c *Config) GetScriptPath() string

GetScriptPath returns the path to the Python wrapper script

func (*Config) GetSystemInfo

func (c *Config) GetSystemInfo() *SystemInfo

GetSystemInfo returns system information for diagnostics

func (*Config) ResolveHardwareAcceleration

func (c *Config) ResolveHardwareAcceleration() HardwareAcceleration

ResolveHardwareAcceleration resolves the hardware acceleration setting

func (*Config) Validate

func (c *Config) Validate() error

Validate validates the configuration

func (*Config) ValidateCertificates

func (c *Config) ValidateCertificates() error

ValidateCertificates validates the certificate configuration

func (*Config) ValidateFileSize added in v0.21.2

func (c *Config) ValidateFileSize(fileSizeBytes int64) error

ValidateFileSize validates that the file size is within limits

func (*Config) ValidateFileType added in v0.21.2

func (c *Config) ValidateFileType(filePath string) error

ValidateFileType validates that the file type is supported for processing

func (*Config) ValidateMemoryLimit added in v0.21.2

func (c *Config) ValidateMemoryLimit() error

ValidateMemoryLimit validates that memory usage is within limits

type DiagramAnalysis

// DiagramAnalysis is the result type returned by
// DiagramLLMClient.AnalyseDiagram. All fields except TokenUsage are always
// written to JSON.
type DiagramAnalysis struct {
	Description    string           `json:"description"`
	DiagramType    string           `json:"diagram_type"`
	MermaidCode    string           `json:"mermaid_code"`
	Elements       []DiagramElement `json:"elements"`
	Confidence     float64          `json:"confidence"`
	Properties     map[string]any   `json:"properties"`
	// A time.Duration is encoded by encoding/json as integer nanoseconds. This
	// differs from ProcessingInfo.ProcessingTime, which is a float64 in seconds.
	ProcessingTime time.Duration    `json:"processing_time"`
	TokenUsage     *TokenUsage      `json:"token_usage,omitempty"` // Token usage from LLM provider (if available)
}

DiagramAnalysis represents the result of LLM-based diagram analysis

type DiagramElement

// DiagramElement describes one element of a diagram. Only Type is always
// serialised. Position is a textual description, while BoundingBox (nil when
// absent) is the geometric one.
type DiagramElement struct {
	Type        string       `json:"type"`                   // Element type (text, shape, connector, etc.)
	Content     string       `json:"content,omitempty"`      // Text content of the element
	Position    string       `json:"position,omitempty"`     // Position description within diagram
	BoundingBox *BoundingBox `json:"bounding_box,omitempty"` // Position within the diagram
}

DiagramElement represents a text or structural element within a diagram

type DiagramLLMClient

// DiagramLLMClient exposes no fields. Per the signatures on this page it is
// created with NewDiagramLLMClient, which can fail, and used via AnalyseDiagram.
type DiagramLLMClient struct {
	// contains filtered or unexported fields
}

DiagramLLMClient handles LLM-based diagram analysis using OpenAI API

func NewDiagramLLMClient

func NewDiagramLLMClient() (*DiagramLLMClient, error)

NewDiagramLLMClient creates a new LLM client for diagram analysis using OpenAI API

func (*DiagramLLMClient) AnalyseDiagram

func (c *DiagramLLMClient) AnalyseDiagram(diagram *ExtractedDiagram) (*DiagramAnalysis, error)

AnalyseDiagram performs LLM-based analysis of a diagram

type DocumentMetadata

// DocumentMetadata describes a processed document. Format is the only field
// without omitempty, so it is always written to JSON. The two date fields are
// pointers and are omitted when nil.
type DocumentMetadata struct {
	Title        string            `json:"title,omitempty"`         // Document title
	Author       string            `json:"author,omitempty"`        // Document author
	Subject      string            `json:"subject,omitempty"`       // Document subject
	Creator      string            `json:"creator,omitempty"`       // Document creator
	Producer     string            `json:"producer,omitempty"`      // Document producer
	CreationDate *time.Time        `json:"creation_date,omitempty"` // Creation date
	ModifiedDate *time.Time        `json:"modified_date,omitempty"` // Last modified date
	PageCount    int               `json:"page_count,omitempty"`    // Number of pages
	WordCount    int               `json:"word_count,omitempty"`    // Estimated word count
	Language     string            `json:"language,omitempty"`      // Detected language
	Format       string            `json:"format"`                  // Original document format
	FileSize     int64             `json:"file_size,omitempty"`     // File size in bytes
	Properties   map[string]string `json:"properties,omitempty"`    // Additional properties
}

DocumentMetadata contains metadata about the processed document

type DocumentProcessingRequest

// DocumentProcessingRequest carries the options for processing one document.
// Source is the only field that is always serialised. The pointer-typed fields
// (Timeout, MaxFileSize, ReturnInlineOnly, CellMatching) are nil when unset,
// which keeps "not provided" distinct from an explicit zero or false.
type DocumentProcessingRequest struct {
	Source                   string               `json:"source"`                                // File path, URL, or base64 content
	Profile                  ProcessingProfile    `json:"profile,omitempty"`                     // Processing profile (replaces multiple parameters)
	ProcessingMode           ProcessingMode       `json:"processing_mode,omitempty"`             // Processing mode (default: basic)
	OutputFormat             OutputFormat         `json:"output_format,omitempty"`               // Output format (default: markdown)
	EnableOCR                bool                 `json:"enable_ocr,omitempty"`                  // Enable OCR processing
	OCRLanguages             []string             `json:"ocr_languages,omitempty"`               // OCR language codes
	PreserveImages           bool                 `json:"preserve_images,omitempty"`             // Extract and preserve images
	Timeout                  *int                 `json:"timeout,omitempty"`                     // Processing timeout in seconds
	MaxFileSize              *int                 `json:"max_file_size,omitempty"`               // Maximum file size in MB
	ReturnInlineOnly         *bool                `json:"return_inline_only,omitempty"`          // Return content inline in the response only. When false (default), the tool will save the processed content to a file in the same directory as the source file, and also return the content inline.
	SaveTo                   string               `json:"save_to,omitempty"`                     // File path to save content when return_inline_only=false
	ClearFileCache           bool                 `json:"clear_file_cache,omitempty"`            // Force clear all cache entries for this source file before processing
	TableFormerMode          TableFormerMode      `json:"table_former_mode,omitempty"`           // TableFormer processing mode for table structure recognition
	CellMatching             *bool                `json:"cell_matching,omitempty"`               // Control table cell matching (true: use PDF cells, false: use predicted cells)
	VisionMode               VisionProcessingMode `json:"vision_mode,omitempty"`                 // Vision processing mode for enhanced document understanding
	DiagramDescription       bool                 `json:"diagram_description,omitempty"`         // Enable diagram and chart description using vision models
	ChartDataExtraction      bool                 `json:"chart_data_extraction,omitempty"`       // Enable data extraction from charts and graphs
	EnableRemoteServices     bool                 `json:"enable_remote_services,omitempty"`      // Allow communication with external vision model services
	ConvertDiagramsToMermaid bool                 `json:"convert_diagrams_to_mermaid,omitempty"` // Convert detected diagrams to Mermaid syntax using AI vision models
	GenerateDiagrams         bool                 `json:"generate_diagrams,omitempty"`           // Generate enhanced diagram analysis using external LLM (requires DOCLING_VLM_API_URL, DOCLING_VLM_MODEL, DOCLING_VLM_API_KEY environment variables)
	ExtractImages            bool                 `json:"extract_images,omitempty"`              // Extract individual images, charts, and diagrams as base64-encoded data with AI recreation prompts
	Debug                    bool                 `json:"debug,omitempty"`                       // Return debug information including environment variables (secrets masked)
}

DocumentProcessingRequest represents the input parameters for document processing

type DocumentProcessingResponse

// DocumentProcessingResponse is the result of processing one document.
// Source, Content, ProcessingInfo and CacheHit are always written to JSON; the
// remaining fields are omitted when empty. Failure is reported in-band through
// the Error string.
type DocumentProcessingResponse struct {
	Source         string             `json:"source"`             // Original source
	Content        string             `json:"content"`            // Processed content (markdown)
	Metadata       *DocumentMetadata  `json:"metadata,omitempty"` // Document metadata
	Images         []ExtractedImage   `json:"images,omitempty"`   // Extracted images
	Tables         []ExtractedTable   `json:"tables,omitempty"`   // Extracted tables
	Diagrams       []ExtractedDiagram `json:"diagrams,omitempty"` // Extracted diagrams
	ProcessingInfo ProcessingInfo     `json:"processing_info"`    // Processing information
	CacheHit       bool               `json:"cache_hit"`          // Whether result came from cache
	Error          string             `json:"error,omitempty"`    // Error message if processing failed
}

DocumentProcessingResponse represents the output from document processing

type DocumentProcessorTool

// DocumentProcessorTool exposes no fields. Per the signatures on this page it
// is used through its Definition, Execute and ProvideExtendedInfo methods.
type DocumentProcessorTool struct {
	// contains filtered or unexported fields
}

DocumentProcessorTool implements document processing using Docling via Python subprocess

func (*DocumentProcessorTool) Definition

func (t *DocumentProcessorTool) Definition() mcp.Tool

Definition returns the MCP tool definition

func (*DocumentProcessorTool) Execute

func (t *DocumentProcessorTool) Execute(ctx context.Context, logger *logrus.Logger, cache *sync.Map, args map[string]any) (*mcp.CallToolResult, error)

Execute processes the document using the Python wrapper

func (*DocumentProcessorTool) ProvideExtendedInfo added in v0.22.0

func (t *DocumentProcessorTool) ProvideExtendedInfo() *tools.ExtendedHelp

ProvideExtendedInfo provides detailed usage information for the document processing tool

type ErrorInfo

// ErrorInfo is a structured error record. Code, Message, Timestamp and
// Recoverable are always written to JSON; Details, Source and Context are
// omitted when empty.
type ErrorInfo struct {
	Code        string            `json:"code"`              // Error code
	Message     string            `json:"message"`           // Error message
	Details     string            `json:"details,omitempty"` // Additional error details
	Source      string            `json:"source,omitempty"`  // Source that caused the error
	Timestamp   time.Time         `json:"timestamp"`         // When the error occurred
	Context     map[string]string `json:"context,omitempty"` // Additional context
	Recoverable bool              `json:"recoverable"`       // Whether the error is recoverable
}

ErrorInfo represents detailed error information

type ExtractedDiagram

// ExtractedDiagram describes one diagram found in a document. It is also the
// input type of DiagramLLMClient.AnalyseDiagram. Only ID and Type are always
// written to JSON.
//
// NOTE(review): Type and DiagramType are both described as the kind of
// diagram; how they differ is not visible here. Confirm before relying on
// either.
type ExtractedDiagram struct {
	ID          string           `json:"id"`                     // Unique diagram identifier
	Type        string           `json:"type"`                   // Type of diagram (flowchart, chart, diagram, etc.)
	Caption     string           `json:"caption,omitempty"`      // Diagram caption if available
	Description string           `json:"description,omitempty"`  // Generated description of the diagram
	DiagramType string           `json:"diagram_type,omitempty"` // Classified diagram type (flowchart, chart, etc.)
	MermaidCode string           `json:"mermaid_code,omitempty"` // Generated Mermaid syntax for the diagram
	Base64Data  string           `json:"base64_data,omitempty"`  // Base64-encoded image data for LLM vision processing
	Elements    []DiagramElement `json:"elements,omitempty"`     // Text elements within the diagram
	PageNumber  int              `json:"page_number,omitempty"`  // Page number where diagram appears
	BoundingBox *BoundingBox     `json:"bounding_box,omitempty"` // Position on page
	Confidence  float64          `json:"confidence,omitempty"`   // Confidence score for diagram analysis
	Properties  map[string]any   `json:"properties,omitempty"`   // Additional diagram-specific properties
}

ExtractedDiagram represents a diagram extracted from the document

type ExtractedImage

// ExtractedImage describes one image found in a document. ID, Type and Format
// are always written to JSON; all other fields are omitted when they hold
// their zero value.
type ExtractedImage struct {
	ID            string       `json:"id"`                       // Unique image identifier
	Type          string       `json:"type"`                     // Type of image (picture, table, chart, diagram)
	Caption       string       `json:"caption,omitempty"`        // Image caption if available
	AltText       string       `json:"alt_text,omitempty"`       // Alternative text
	Format        string       `json:"format"`                   // Image format (PNG, JPEG, etc.)
	Width         int          `json:"width,omitempty"`          // Image width in pixels
	Height        int          `json:"height,omitempty"`         // Image height in pixels
	Size          int64        `json:"size,omitempty"`           // Image size in bytes
	FilePath      string       `json:"file_path,omitempty"`      // Path to saved image file
	PageNumber    int          `json:"page_number,omitempty"`    // Page number where image appears
	BoundingBox   *BoundingBox `json:"bounding_box,omitempty"`   // Position on page
	ExtractedText []string     `json:"extracted_text,omitempty"` // Text elements extracted from the image
}

ExtractedImage represents an image extracted from the document

type ExtractedTable

// ExtractedTable describes one table found in a document, as raw cells and
// optionally as pre-rendered Markdown or CSV text.
type ExtractedTable struct {
	ID          string       `json:"id"`                     // Unique table identifier
	Caption     string       `json:"caption,omitempty"`      // Table caption if available
	Headers     []string     `json:"headers,omitempty"`      // Column headers
	// No omitempty: a nil Rows slice is encoded as JSON null, an empty one as [].
	Rows        [][]string   `json:"rows"`                   // Table data rows
	PageNumber  int          `json:"page_number,omitempty"`  // Page number where table appears
	BoundingBox *BoundingBox `json:"bounding_box,omitempty"` // Position on page
	Markdown    string       `json:"markdown,omitempty"`     // Markdown representation
	CSV         string       `json:"csv,omitempty"`          // CSV representation
}

ExtractedTable represents a table extracted from the document

type HardwareAcceleration

type HardwareAcceleration string

HardwareAcceleration defines the hardware acceleration mode

// Values of HardwareAcceleration declared by this package.
const (
	HardwareAccelerationAuto HardwareAcceleration = "auto" // Auto-detect best option
	HardwareAccelerationMPS  HardwareAcceleration = "mps"  // Metal Performance Shaders (macOS)
	HardwareAccelerationCUDA HardwareAcceleration = "cuda" // CUDA (NVIDIA GPUs)
	HardwareAccelerationCPU  HardwareAcceleration = "cpu"  // CPU-only processing
)

type LLMConfig

// LLMConfig holds connection settings for an LLM client. The fields have no
// struct tags.
// NOTE(review): the field notes below are inferred from the names only; the
// code that fills and reads this struct is not shown here.
type LLMConfig struct {
	Provider string // Presumably the provider name (confirm)
	Model    string // Presumably the model name/ID (confirm)
	APIKey   string // Presumably the API key; treat as a secret (confirm)
	BaseURL  string // Presumably the API base URL (confirm)
}

LLMConfig contains configuration for the LLM client

type OutputFormat

type OutputFormat string

OutputFormat defines the output format for processed documents

// Values of OutputFormat declared by this package.
const (
	OutputFormatMarkdown OutputFormat = "markdown" // Markdown output (default)
	OutputFormatJSON     OutputFormat = "json"     // JSON metadata
	OutputFormatBoth     OutputFormat = "both"     // Both markdown and JSON
)

type ProcessingInfo

// ProcessingInfo records how a document was processed: mode, hardware,
// versions, timing and cache key. Fields without omitempty are always written
// to JSON.
type ProcessingInfo struct {
	ProcessingMode       ProcessingMode       `json:"processing_mode"`           // Mode used for processing
	ProcessingMethod     string               `json:"processing_method"`         // Concise description of processing method used
	HardwareAcceleration HardwareAcceleration `json:"hardware_acceleration"`     // Hardware acceleration used
	VisionModel          string               `json:"vision_model,omitempty"`    // Vision model used (if any)
	OCREnabled           bool                 `json:"ocr_enabled"`               // Whether OCR was enabled
	OCRLanguages         []string             `json:"ocr_languages,omitempty"`   // OCR languages used
	// A float64 in seconds, unlike DiagramAnalysis.ProcessingTime, which is a
	// time.Duration.
	ProcessingTime       float64              `json:"processing_time"`           // Time taken to process in seconds
	PythonVersion        string               `json:"python_version,omitempty"`  // Python version used
	DoclingVersion       string               `json:"docling_version,omitempty"` // Docling version used
	CacheKey             string               `json:"cache_key,omitempty"`       // Cache key used
	Timestamp            time.Time            `json:"timestamp"`                 // Processing timestamp
	TokenUsage           *TokenUsage          `json:"token_usage,omitempty"`     // Token usage from external LLM (if available)
}

ProcessingInfo contains information about the processing operation

type ProcessingMode

type ProcessingMode string

ProcessingMode defines the type of document processing to perform

// Values of ProcessingMode declared by this package.
// Note that "basic" is also the string value of ProfileBasic, and "advanced"
// is also the string value of VisionModeAdvanced; the Go types differ.
const (
	ProcessingModeBasic    ProcessingMode = "basic"    // Fast, code-only processing
	ProcessingModeAdvanced ProcessingMode = "advanced" // Vision model with layout preservation
	ProcessingModeOCR      ProcessingMode = "ocr"      // OCR for scanned documents
	ProcessingModeTables   ProcessingMode = "tables"   // Table extraction focus
	ProcessingModeImages   ProcessingMode = "images"   // Image extraction focus
)

type ProcessingProfile

type ProcessingProfile string

ProcessingProfile defines preset configurations for common document processing scenarios

// Values of ProcessingProfile declared by this package. The string values are
// hyphen-separated, lower case.
const (
	ProfileBasic          ProcessingProfile = "basic"           // Text extraction only (fast processing)
	ProfileTextAndImage   ProcessingProfile = "text-and-image"  // Text and image extraction with tables
	ProfileScanned        ProcessingProfile = "scanned"         // OCR-focused processing for scanned documents
	ProfileLLMSmolDocling ProcessingProfile = "llm-smoldocling" // Text and image extraction enhanced with SmolDocling vision model
	ProfileLLMExternal    ProcessingProfile = "llm-external"    // Text and image extraction enhanced with external vision LLM for diagram conversion to Mermaid
)

type SystemInfo

// SystemInfo is the diagnostic snapshot returned by Config.GetSystemInfo. It
// covers the host platform, the Python/Docling installation and the limits in
// effect.
type SystemInfo struct {
	Platform             string                 `json:"platform"`                        // Operating system
	Architecture         string                 `json:"architecture"`                    // CPU architecture
	PythonPath           string                 `json:"python_path,omitempty"`           // Path to Python executable
	PythonVersion        string                 `json:"python_version,omitempty"`        // Python version
	DoclingVersion       string                 `json:"docling_version,omitempty"`       // Docling version
	DoclingAvailable     bool                   `json:"docling_available"`               // Whether Docling is available
	// The JSON key is "hardware_acceleration_available", not the field name.
	HardwareAcceleration []HardwareAcceleration `json:"hardware_acceleration_available"` // Available acceleration options
	CacheDirectory       string                 `json:"cache_directory,omitempty"`       // Cache directory path
	CacheEnabled         bool                   `json:"cache_enabled"`                   // Whether caching is enabled
	MaxFileSize          int                    `json:"max_file_size"`                   // Maximum file size in MB
	MaxMemoryLimit       int64                  `json:"max_memory_limit"`                // Maximum memory limit in bytes
	DefaultTimeout       int                    `json:"default_timeout"`                 // Default timeout in seconds
}

SystemInfo represents system information for diagnostics

type TableFormerMode

type TableFormerMode string

TableFormerMode defines the TableFormer processing mode for table structure recognition

// Values of TableFormerMode declared by this package.
const (
	TableFormerModeFast     TableFormerMode = "fast"     // Faster but less accurate table processing
	TableFormerModeAccurate TableFormerMode = "accurate" // More accurate but slower table processing (default)
)

type TokenUsage

// TokenUsage holds token counters reported for an LLM call. Every field uses
// omitempty, so a counter equal to zero is left out of the JSON encoding
// rather than written as 0.
type TokenUsage struct {
	PromptTokens     int `json:"prompt_tokens,omitempty"`     // Tokens used in prompts
	CompletionTokens int `json:"completion_tokens,omitempty"` // Tokens used in completions
	TotalTokens      int `json:"total_tokens,omitempty"`      // Total tokens used
}

TokenUsage represents token consumption from external LLM providers

type VisionProcessingMode

type VisionProcessingMode string

VisionProcessingMode defines the vision model processing mode for enhanced document understanding

// Values of VisionProcessingMode declared by this package.
// Note that "advanced" is also the string value of ProcessingModeAdvanced; the
// Go types differ.
const (
	VisionModeStandard    VisionProcessingMode = "standard"    // Standard vision processing
	VisionModeSmolDocling VisionProcessingMode = "smoldocling" // Compact vision-language model (256M parameters)
	VisionModeAdvanced    VisionProcessingMode = "advanced"    // Advanced vision processing with remote services
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL