package docprocessing

Version: v0.33.2 (latest) | Published: Aug 26, 2025 | License: Apache-2.0 | Imports: 25 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/sammcj/mcp-devtools

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func CleanupEmbeddedScripts() error
func GetEmbeddedScriptPath() (string, error)
func IsEmbeddedScriptsAvailable() bool
func IsLLMConfigured() bool
func ReadEmbeddedFile(path string) ([]byte, error)
type BatchProcessingRequest
type BatchProcessingResponse
type BatchSummary
type BoundingBox
type CacheManager
- func NewCacheManager(config *Config) *CacheManager
- func (cm *CacheManager) CleanExpired() error
- func (cm *CacheManager) CleanOldFiles(maxAge time.Duration) error
- func (cm *CacheManager) Clear() error
- func (cm *CacheManager) ClearFileCache(source string) error
- func (cm *CacheManager) Delete(cacheKey string) error
- func (cm *CacheManager) GenerateCacheKey(req *DocumentProcessingRequest) string
- func (cm *CacheManager) Get(cacheKey string) (*DocumentProcessingResponse, bool)
- func (cm *CacheManager) GetCacheFilePath(cacheKey string) string
- func (cm *CacheManager) GetStats() (*CacheStats, error)
- func (cm *CacheManager) PerformMaintenance(maxAge time.Duration) error
- func (cm *CacheManager) Set(cacheKey string, response *DocumentProcessingResponse) error
type CacheStats
type CachedResponse
type Config
- func DefaultConfig() *Config
- func LoadConfig() *Config
- func (c *Config) CleanupTemporaryFiles() error
- func (c *Config) EnsureCacheDir() error
- func (c *Config) GetCertificateEnvironment() []string
- func (c *Config) GetMaxMemoryLimit() int64
- func (c *Config) GetScriptPath() string
- func (c *Config) GetSystemInfo() *SystemInfo
- func (c *Config) ResolveHardwareAcceleration() HardwareAcceleration
- func (c *Config) Validate() error
- func (c *Config) ValidateCertificates() error
- func (c *Config) ValidateFileSize(fileSizeBytes int64) error
- func (c *Config) ValidateFileType(filePath string) error
- func (c *Config) ValidateMemoryLimit() error
type DiagramAnalysis
type DiagramElement
type DiagramLLMClient
- func NewDiagramLLMClient() (*DiagramLLMClient, error)
- func (c *DiagramLLMClient) AnalyseDiagram(diagram *ExtractedDiagram) (*DiagramAnalysis, error)
type DocumentMetadata
type DocumentProcessingRequest
type DocumentProcessingResponse
type DocumentProcessorTool
- func (t *DocumentProcessorTool) Definition() mcp.Tool
- func (t *DocumentProcessorTool) Execute(ctx context.Context, logger *logrus.Logger, cache *sync.Map, ...) (*mcp.CallToolResult, error)
- func (t *DocumentProcessorTool) ProvideExtendedInfo() *tools.ExtendedHelp
type ErrorInfo
type ExtractedDiagram
type ExtractedImage
type ExtractedTable
type HardwareAcceleration
type LLMConfig
type OutputFormat
type ProcessingInfo
type ProcessingMode
type ProcessingProfile
type SystemInfo
type TableFormerMode
type TokenUsage
type VisionProcessingMode

Constants ¶

View Source

const (
	// Document processing security limits
	DefaultMaxMemoryLimit             = int64(5 * 1024 * 1024 * 1024) // 5GB default memory limit
	DefaultMaxFileSizeMB              = 100                           // Default file size in MB
	DocProcessingMaxMemoryLimitEnvVar = "DOCLING_MAX_MEMORY_LIMIT"
	DocProcessingMaxFileSizeEnvVar    = "DOCLING_MAX_FILE_SIZE"
)

View Source

const (
	EnvOpenAIAPIBase  = "DOCLING_VLM_API_URL"     // e.g., "https://api.openai.com/v1"
	EnvOpenAIModel    = "DOCLING_VLM_MODEL"       // e.g., "gpt-4-vision-preview"
	EnvOpenAIAPIKey   = "DOCLING_VLM_API_KEY"     // API key for the provider (consistent with VLM naming)
	EnvLLMMaxTokens   = "DOCLING_LLM_MAX_TOKENS"  // Maximum tokens for LLM response (default: 16384)
	EnvLLMTemperature = "DOCLING_LLM_TEMPERATURE" // Temperature for LLM inference (default: 0.1)
	EnvLLMTimeout     = "DOCLING_LLM_TIMEOUT"     // Timeout for LLM requests in seconds (default: 240)

	// Prompt configuration environment variables
	EnvPromptBase         = "DOCLING_LLM_PROMPT_BASE"         // Base prompt for diagram analysis
	EnvPromptFlowchart    = "DOCLING_LLM_PROMPT_FLOWCHART"    // Flowchart-specific prompt
	EnvPromptArchitecture = "DOCLING_LLM_PROMPT_ARCHITECTURE" // Architecture diagram prompt
	EnvPromptChart        = "DOCLING_LLM_PROMPT_CHART"        // Chart analysis prompt
	EnvPromptGeneric      = "DOCLING_LLM_PROMPT_GENERIC"      // Generic diagram prompt
)

Environment variable constants for LLM integration

View Source

const (
	DefaultMaxTokens   = 16384
	DefaultTemperature = 0.1
	DefaultTimeout     = 240
)

Default LLM configuration values

View Source

const (
	// VLM Pipeline Configuration
	EnvVLMAPIURL        = "DOCLING_VLM_API_URL"        // User-provided API endpoint URL (e.g., "http://localhost:1234/v1")
	EnvVLMModel         = "DOCLING_VLM_MODEL"          // Model name/ID (e.g., "gpt-4-vision-preview", "SmolVLM-Instruct")
	EnvVLMAPIKey        = "DOCLING_VLM_API_KEY"        // Authentication key for external APIs
	EnvVLMTimeout       = "DOCLING_VLM_TIMEOUT"        // Request timeout in seconds (default: 240)
	EnvVLMFallbackLocal = "DOCLING_VLM_FALLBACK_LOCAL" // Enable local model fallback (default: true)

	// Image Processing Configuration
	EnvImageScale = "DOCLING_IMAGE_SCALE" // Image resolution scale factor (default: 3.0, range: 1.0-4.0)

	// Performance Optimisation Configuration
	EnvDisablePictureClassification = "DOCLING_DISABLE_PICTURE_CLASSIFICATION" // Disable picture classification to speed up processing (default: false)
	EnvDisablePictureDescription    = "DOCLING_DISABLE_PICTURE_DESCRIPTION"    // Disable picture description to speed up processing (default: false)
	EnvAcceleratorProcesses         = "DOCLING_ACCELERATOR_PROCESSES"          // Number of accelerator processes (default: CPU cores - 1)
)

Environment variable constants for VLM Pipeline integration and image processing

View Source

const (
	DefaultDiagramPrompt = `` /* 514-byte string literal not displayed */

)

Default prompts

Variables ¶

View Source

var SupportedFileTypes = map[string]bool{

	".pdf":  true,
	".docx": true,
	".doc":  true,
	".xlsx": true,
	".xls":  true,
	".pptx": true,
	".ppt":  true,
	".txt":  true,
	".md":   true,
	".rtf":  true,

	".html": true,
	".htm":  true,
	".csv":  true,

	".png":  true,
	".jpg":  true,
	".jpeg": true,
	".gif":  true,
	".bmp":  true,
	".tiff": true,
	".tif":  true,
}

Supported file types for document processing

Functions ¶

func CleanupEmbeddedScripts ¶

func CleanupEmbeddedScripts() error

CleanupEmbeddedScripts removes the temporary directory containing extracted scripts. This should be called during graceful shutdown, but the OS will clean up temp files anyway.

func GetEmbeddedScriptPath ¶

func GetEmbeddedScriptPath() (string, error)

GetEmbeddedScriptPath extracts the embedded Python scripts to a temporary directory and returns the path to the main docling_processor.py script. This is thread-safe and only extracts once per process.

func IsEmbeddedScriptsAvailable ¶

func IsEmbeddedScriptsAvailable() bool

IsEmbeddedScriptsAvailable checks if the embedded Python scripts are available

func IsLLMConfigured ¶

func IsLLMConfigured() bool

IsLLMConfigured checks if the required environment variables are set

func ReadEmbeddedFile ¶

func ReadEmbeddedFile(path string) ([]byte, error)

ReadEmbeddedFile reads an embedded file and returns its content

Types ¶

type BatchProcessingRequest ¶

type BatchProcessingRequest struct {
	Sources        []string       `json:"sources"`                   // Multiple document sources
	ProcessingMode ProcessingMode `json:"processing_mode,omitempty"` // Processing mode for all documents
	OutputFormat   OutputFormat   `json:"output_format,omitempty"`   // Output format for all documents
	EnableOCR      bool           `json:"enable_ocr,omitempty"`      // Enable OCR for all documents
	OCRLanguages   []string       `json:"ocr_languages,omitempty"`   // OCR languages for all documents
	PreserveImages bool           `json:"preserve_images,omitempty"` // Extract images from all documents
	CacheEnabled   *bool          `json:"cache_enabled,omitempty"`   // Cache setting for all documents
	Timeout        *int           `json:"timeout,omitempty"`         // Timeout for each document
	MaxConcurrency int            `json:"max_concurrency,omitempty"` // Maximum concurrent processing
}

BatchProcessingRequest represents a request to process multiple documents

type BatchProcessingResponse ¶

type BatchProcessingResponse struct {
	Results   []DocumentProcessingResponse `json:"results"`    // Individual processing results
	Summary   BatchSummary                 `json:"summary"`    // Batch processing summary
	TotalTime time.Duration                `json:"total_time"` // Total processing time
	Timestamp time.Time                    `json:"timestamp"`  // Batch processing timestamp
}

BatchProcessingResponse represents the response from batch processing

type BatchSummary ¶

type BatchSummary struct {
	TotalDocuments  int `json:"total_documents"`  // Total number of documents
	SuccessfulCount int `json:"successful_count"` // Number of successfully processed documents
	FailedCount     int `json:"failed_count"`     // Number of failed documents
	CacheHitCount   int `json:"cache_hit_count"`  // Number of cache hits
	TotalPages      int `json:"total_pages"`      // Total pages processed
	TotalWords      int `json:"total_words"`      // Total words processed
	TotalImages     int `json:"total_images"`     // Total images extracted
	TotalTables     int `json:"total_tables"`     // Total tables extracted
}

BatchSummary provides summary statistics for batch processing

type BoundingBox ¶

type BoundingBox struct {
	X      float64 `json:"x"`      // X coordinate (left)
	Y      float64 `json:"y"`      // Y coordinate (top)
	Width  float64 `json:"width"`  // Width
	Height float64 `json:"height"` // Height
}

BoundingBox represents the position and size of an element on a page

type CacheManager ¶

type CacheManager struct {
	// contains filtered or unexported fields
}

CacheManager handles caching of document processing results

func NewCacheManager ¶

func NewCacheManager(config *Config) *CacheManager

NewCacheManager creates a new cache manager

func (*CacheManager) CleanExpired ¶

func (cm *CacheManager) CleanExpired() error

CleanExpired removes expired cache entries

func (*CacheManager) CleanOldFiles ¶

func (cm *CacheManager) CleanOldFiles(maxAge time.Duration) error

CleanOldFiles removes cache files older than the specified duration. This is useful for cleaning up files that may not have proper TTL metadata.

func (*CacheManager) Clear ¶

func (cm *CacheManager) Clear() error

Clear removes all cached results

func (*CacheManager) ClearFileCache ¶

func (cm *CacheManager) ClearFileCache(source string) error

ClearFileCache removes all cache entries for a specific source file

func (*CacheManager) Delete ¶

func (cm *CacheManager) Delete(cacheKey string) error

Delete removes a cached result

func (*CacheManager) GenerateCacheKey ¶

func (cm *CacheManager) GenerateCacheKey(req *DocumentProcessingRequest) string

GenerateCacheKey generates a cache key for the given request

func (*CacheManager) Get ¶

func (cm *CacheManager) Get(cacheKey string) (*DocumentProcessingResponse, bool)

Get retrieves a cached result if it exists and is valid

func (*CacheManager) GetCacheFilePath ¶

func (cm *CacheManager) GetCacheFilePath(cacheKey string) string

GetCacheFilePath returns the file path for a cache key

func (*CacheManager) GetStats ¶

func (cm *CacheManager) GetStats() (*CacheStats, error)

GetStats returns cache statistics

func (*CacheManager) PerformMaintenance ¶

func (cm *CacheManager) PerformMaintenance(maxAge time.Duration) error

PerformMaintenance performs routine cache maintenance, including:

- Removing expired entries
- Removing old files (older than maxAge)

func (*CacheManager) Set ¶

func (cm *CacheManager) Set(cacheKey string, response *DocumentProcessingResponse) error

Set stores a result in the cache

type CacheStats ¶

type CacheStats struct {
	Enabled      bool   `json:"enabled"`
	Directory    string `json:"directory"`
	TotalFiles   int    `json:"total_files"`
	TotalSize    int64  `json:"total_size"`    // Size in bytes
	ExpiredFiles int    `json:"expired_files"` // Number of expired files
}

CacheStats provides statistics about the cache

type CachedResponse ¶

type CachedResponse struct {
	Response  DocumentProcessingResponse `json:"response"`
	CacheKey  string                     `json:"cache_key"`
	Timestamp time.Time                  `json:"timestamp"`
	TTL       time.Duration              `json:"ttl"` // Time to live
}

CachedResponse represents a cached document processing response

type Config ¶

type Config struct {
	// Python Configuration
	PythonPath string // Path to Python executable with Docling installed

	// Cache Configuration
	CacheDir     string // Directory for caching processed documents
	CacheEnabled bool   // Enable/disable caching

	// Hardware Configuration
	HardwareAcceleration HardwareAcceleration // Hardware acceleration mode

	// Processing Configuration
	Timeout        int   // Processing timeout in seconds
	MaxFileSize    int   // Maximum file size in MB
	MaxMemoryLimit int64 // Maximum memory limit in bytes

	// OCR Configuration
	OCRLanguages []string // Default OCR languages

	// Vision Model Configuration
	VisionModel string // Vision model to use

	// Certificate Configuration
	ExtraCACerts string // Path to additional CA certificates file or directory
}

Config holds the configuration for document processing

func DefaultConfig ¶

func DefaultConfig() *Config

DefaultConfig returns the default configuration

func LoadConfig ¶

func LoadConfig() *Config

LoadConfig loads configuration from environment variables

func (*Config) CleanupTemporaryFiles ¶ added in v0.21.2

func (c *Config) CleanupTemporaryFiles() error

CleanupTemporaryFiles performs cleanup of temporary files and directories

func (*Config) EnsureCacheDir ¶

func (c *Config) EnsureCacheDir() error

EnsureCacheDir creates the cache directory if it doesn't exist

func (*Config) GetCertificateEnvironment ¶

func (c *Config) GetCertificateEnvironment() []string

GetCertificateEnvironment returns environment variables for certificate configuration

func (*Config) GetMaxMemoryLimit ¶ added in v0.21.2

func (c *Config) GetMaxMemoryLimit() int64

GetMaxMemoryLimit returns the configured maximum memory limit in bytes

func (*Config) GetScriptPath ¶

func (c *Config) GetScriptPath() string

GetScriptPath returns the path to the Python wrapper script

func (*Config) GetSystemInfo ¶

func (c *Config) GetSystemInfo() *SystemInfo

GetSystemInfo returns system information for diagnostics

func (*Config) ResolveHardwareAcceleration ¶

func (c *Config) ResolveHardwareAcceleration() HardwareAcceleration

ResolveHardwareAcceleration resolves the hardware acceleration setting

func (*Config) Validate ¶

func (c *Config) Validate() error

Validate validates the configuration

func (*Config) ValidateCertificates ¶

func (c *Config) ValidateCertificates() error

ValidateCertificates validates the certificate configuration

func (*Config) ValidateFileSize ¶ added in v0.21.2

func (c *Config) ValidateFileSize(fileSizeBytes int64) error

ValidateFileSize validates that the file size is within limits

func (*Config) ValidateFileType ¶ added in v0.21.2

func (c *Config) ValidateFileType(filePath string) error

ValidateFileType validates that the file type is supported for processing

func (*Config) ValidateMemoryLimit ¶ added in v0.21.2

func (c *Config) ValidateMemoryLimit() error

ValidateMemoryLimit validates that memory usage is within limits

type DiagramAnalysis ¶

type DiagramAnalysis struct {
	Description    string           `json:"description"`
	DiagramType    string           `json:"diagram_type"`
	MermaidCode    string           `json:"mermaid_code"`
	Elements       []DiagramElement `json:"elements"`
	Confidence     float64          `json:"confidence"`
	Properties     map[string]any   `json:"properties"`
	ProcessingTime time.Duration    `json:"processing_time"`
	TokenUsage     *TokenUsage      `json:"token_usage,omitempty"` // Token usage from LLM provider (if available)
}

DiagramAnalysis represents the result of LLM-based diagram analysis

type DiagramElement ¶

type DiagramElement struct {
	Type        string       `json:"type"`                   // Element type (text, shape, connector, etc.)
	Content     string       `json:"content,omitempty"`      // Text content of the element
	Position    string       `json:"position,omitempty"`     // Position description within diagram
	BoundingBox *BoundingBox `json:"bounding_box,omitempty"` // Position within the diagram
}

DiagramElement represents a text or structural element within a diagram

type DiagramLLMClient ¶

type DiagramLLMClient struct {
	// contains filtered or unexported fields
}

DiagramLLMClient handles LLM-based diagram analysis using the OpenAI API

func NewDiagramLLMClient ¶

func NewDiagramLLMClient() (*DiagramLLMClient, error)

NewDiagramLLMClient creates a new LLM client for diagram analysis using the OpenAI API

func (*DiagramLLMClient) AnalyseDiagram ¶

func (c *DiagramLLMClient) AnalyseDiagram(diagram *ExtractedDiagram) (*DiagramAnalysis, error)

AnalyseDiagram performs LLM-based analysis of a diagram

type DocumentMetadata ¶

type DocumentMetadata struct {
	Title        string            `json:"title,omitempty"`         // Document title
	Author       string            `json:"author,omitempty"`        // Document author
	Subject      string            `json:"subject,omitempty"`       // Document subject
	Creator      string            `json:"creator,omitempty"`       // Document creator
	Producer     string            `json:"producer,omitempty"`      // Document producer
	CreationDate *time.Time        `json:"creation_date,omitempty"` // Creation date
	ModifiedDate *time.Time        `json:"modified_date,omitempty"` // Last modified date
	PageCount    int               `json:"page_count,omitempty"`    // Number of pages
	WordCount    int               `json:"word_count,omitempty"`    // Estimated word count
	Language     string            `json:"language,omitempty"`      // Detected language
	Format       string            `json:"format"`                  // Original document format
	FileSize     int64             `json:"file_size,omitempty"`     // File size in bytes
	Properties   map[string]string `json:"properties,omitempty"`    // Additional properties
}

DocumentMetadata contains metadata about the processed document

type DocumentProcessingRequest ¶

type DocumentProcessingRequest struct {
	Source                   string               `json:"source"`                                // File path, URL, or base64 content
	Profile                  ProcessingProfile    `json:"profile,omitempty"`                     // Processing profile (replaces multiple parameters)
	ProcessingMode           ProcessingMode       `json:"processing_mode,omitempty"`             // Processing mode (default: basic)
	OutputFormat             OutputFormat         `json:"output_format,omitempty"`               // Output format (default: markdown)
	EnableOCR                bool                 `json:"enable_ocr,omitempty"`                  // Enable OCR processing
	OCRLanguages             []string             `json:"ocr_languages,omitempty"`               // OCR language codes
	PreserveImages           bool                 `json:"preserve_images,omitempty"`             // Extract and preserve images
	Timeout                  *int                 `json:"timeout,omitempty"`                     // Processing timeout in seconds
	MaxFileSize              *int                 `json:"max_file_size,omitempty"`               // Maximum file size in MB
	ReturnInlineOnly         *bool                `json:"return_inline_only,omitempty"`          // Return content inline in the response only. When false (default), the tool will save the processed content to a file in the same directory as the source file, and also return the content inline.
	SaveTo                   string               `json:"save_to,omitempty"`                     // File path to save content when return_inline_only=false
	ClearFileCache           bool                 `json:"clear_file_cache,omitempty"`            // Force clear all cache entries for this source file before processing
	TableFormerMode          TableFormerMode      `json:"table_former_mode,omitempty"`           // TableFormer processing mode for table structure recognition
	CellMatching             *bool                `json:"cell_matching,omitempty"`               // Control table cell matching (true: use PDF cells, false: use predicted cells)
	VisionMode               VisionProcessingMode `json:"vision_mode,omitempty"`                 // Vision processing mode for enhanced document understanding
	DiagramDescription       bool                 `json:"diagram_description,omitempty"`         // Enable diagram and chart description using vision models
	ChartDataExtraction      bool                 `json:"chart_data_extraction,omitempty"`       // Enable data extraction from charts and graphs
	EnableRemoteServices     bool                 `json:"enable_remote_services,omitempty"`      // Allow communication with external vision model services
	ConvertDiagramsToMermaid bool                 `json:"convert_diagrams_to_mermaid,omitempty"` // Convert detected diagrams to Mermaid syntax using AI vision models
	GenerateDiagrams         bool                 `json:"generate_diagrams,omitempty"`           // Generate enhanced diagram analysis using external LLM (requires DOCLING_VLM_API_URL, DOCLING_VLM_MODEL, DOCLING_VLM_API_KEY environment variables)
	ExtractImages            bool                 `json:"extract_images,omitempty"`              // Extract individual images, charts, and diagrams as base64-encoded data with AI recreation prompts
	Debug                    bool                 `json:"debug,omitempty"`                       // Return debug information including environment variables (secrets masked)
}

DocumentProcessingRequest represents the input parameters for document processing

type DocumentProcessingResponse ¶

type DocumentProcessingResponse struct {
	Source         string             `json:"source"`             // Original source
	Content        string             `json:"content"`            // Processed content (markdown)
	Metadata       *DocumentMetadata  `json:"metadata,omitempty"` // Document metadata
	Images         []ExtractedImage   `json:"images,omitempty"`   // Extracted images
	Tables         []ExtractedTable   `json:"tables,omitempty"`   // Extracted tables
	Diagrams       []ExtractedDiagram `json:"diagrams,omitempty"` // Extracted diagrams
	ProcessingInfo ProcessingInfo     `json:"processing_info"`    // Processing information
	CacheHit       bool               `json:"cache_hit"`          // Whether result came from cache
	Error          string             `json:"error,omitempty"`    // Error message if processing failed
}

DocumentProcessingResponse represents the output from document processing

type DocumentProcessorTool ¶

type DocumentProcessorTool struct {
	// contains filtered or unexported fields
}

DocumentProcessorTool implements document processing using Docling via Python subprocess

func (*DocumentProcessorTool) Definition ¶

func (t *DocumentProcessorTool) Definition() mcp.Tool

Definition returns the MCP tool definition

func (*DocumentProcessorTool) Execute ¶

func (t *DocumentProcessorTool) Execute(ctx context.Context, logger *logrus.Logger, cache *sync.Map, args map[string]any) (*mcp.CallToolResult, error)

Execute processes the document using the Python wrapper

func (*DocumentProcessorTool) ProvideExtendedInfo ¶ added in v0.22.0

func (t *DocumentProcessorTool) ProvideExtendedInfo() *tools.ExtendedHelp

ProvideExtendedInfo provides detailed usage information for the document processing tool

type ErrorInfo ¶

type ErrorInfo struct {
	Code        string            `json:"code"`              // Error code
	Message     string            `json:"message"`           // Error message
	Details     string            `json:"details,omitempty"` // Additional error details
	Source      string            `json:"source,omitempty"`  // Source that caused the error
	Timestamp   time.Time         `json:"timestamp"`         // When the error occurred
	Context     map[string]string `json:"context,omitempty"` // Additional context
	Recoverable bool              `json:"recoverable"`       // Whether the error is recoverable
}

ErrorInfo represents detailed error information

type ExtractedDiagram ¶

type ExtractedDiagram struct {
	ID          string           `json:"id"`                     // Unique diagram identifier
	Type        string           `json:"type"`                   // Type of diagram (flowchart, chart, diagram, etc.)
	Caption     string           `json:"caption,omitempty"`      // Diagram caption if available
	Description string           `json:"description,omitempty"`  // Generated description of the diagram
	DiagramType string           `json:"diagram_type,omitempty"` // Classified diagram type (flowchart, chart, etc.)
	MermaidCode string           `json:"mermaid_code,omitempty"` // Generated Mermaid syntax for the diagram
	Base64Data  string           `json:"base64_data,omitempty"`  // Base64-encoded image data for LLM vision processing
	Elements    []DiagramElement `json:"elements,omitempty"`     // Text elements within the diagram
	PageNumber  int              `json:"page_number,omitempty"`  // Page number where diagram appears
	BoundingBox *BoundingBox     `json:"bounding_box,omitempty"` // Position on page
	Confidence  float64          `json:"confidence,omitempty"`   // Confidence score for diagram analysis
	Properties  map[string]any   `json:"properties,omitempty"`   // Additional diagram-specific properties
}

ExtractedDiagram represents a diagram extracted from the document

type ExtractedImage ¶

type ExtractedImage struct {
	ID            string       `json:"id"`                       // Unique image identifier
	Type          string       `json:"type"`                     // Type of image (picture, table, chart, diagram)
	Caption       string       `json:"caption,omitempty"`        // Image caption if available
	AltText       string       `json:"alt_text,omitempty"`       // Alternative text
	Format        string       `json:"format"`                   // Image format (PNG, JPEG, etc.)
	Width         int          `json:"width,omitempty"`          // Image width in pixels
	Height        int          `json:"height,omitempty"`         // Image height in pixels
	Size          int64        `json:"size,omitempty"`           // Image size in bytes
	FilePath      string       `json:"file_path,omitempty"`      // Path to saved image file
	PageNumber    int          `json:"page_number,omitempty"`    // Page number where image appears
	BoundingBox   *BoundingBox `json:"bounding_box,omitempty"`   // Position on page
	ExtractedText []string     `json:"extracted_text,omitempty"` // Text elements extracted from the image
}

ExtractedImage represents an image extracted from the document

type ExtractedTable ¶

type ExtractedTable struct {
	ID          string       `json:"id"`                     // Unique table identifier
	Caption     string       `json:"caption,omitempty"`      // Table caption if available
	Headers     []string     `json:"headers,omitempty"`      // Column headers
	Rows        [][]string   `json:"rows"`                   // Table data rows
	PageNumber  int          `json:"page_number,omitempty"`  // Page number where table appears
	BoundingBox *BoundingBox `json:"bounding_box,omitempty"` // Position on page
	Markdown    string       `json:"markdown,omitempty"`     // Markdown representation
	CSV         string       `json:"csv,omitempty"`          // CSV representation
}

ExtractedTable represents a table extracted from the document

type HardwareAcceleration ¶

type HardwareAcceleration string

HardwareAcceleration defines the hardware acceleration mode

const (
	HardwareAccelerationAuto HardwareAcceleration = "auto" // Auto-detect best option
	HardwareAccelerationMPS  HardwareAcceleration = "mps"  // Metal Performance Shaders (macOS)
	HardwareAccelerationCUDA HardwareAcceleration = "cuda" // CUDA (NVIDIA GPUs)
	HardwareAccelerationCPU  HardwareAcceleration = "cpu"  // CPU-only processing
)

type LLMConfig ¶

type LLMConfig struct {
	Provider string
	Model    string
	APIKey   string
	BaseURL  string
}

LLMConfig contains configuration for the LLM client

type OutputFormat ¶

type OutputFormat string

OutputFormat defines the output format for processed documents

const (
	OutputFormatMarkdown OutputFormat = "markdown" // Markdown output (default)
	OutputFormatJSON     OutputFormat = "json"     // JSON metadata
	OutputFormatBoth     OutputFormat = "both"     // Both markdown and JSON
)

type ProcessingInfo ¶

type ProcessingInfo struct {
	ProcessingMode       ProcessingMode       `json:"processing_mode"`           // Mode used for processing
	ProcessingMethod     string               `json:"processing_method"`         // Concise description of processing method used
	HardwareAcceleration HardwareAcceleration `json:"hardware_acceleration"`     // Hardware acceleration used
	VisionModel          string               `json:"vision_model,omitempty"`    // Vision model used (if any)
	OCREnabled           bool                 `json:"ocr_enabled"`               // Whether OCR was enabled
	OCRLanguages         []string             `json:"ocr_languages,omitempty"`   // OCR languages used
	ProcessingTime       float64              `json:"processing_time"`           // Time taken to process in seconds
	PythonVersion        string               `json:"python_version,omitempty"`  // Python version used
	DoclingVersion       string               `json:"docling_version,omitempty"` // Docling version used
	CacheKey             string               `json:"cache_key,omitempty"`       // Cache key used
	Timestamp            time.Time            `json:"timestamp"`                 // Processing timestamp
	TokenUsage           *TokenUsage          `json:"token_usage,omitempty"`     // Token usage from external LLM (if available)
}

ProcessingInfo contains information about the processing operation

type ProcessingMode ¶

type ProcessingMode string

ProcessingMode defines the type of document processing to perform

const (
	ProcessingModeBasic    ProcessingMode = "basic"    // Fast, code-only processing
	ProcessingModeAdvanced ProcessingMode = "advanced" // Vision model with layout preservation
	ProcessingModeOCR      ProcessingMode = "ocr"      // OCR for scanned documents
	ProcessingModeTables   ProcessingMode = "tables"   // Table extraction focus
	ProcessingModeImages   ProcessingMode = "images"   // Image extraction focus
)

type ProcessingProfile ¶

type ProcessingProfile string

ProcessingProfile defines preset configurations for common document processing scenarios

const (
	ProfileBasic          ProcessingProfile = "basic"           // Text extraction only (fast processing)
	ProfileTextAndImage   ProcessingProfile = "text-and-image"  // Text and image extraction with tables
	ProfileScanned        ProcessingProfile = "scanned"         // OCR-focused processing for scanned documents
	ProfileLLMSmolDocling ProcessingProfile = "llm-smoldocling" // Text and image extraction enhanced with SmolDocling vision model
	ProfileLLMExternal    ProcessingProfile = "llm-external"    // Text and image extraction enhanced with external vision LLM for diagram conversion to Mermaid
)

type SystemInfo ¶

type SystemInfo struct {
	Platform             string                 `json:"platform"`                        // Operating system
	Architecture         string                 `json:"architecture"`                    // CPU architecture
	PythonPath           string                 `json:"python_path,omitempty"`           // Path to Python executable
	PythonVersion        string                 `json:"python_version,omitempty"`        // Python version
	DoclingVersion       string                 `json:"docling_version,omitempty"`       // Docling version
	DoclingAvailable     bool                   `json:"docling_available"`               // Whether Docling is available
	HardwareAcceleration []HardwareAcceleration `json:"hardware_acceleration_available"` // Available acceleration options
	CacheDirectory       string                 `json:"cache_directory,omitempty"`       // Cache directory path
	CacheEnabled         bool                   `json:"cache_enabled"`                   // Whether caching is enabled
	MaxFileSize          int                    `json:"max_file_size"`                   // Maximum file size in MB
	MaxMemoryLimit       int64                  `json:"max_memory_limit"`                // Maximum memory limit in bytes
	DefaultTimeout       int                    `json:"default_timeout"`                 // Default timeout in seconds
}

SystemInfo represents system information for diagnostics

type TableFormerMode ¶

type TableFormerMode string

TableFormerMode defines the TableFormer processing mode for table structure recognition

const (
	TableFormerModeFast     TableFormerMode = "fast"     // Faster but less accurate table processing
	TableFormerModeAccurate TableFormerMode = "accurate" // More accurate but slower table processing (default)
)

type TokenUsage ¶

type TokenUsage struct {
	PromptTokens     int `json:"prompt_tokens,omitempty"`     // Tokens used in prompts
	CompletionTokens int `json:"completion_tokens,omitempty"` // Tokens used in completions
	TotalTokens      int `json:"total_tokens,omitempty"`      // Total tokens used
}

TokenUsage represents token consumption from external LLM providers

type VisionProcessingMode ¶

type VisionProcessingMode string

VisionProcessingMode defines the vision model processing mode for enhanced document understanding

const (
	VisionModeStandard    VisionProcessingMode = "standard"    // Standard vision processing
	VisionModeSmolDocling VisionProcessingMode = "smoldocling" // Compact vision-language model (256M parameters)
	VisionModeAdvanced    VisionProcessingMode = "advanced"    // Advanced vision processing with remote services
)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL