ocr

package
v1.5.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 21, 2026 | License: MIT | Imports: 19 | Imported by: 0

Documentation

Index

Constants

View Source
const (
	// TessdataURL is the base URL for downloading tessdata files.
	TessdataURL = "https://github.com/tesseract-ocr/tessdata_fast/raw/main"
)

Variables

View Source
var ErrNativeNotFound = errors.New("native Tesseract not found")

ErrNativeNotFound is returned when native Tesseract is not installed.

Functions

This section is empty.

Types

type Backend

type Backend interface {
	Name() string
	Available() bool
	ProcessImage(ctx context.Context, imagePath, lang string) (string, error)
	Close() error
}

Backend defines the interface for OCR backends.

type BackendType

type BackendType int

BackendType represents the type of OCR backend to use.

const (
	BackendAuto   BackendType = iota // Auto-select best available backend
	BackendNative                    // System-installed Tesseract
	BackendWASM                      // WASM-based Tesseract (gogosseract)
)

func ParseBackendType

func ParseBackendType(s string) BackendType

ParseBackendType converts a string to a BackendType.

func (BackendType) String

func (b BackendType) String() string

type Engine

type Engine struct {
	// contains filtered or unexported fields
}

Engine provides OCR capabilities with configurable backend.

func NewEngine

func NewEngine(lang string) (*Engine, error)

NewEngine creates a new OCR engine with automatic backend selection.

func NewEngineWithOptions

func NewEngineWithOptions(opts EngineOptions) (*Engine, error)

NewEngineWithOptions creates a new OCR engine with specified options.

func (*Engine) BackendName

func (e *Engine) BackendName() string

BackendName returns the name of the currently active backend.

func (*Engine) Close

func (e *Engine) Close() error

Close releases resources held by the engine.

func (*Engine) EnsureTessdata

func (e *Engine) EnsureTessdata() error

EnsureTessdata ensures the tessdata file for the language exists.

func (*Engine) ExtractTextFromPDF

func (e *Engine) ExtractTextFromPDF(pdfPath string, pages []int, password string, showProgress bool) (string, error)

ExtractTextFromPDF extracts text from a PDF using OCR.

type EngineOptions

type EngineOptions struct {
	Lang        string
	DataDir     string
	BackendType BackendType
}

EngineOptions contains options for creating an OCR engine.

type NativeBackend

type NativeBackend struct {
	// contains filtered or unexported fields
}

NativeBackend implements Backend using system-installed Tesseract.

func NewNativeBackend

func NewNativeBackend(lang, dataDir string) (*NativeBackend, error)

NewNativeBackend creates a new native Tesseract backend.

func (*NativeBackend) Available

func (n *NativeBackend) Available() bool

func (*NativeBackend) Close

func (n *NativeBackend) Close() error

func (*NativeBackend) HasLanguage

func (n *NativeBackend) HasLanguage(lang string) bool

HasLanguage checks if a language is available in the system tessdata.

func (*NativeBackend) Name

func (n *NativeBackend) Name() string

func (*NativeBackend) ProcessImage

func (n *NativeBackend) ProcessImage(ctx context.Context, imagePath, lang string) (string, error)

func (*NativeBackend) Version

func (n *NativeBackend) Version() string

Version returns the version of the native Tesseract installation.

type NativeInfo

type NativeInfo struct {
	Path     string
	Version  string
	Tessdata string
}

NativeInfo contains information about the native Tesseract installation.

func DetectNativeTesseract

func DetectNativeTesseract() (*NativeInfo, error)

DetectNativeTesseract checks if native Tesseract is installed and returns its info.

type WASMBackend

type WASMBackend struct {
	// contains filtered or unexported fields
}

WASMBackend implements Backend using gogosseract (WASM-based Tesseract).

func NewWASMBackend

func NewWASMBackend(lang, dataDir string) (*WASMBackend, error)

NewWASMBackend creates a new WASM-based Tesseract backend.

func (*WASMBackend) Available

func (w *WASMBackend) Available() bool

func (*WASMBackend) Close

func (w *WASMBackend) Close() error

func (*WASMBackend) EnsureTessdata

func (w *WASMBackend) EnsureTessdata(lang string) error

EnsureTessdata ensures the tessdata file for the language exists.

func (*WASMBackend) Name

func (w *WASMBackend) Name() string

func (*WASMBackend) ProcessImage

func (w *WASMBackend) ProcessImage(ctx context.Context, imagePath, lang string) (string, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL