document

package
v1.1.7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 24, 2025 License: MIT Imports: 11 Imported by: 0

Documentation

Overview

Package document contains Document structs and Parsers that prepare content for RAG.

Index

Constants

This section is empty.

Variables

View Source
var ErrReading = errors.New("document is reading")

Functions

This section is empty.

Types

type ClosableDocument

type ClosableDocument interface {
	Close() error
}

type Document

type Document struct {
	Meta map[string]string
	// contains filtered or unexported fields
}

Document is a document container with metadata.

func (*Document) Reader

func (d *Document) Reader() *bytes.Reader

type File

type File struct {
	Document
	// contains filtered or unexported fields
}

func NewFile

func NewFile(fname string) (*File, error)

func (*File) Close

func (d *File) Close() error

func (*File) Read

func (d *File) Read() (chan<- []byte, error)

func (*File) ReadAll

func (d *File) ReadAll() error

func (*File) ReadStatus

func (d *File) ReadStatus() ReadStatus

type HTML2MDParser

type HTML2MDParser struct {
	// contains filtered or unexported fields
}

HTML2MDParser is a parser that converts HTML content to Markdown.

func NewHTML2MDParser

func NewHTML2MDParser(opts ...converter.ConvertOptionFunc) *HTML2MDParser

func (*HTML2MDParser) Parse

func (h *HTML2MDParser) Parse(ctx context.Context, reader *bytes.Reader, writer io.Writer) error

Parse tries to parse HTML content from a bytes.Reader into Markdown content and then writes it to an io.Writer.

type Http

type Http struct {
	Document
	// contains filtered or unexported fields
}

func NewHttp

func NewHttp(opts ...HttpOption) (*Http, error)

func (*Http) Read

func (h *Http) Read() (chan<- []byte, error)

func (*Http) ReadAll

func (h *Http) ReadAll() error

func (*Http) ReadStatus

func (h *Http) ReadStatus() ReadStatus

type HttpConfig

type HttpConfig struct {
	// contains filtered or unexported fields
}

type HttpOption

type HttpOption func(*HttpConfig)

func WithHttpClient

func WithHttpClient(client *http.Client) HttpOption

func WithHttpMethod

func WithHttpMethod(method string) HttpOption

func WithHttpURL

func WithHttpURL(link string) HttpOption

func WithPayload

func WithPayload(payload io.Reader) HttpOption

type PDFParser

type PDFParser struct {
	// contains filtered or unexported fields
}

PDFParser is a parser that parses PDF content into text.

func NewPDFParser

func NewPDFParser(opts ...PDFParserOption) *PDFParser

func (*PDFParser) Parse

func (p *PDFParser) Parse(ctx context.Context, reader *bytes.Reader, writer io.Writer) error

Parse tries to parse PDF content from a bytes.Reader and writes the result to an io.Writer.

type PDFParserOption

type PDFParserOption func(*PDFParser)

func PDFParserWithPassword

func PDFParserWithPassword(password string) PDFParserOption

type Parser

type Parser interface {
	Parse(context.Context, *bytes.Reader, io.Writer) error
}

type ReadStatus

type ReadStatus = int32
const (
	Unread ReadStatus = iota
	Reading
	ReadCompleted
)

type ReadableDocument

type ReadableDocument interface {
	ReadAll() error
	Read() (chan<- []byte, error)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL