tools

package
v0.7.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 6, 2025 License: Apache-2.0 Imports: 24 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var Insecure bool

Functions

func BuildFilename

func BuildFilename(title string, dir string, id string) string

BuildFilename builds a Markdown filename cleaned of special characters.

func CheckError

func CheckError(err error)

CheckError displays the error on screen.

func DescribeImg

func DescribeImg(img string, lang string) (string, error)

DescribeImg describes an image using the Ollama API.

func DisplayOnScreen

func DisplayOnScreen(exportedPages []Page)

DisplayOnScreen displays pages on screen as a text table.

func ExtractTextFromPDF

func ExtractTextFromPDF(pdfPath string) (string, error)

ExtractTextFromPDF extracts text from a PDF. TODO: rewrite using the https://github.com/ledongthuc/pdf library; the library currently used is no longer maintained.

func GetImgList

func GetImgList(content *goquery.Document, ispath string, scheme string, domain string) ([]string, error)

GetImgList gets all images from a web page and returns a list of image URLs.

func InitLogger

func InitLogger(verbose bool) error

func RemoveAccents

func RemoveAccents(s string) string

RemoveAccents removes accents from a string.

func RemoveSpecialChars

func RemoveSpecialChars(s string) string

func ReplaceAllMultipleChars

func ReplaceAllMultipleChars(s string) string

ReplaceAllMultipleChars replaces each run of repeated consecutive characters with a single character; for example, "aa---bb__cc==dd" becomes "a-b_c=d".

func WriteMarkdownToFile

func WriteMarkdownToFile(markdown, outputPath string) error

WriteMarkdownToFile writes markdown content to a file.

Types

type Metadata

// Metadata groups the descriptive fields attached to a page or file.
// It is the value returned by BuildMetadata and BuildFileMetadata and the
// element type of the slice returned by ReadPages. Every field carries a
// `json` struct tag with a snake_case key.
//
// NOTE(review): several field names use underscores (Doc_id, Site_url, ...)
// and PageId uses "Id" instead of Go's MixedCaps / "ID" convention. They are
// exported, so renaming them would break callers; left unchanged here.
type Metadata struct {
	Title            string   `json:"title"`
	Doc_id           string   `json:"doc_id"`
	Description      string   `json:"description"`
	Site_url         string   `json:"site_url"`
	Authors          []string `json:"authors"`
	// Dates are stored as plain strings; the expected format is not visible
	// here — TODO confirm against the code that fills them.
	Creation_date    string   `json:"creation_date"`
	Last_update_date string   `json:"last_update_date"`
	Visibility       string   `json:"visibility"`
	Tags             []string `json:"tags"`
	PageId           string   `json:"page_id"`
}

func BuildFileMetadata

func BuildFileMetadata(docpath string, url string, prefix string, meta Metadata, complement Metadata) (string, Metadata)

BuildFileMetadata builds the metadata for a document or PDF file.

func BuildMetadata

func BuildMetadata(content *goquery.Document, url string, prefix string, complement Metadata) (string, Metadata)

BuildMetadata builds the metadata for a page as a Markdown header.

func ReadPages

func ReadPages(filename string) ([]Metadata, error)

ReadPages reads the list of pages from a JSON file and returns a list of Metadata.

type Page

// Page is the record returned by GetPage and GetPDF and the element type
// accepted by DisplayOnScreen. It has no struct tags.
type Page struct {
	PageId string
	// presumably the name or path of the generated Markdown file — TODO confirm
	MdFile string
	Title  string
	Url    string
}

func GetPDF

func GetPDF(pdfPath string, url string, customerId string, exportDir string, complements Metadata) (Page, error)

func GetPage

func GetPage(url string, customerId string, exportDir string, complements Metadata, domain string, ia bool) (Page, error)

GetPage gets a web page by its URL and returns a Page struct.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL