database

package
v0.20.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 16, 2025 License: MIT Imports: 28 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Logger is the package-level structured logger. It is exported as a global
// so that code throughout the package can log without it being passed around.
var Logger *slog.Logger

Logger is global since we will need it everywhere

Functions

func CalculateUUID

func CalculateUUID(time time.Time) (ulid.ULID, error)

CalculateUUID computes the ULID (not a UUID, despite the name) for the incoming file from the given time

func DeleteDocument

func DeleteDocument(docULIDSt string, db Repository) error

DeleteDocument deletes the requested document by ULID

func FetchAllDocuments

func FetchAllDocuments(db Repository) (*[]Document, error)

FetchAllDocuments fetches all the documents in the database

func FetchConfigFromDB

func FetchConfigFromDB(db Repository) (config.ServerConfig, error)

FetchConfigFromDB pulls the server config from the database

func UpdateDocumentField

func UpdateDocumentField(docULIDSt string, field string, newValue interface{}, db Repository) (int, error)

UpdateDocumentField updates a single field in a document

func WriteConfigToDB

func WriteConfigToDB(serverConfig config.ServerConfig, db Repository)

WriteConfigToDB writes the serverconfig to the database for later retrieval

Types

type BunDB

// BunDB is the Repository implementation backed by the Bun ORM.
// All of its state is unexported; obtain one via NewRepository.
type BunDB struct {
	// contains filtered or unexported fields
}

BunDB implements Repository using Bun ORM

func NewRepository

func NewRepository(config config.ServerConfig) *BunDB

NewRepository initializes the database based on configuration

func (*BunDB) Close

func (b *BunDB) Close() error

Close closes the database connection and stops embedded server if running

func (*BunDB) CompleteJob

func (b *BunDB) CompleteJob(jobID ulid.ULID, result string) error

CompleteJob marks a job as completed with optional result data

func (*BunDB) CreateJob

func (b *BunDB) CreateJob(jobType JobType, message string) (*Job, error)

CreateJob creates a new job in the database. It belongs to the job tracking methods.

func (*BunDB) DeleteDocument

func (b *BunDB) DeleteDocument(ulidStr string) error

DeleteDocument deletes a document by ULID

func (*BunDB) DeleteOldJobs

func (b *BunDB) DeleteOldJobs(olderThan time.Duration) (int, error)

DeleteOldJobs deletes completed jobs older than the specified duration

func (*BunDB) GetActiveJobs

func (b *BunDB) GetActiveJobs() ([]Job, error)

GetActiveJobs retrieves all running or pending jobs

func (*BunDB) GetAllDocuments

func (b *BunDB) GetAllDocuments() ([]Document, error)

GetAllDocuments retrieves all documents

func (*BunDB) GetConfig

func (b *BunDB) GetConfig() (*config.ServerConfig, error)

GetConfig retrieves server configuration

func (*BunDB) GetDocumentByHash

func (b *BunDB) GetDocumentByHash(hash string) (*Document, error)

GetDocumentByHash retrieves a document by hash

func (*BunDB) GetDocumentByID

func (b *BunDB) GetDocumentByID(id int) (*Document, error)

GetDocumentByID retrieves a document by ID

func (*BunDB) GetDocumentByPath

func (b *BunDB) GetDocumentByPath(path string) (*Document, error)

GetDocumentByPath retrieves a document by file path

func (*BunDB) GetDocumentByULID

func (b *BunDB) GetDocumentByULID(ulidStr string) (*Document, error)

GetDocumentByULID retrieves a document by ULID

func (*BunDB) GetDocumentsByFolder

func (b *BunDB) GetDocumentsByFolder(folder string) ([]Document, error)

GetDocumentsByFolder retrieves documents in a specific folder

func (*BunDB) GetJob

func (b *BunDB) GetJob(jobID ulid.ULID) (*Job, error)

GetJob retrieves a job by ID

func (*BunDB) GetNewestDocuments

func (b *BunDB) GetNewestDocuments(limit int) ([]Document, error)

GetNewestDocuments retrieves the newest documents

func (*BunDB) GetNewestDocumentsWithPagination

func (b *BunDB) GetNewestDocumentsWithPagination(page int, pageSize int) ([]Document, int, error)

GetNewestDocumentsWithPagination retrieves documents with pagination support

func (*BunDB) GetRecentJobs

func (b *BunDB) GetRecentJobs(limit, offset int) ([]Job, error)

GetRecentJobs retrieves the most recent jobs with pagination

func (*BunDB) GetTopWords

func (b *BunDB) GetTopWords(limit int) ([]WordFrequency, error)

GetTopWords retrieves the top N most frequent words. It belongs to the word cloud methods.

func (*BunDB) GetWordCloudMetadata

func (b *BunDB) GetWordCloudMetadata() (*WordCloudMetadata, error)

GetWordCloudMetadata retrieves metadata about the word cloud

func (*BunDB) RecalculateAllWordFrequencies

func (b *BunDB) RecalculateAllWordFrequencies() error

RecalculateAllWordFrequencies performs a full recalculation of word frequencies

func (*BunDB) ReindexSearchDocuments

func (b *BunDB) ReindexSearchDocuments() (int, error)

ReindexSearchDocuments reindexes all documents to populate the full_text_search column

func (*BunDB) SaveConfig

func (b *BunDB) SaveConfig(cfg *config.ServerConfig) error

SaveConfig saves server configuration

func (*BunDB) SaveDocument

func (b *BunDB) SaveDocument(doc *Document) error

SaveDocument saves or updates a document

func (*BunDB) SearchDocuments

func (b *BunDB) SearchDocuments(searchTerm string) ([]Document, error)

SearchDocuments performs full-text search

func (*BunDB) UpdateDocumentFolder

func (b *BunDB) UpdateDocumentFolder(ulidStr string, folder string) error

UpdateDocumentFolder updates the Folder field of a document

func (*BunDB) UpdateDocumentURL

func (b *BunDB) UpdateDocumentURL(ulidStr string, url string) error

UpdateDocumentURL updates the URL field of a document

func (*BunDB) UpdateJobError

func (b *BunDB) UpdateJobError(jobID ulid.ULID, errorMsg string) error

UpdateJobError updates a job with an error

func (*BunDB) UpdateJobProgress

func (b *BunDB) UpdateJobProgress(jobID ulid.ULID, progress int, currentStep string) error

UpdateJobProgress updates the progress of a job

func (*BunDB) UpdateJobStatus

func (b *BunDB) UpdateJobStatus(jobID ulid.ULID, status JobStatus, message string) error

UpdateJobStatus updates the status of a job

func (*BunDB) UpdateWordFrequencies

func (b *BunDB) UpdateWordFrequencies(docID string) error

UpdateWordFrequencies updates word frequencies after document ingestion

type BunDocument

// BunDocument is the Bun ORM row model for the "documents" table (query
// alias "d"). It mirrors Document, but holds the ULID as a plain string and
// adds the search and bookkeeping columns that exist only at the database level.
type BunDocument struct {
	bun.BaseModel `bun:"table:documents,alias:d"`

	// ID is the auto-incrementing primary key.
	ID             int       `bun:"id,pk,autoincrement"`
	Name           string    `bun:"name,notnull"`
	// Path and ULID each carry a unique constraint.
	Path           string    `bun:"path,notnull,unique"`
	IngressTime    time.Time `bun:"ingress_time,notnull,default:current_timestamp"`
	Folder         string    `bun:"folder,notnull"`
	Hash           string    `bun:"hash,notnull"`
	ULID           string    `bun:"ulid,notnull,unique"` // Stored as string in DB
	DocumentType   string    `bun:"document_type,notnull"`
	// The nullzero columns below are written as NULL when the Go value is
	// the zero value (Bun's nullzero tag option).
	FullText       string    `bun:"full_text,nullzero"`
	URL            string    `bun:"url,nullzero"`
	// FullTextSearch maps to a tsvector column.
	FullTextSearch string    `bun:"full_text_search,type:tsvector,nullzero"` // PostgreSQL-specific
	// CreatedAt and UpdatedAt default to current_timestamp in the database.
	CreatedAt      time.Time `bun:"created_at,notnull,default:current_timestamp"`
	UpdatedAt      time.Time `bun:"updated_at,notnull,default:current_timestamp"`
}

BunDocument represents the documents table for Bun ORM

func FromDocument

func FromDocument(doc *Document) *BunDocument

FromDocument converts Document to BunDocument

func (*BunDocument) ToDocument

func (bd *BunDocument) ToDocument() (*Document, error)

ToDocument converts BunDocument to Document

type BunJob

// BunJob is the Bun ORM row model for the "jobs" table (query alias "j").
// It mirrors Job, with the ID, Type and Status held as plain strings.
type BunJob struct {
	bun.BaseModel `bun:"table:jobs,alias:j"`

	ID          string     `bun:"id,pk"` // ULID as string
	Type        string     `bun:"type,notnull"`
	// Status defaults to 'pending' at the database level.
	Status      string     `bun:"status,default:'pending'"`
	Progress    int        `bun:"progress,default:0"`
	CurrentStep string     `bun:"current_step,default:''"`
	TotalSteps  int        `bun:"total_steps,default:0"`
	Message     string     `bun:"message,default:''"`
	// Error and Result are written as NULL when empty (nullzero).
	Error       string     `bun:"error,nullzero"`
	Result      string     `bun:"result,nullzero"`
	CreatedAt   time.Time  `bun:"created_at,notnull,default:current_timestamp"`
	UpdatedAt   time.Time  `bun:"updated_at,notnull,default:current_timestamp"`
	// StartedAt and CompletedAt are pointers so they can be absent (NULL).
	StartedAt   *time.Time `bun:"started_at,nullzero"`
	CompletedAt *time.Time `bun:"completed_at,nullzero"`
}

BunJob represents the jobs table for Bun ORM

func FromJob

func FromJob(job *Job) *BunJob

FromJob converts Job to BunJob

func (*BunJob) ToJob

func (bj *BunJob) ToJob() (*Job, error)

ToJob converts BunJob to Job

type BunServerConfig

// BunServerConfig is the Bun ORM row model for the "server_config" table
// (query alias "sc"). Each field maps to one column; the defaults shown in
// the tags are applied at the database level.
type BunServerConfig struct {
	bun.BaseModel `bun:"table:server_config,alias:sc"`

	// ID is the primary key (not auto-incrementing).
	ID                   int       `bun:"id,pk"`
	// Listen address; the port is stored as a string and defaults to '8000'.
	ListenAddrIP         string    `bun:"listen_addr_ip,default:''"`
	ListenAddrPort       string    `bun:"listen_addr_port,notnull,default:'8000'"`
	// Ingress settings.
	IngressPath          string    `bun:"ingress_path,notnull,default:''"`
	IngressDelete        bool      `bun:"ingress_delete,notnull,default:false"`
	IngressMoveFolder    string    `bun:"ingress_move_folder,notnull,default:''"`
	IngressPreserve      bool      `bun:"ingress_preserve,notnull,default:true"`
	// Document storage locations.
	DocumentPath         string    `bun:"document_path,notnull,default:''"`
	NewDocumentFolder    string    `bun:"new_document_folder,default:''"`
	NewDocumentFolderRel string    `bun:"new_document_folder_rel,default:''"`
	// Web UI / client credentials.
	// NOTE(review): ClientPassword appears to be stored as a plain string column; confirm whether it is hashed before saving.
	WebUIPass            bool      `bun:"web_ui_pass,notnull,default:false"`
	ClientUsername       string    `bun:"client_username,default:''"`
	ClientPassword       string    `bun:"client_password,default:''"`
	PushBulletToken      string    `bun:"pushbullet_token,default:''"`
	TesseractPath        string    `bun:"tesseract_path,default:''"`
	UseReverseProxy      bool      `bun:"use_reverse_proxy,notnull,default:false"`
	BaseURL              string    `bun:"base_url,default:''"`
	// IngressInterval defaults to 10 and NewDocumentNumber to 5; units are not
	// visible here — TODO confirm against config.ServerConfig.
	IngressInterval      int       `bun:"ingress_interval,notnull,default:10"`
	NewDocumentNumber    int       `bun:"new_document_number,notnull,default:5"`
	ServerAPIURL         string    `bun:"server_api_url,default:''"`
	CreatedAt            time.Time `bun:"created_at,notnull,default:current_timestamp"`
	UpdatedAt            time.Time `bun:"updated_at,notnull,default:current_timestamp"`
}

BunServerConfig represents the server_config table for Bun ORM

type BunWordCloudMetadata

// BunWordCloudMetadata is the Bun ORM row model for the "word_cloud_metadata"
// table (query alias "wcm"). Convert it to the API-facing type with
// ToWordCloudMetadata.
type BunWordCloudMetadata struct {
	bun.BaseModel `bun:"table:word_cloud_metadata,alias:wcm"`

	// ID is the primary key (not auto-incrementing).
	ID                  int        `bun:"id,pk"`
	// LastFullCalculation is a pointer so it can be NULL.
	LastFullCalculation *time.Time `bun:"last_full_calculation,nullzero"`
	// Note the column name differs from the field name: total_documents_processed.
	TotalDocsProcessed  int        `bun:"total_documents_processed,default:0"`
	TotalWordsIndexed   int        `bun:"total_words_indexed,default:0"`
	Version             int        `bun:"version,default:1"`
	CreatedAt           time.Time  `bun:"created_at,notnull,default:current_timestamp"`
	UpdatedAt           time.Time  `bun:"updated_at,notnull,default:current_timestamp"`
}

BunWordCloudMetadata represents the word_cloud_metadata table for Bun ORM

func (*BunWordCloudMetadata) ToWordCloudMetadata

func (bwcm *BunWordCloudMetadata) ToWordCloudMetadata() *WordCloudMetadata

ToWordCloudMetadata converts BunWordCloudMetadata to WordCloudMetadata

type BunWordFrequency

// BunWordFrequency is the Bun ORM row model for the "word_frequencies" table
// (query alias "wf"). Convert it to the API-facing type with ToWordFrequency.
type BunWordFrequency struct {
	bun.BaseModel `bun:"table:word_frequencies,alias:wf"`

	// Word is the primary key, so each word has at most one row.
	Word        string    `bun:"word,pk"`
	// Frequency defaults to 1 at the database level.
	Frequency   int       `bun:"frequency,default:1"`
	LastUpdated time.Time `bun:"last_updated,default:current_timestamp"`
}

BunWordFrequency represents the word_frequencies table for Bun ORM

func (*BunWordFrequency) ToWordFrequency

func (bwf *BunWordFrequency) ToWordFrequency() *WordFrequency

ToWordFrequency converts BunWordFrequency to WordFrequency

type Document

// Document holds all of the information stored in the database for a single
// document. It is the storage-agnostic type exposed through Repository;
// BunDocument is its ORM counterpart.
type Document struct {
	StormID      int // ID field (kept as StormID for backward compatibility)
	Name         string
	Path         string // full path to the file
	IngressTime  time.Time
	Folder       string
	Hash         string
	ULID         ulid.ULID // identifier shorter than the hash, usable in URLs; intended to speed up lookups
	DocumentType string    // type of document (pdf, txt, etc)
	FullText     string
	URL          string
}

Document is all of the document information stored in the database

func AddNewDocument

func AddNewDocument(filePath string, fullText string, db Repository) (*Document, error)

AddNewDocument adds a new document to the database

func FetchDocument

func FetchDocument(docULIDSt string, db Repository) (Document, int, error)

FetchDocument fetches the requested document by ULID

func FetchDocumentFromPath

func FetchDocumentFromPath(path string, db Repository) (Document, error)

FetchDocumentFromPath fetches the document by document path

func FetchDocuments

func FetchDocuments(docULIDSt []string, db Repository) ([]Document, int, error)

FetchDocuments fetches an array of documents. TODO: confirm whether this function is still needed.

func FetchFolder

func FetchFolder(folderName string, db Repository) ([]Document, error)

FetchFolder grabs all of the documents contained in a folder

func FetchNewestDocuments

func FetchNewestDocuments(numberOf int, db Repository) ([]Document, error)

FetchNewestDocuments fetches the documents that were added last

type EphemeralPostgresDB

// EphemeralPostgresDB is a Repository backed by an ephemeral PostgreSQL
// instance. It embeds *PostgresDB, so it gets that type's methods by
// promotion, and declares its own Close.
type EphemeralPostgresDB struct {
	*PostgresDB
	// contains filtered or unexported fields
}

EphemeralPostgresDB implements Repository using ephemeral PostgreSQL

func SetupEphemeralPostgresDatabase

func SetupEphemeralPostgresDatabase() (*EphemeralPostgresDB, error)

SetupEphemeralPostgresDatabase creates an ephemeral PostgreSQL instance

func (*EphemeralPostgresDB) Close

func (e *EphemeralPostgresDB) Close() error

Close closes the database connection and cleans up the ephemeral server

type Job

// Job represents a background job or operation and is the JSON-serializable
// form exposed through Repository; BunJob is its ORM counterpart.
// Error, Result, StartedAt and CompletedAt are omitted from JSON when empty.
type Job struct {
	ID          ulid.ULID  `json:"id"`
	Type        JobType    `json:"type"`
	Status      JobStatus  `json:"status"`
	Progress    int        `json:"progress"`         // 0-100
	CurrentStep string     `json:"currentStep"`      // Human-readable current step
	TotalSteps  int        `json:"totalSteps"`       // Total number of steps
	Message     string     `json:"message"`          // Status message
	Error       string     `json:"error,omitempty"`  // Error message if failed
	Result      string     `json:"result,omitempty"` // JSON result data
	CreatedAt   time.Time  `json:"createdAt"`
	UpdatedAt   time.Time  `json:"updatedAt"`
	StartedAt   *time.Time `json:"startedAt,omitempty"`
	CompletedAt *time.Time `json:"completedAt,omitempty"`
}

Job represents a background job or operation

type JobStatus

// JobStatus represents the status of a job. It is a string type; the
// recognized values are the JobStatus* constants.
type JobStatus string

JobStatus represents the status of a job

// Recognized JobStatus values. JobStatusPending matches the 'pending'
// default declared for the jobs.status column on BunJob.
const (
	JobStatusPending   JobStatus = "pending"
	JobStatusRunning   JobStatus = "running"
	JobStatusCompleted JobStatus = "completed"
	JobStatusFailed    JobStatus = "failed"
	JobStatusCancelled JobStatus = "cancelled"
)

type JobSummary

// JobSummary provides summary statistics for a job in JSON-serializable form.
// NOTE(review): presumably this is what gets encoded into Job.Result — confirm against callers.
type JobSummary struct {
	FilesProcessed int    `json:"filesProcessed"`
	FilesTotal     int    `json:"filesTotal"`
	BytesProcessed int64  `json:"bytesProcessed"`
	Errors         int    `json:"errors"`
	// Details is omitted from JSON when empty.
	Details        string `json:"details,omitempty"`
}

JobSummary provides summary statistics for a job

type JobType

// JobType represents the type of job. It is a string type; the recognized
// values are the JobType* constants.
type JobType string

JobType represents the type of job

// Recognized JobType values, one per kind of background job.
const (
	JobTypeIngestion     JobType = "ingestion"
	JobTypeCleanup       JobType = "cleanup"
	JobTypeWordCloud     JobType = "wordcloud"
	JobTypeSearchReindex JobType = "search_reindex"
)

type PostgresDB

// PostgresDB is the Repository implementation for PostgreSQL.
// All of its state is unexported; obtain one via SetupPostgresDatabase.
type PostgresDB struct {
	// contains filtered or unexported fields
}

PostgresDB implements Repository for PostgreSQL

func SetupPostgresDatabase

func SetupPostgresDatabase(connectionString string) (*PostgresDB, error)

SetupPostgresDatabase initializes the PostgreSQL database with migrations. If connectionString is empty, it will use ephemeral PostgreSQL.

func (*PostgresDB) Close

func (p *PostgresDB) Close() error

Close closes the database connection and stops embedded server if running

func (*PostgresDB) CompleteJob

func (p *PostgresDB) CompleteJob(jobID ulid.ULID, result string) error

CompleteJob marks a job as completed with optional result data

func (*PostgresDB) CreateJob

func (p *PostgresDB) CreateJob(jobType JobType, message string) (*Job, error)

CreateJob creates a new job in the database

func (*PostgresDB) DeleteDocument

func (p *PostgresDB) DeleteDocument(ulidStr string) error

DeleteDocument deletes a document by ULID

func (*PostgresDB) DeleteOldJobs

func (p *PostgresDB) DeleteOldJobs(olderThan time.Duration) (int, error)

DeleteOldJobs deletes completed jobs older than the specified duration

func (*PostgresDB) GetActiveJobs

func (p *PostgresDB) GetActiveJobs() ([]Job, error)

GetActiveJobs retrieves all running or pending jobs

func (*PostgresDB) GetAllDocuments

func (p *PostgresDB) GetAllDocuments() ([]Document, error)

GetAllDocuments retrieves all documents

func (*PostgresDB) GetConfig

func (p *PostgresDB) GetConfig() (*config.ServerConfig, error)

GetConfig retrieves server configuration

func (*PostgresDB) GetDocumentByHash

func (p *PostgresDB) GetDocumentByHash(hash string) (*Document, error)

GetDocumentByHash retrieves a document by hash

func (*PostgresDB) GetDocumentByID

func (p *PostgresDB) GetDocumentByID(id int) (*Document, error)

GetDocumentByID retrieves a document by ID

func (*PostgresDB) GetDocumentByPath

func (p *PostgresDB) GetDocumentByPath(path string) (*Document, error)

GetDocumentByPath retrieves a document by file path

func (*PostgresDB) GetDocumentByULID

func (p *PostgresDB) GetDocumentByULID(ulidStr string) (*Document, error)

GetDocumentByULID retrieves a document by ULID

func (*PostgresDB) GetDocumentsByFolder

func (p *PostgresDB) GetDocumentsByFolder(folder string) ([]Document, error)

GetDocumentsByFolder retrieves documents in a specific folder

func (*PostgresDB) GetJob

func (p *PostgresDB) GetJob(jobID ulid.ULID) (*Job, error)

GetJob retrieves a job by ID

func (*PostgresDB) GetNewestDocuments

func (p *PostgresDB) GetNewestDocuments(limit int) ([]Document, error)

GetNewestDocuments retrieves the newest documents

func (*PostgresDB) GetNewestDocumentsWithPagination

func (p *PostgresDB) GetNewestDocumentsWithPagination(page int, pageSize int) ([]Document, int, error)

GetNewestDocumentsWithPagination retrieves documents with pagination support

func (*PostgresDB) GetRecentJobs

func (p *PostgresDB) GetRecentJobs(limit, offset int) ([]Job, error)

GetRecentJobs retrieves the most recent jobs with pagination

func (*PostgresDB) GetTopWords

func (p *PostgresDB) GetTopWords(limit int) ([]WordFrequency, error)

GetTopWords retrieves the top N most frequent words

func (*PostgresDB) GetWordCloudMetadata

func (p *PostgresDB) GetWordCloudMetadata() (*WordCloudMetadata, error)

GetWordCloudMetadata retrieves metadata about the word cloud

func (*PostgresDB) RecalculateAllWordFrequencies

func (p *PostgresDB) RecalculateAllWordFrequencies() error

RecalculateAllWordFrequencies performs a full recalculation of word frequencies. This should be called during database cleaning or on-demand.

func (*PostgresDB) ReindexSearchDocuments

func (p *PostgresDB) ReindexSearchDocuments() (int, error)

ReindexSearchDocuments reindexes all documents to populate the full_text_search column. Returns the number of documents reindexed.

func (*PostgresDB) SaveConfig

func (p *PostgresDB) SaveConfig(cfg *config.ServerConfig) error

SaveConfig saves server configuration

func (*PostgresDB) SaveDocument

func (p *PostgresDB) SaveDocument(doc *Document) error

SaveDocument saves or updates a document

func (*PostgresDB) SearchDocuments

func (p *PostgresDB) SearchDocuments(searchTerm string) ([]Document, error)

SearchDocuments performs full-text search using PostgreSQL's native search capabilities. Supports both prefix matching and phrase search.

func (*PostgresDB) UpdateDocumentFolder

func (p *PostgresDB) UpdateDocumentFolder(ulidStr string, folder string) error

UpdateDocumentFolder updates the Folder field of a document

func (*PostgresDB) UpdateDocumentURL

func (p *PostgresDB) UpdateDocumentURL(ulidStr string, url string) error

UpdateDocumentURL updates the URL field of a document

func (*PostgresDB) UpdateJobError

func (p *PostgresDB) UpdateJobError(jobID ulid.ULID, errorMsg string) error

UpdateJobError updates a job with an error

func (*PostgresDB) UpdateJobProgress

func (p *PostgresDB) UpdateJobProgress(jobID ulid.ULID, progress int, currentStep string) error

UpdateJobProgress updates the progress of a job

func (*PostgresDB) UpdateJobStatus

func (p *PostgresDB) UpdateJobStatus(jobID ulid.ULID, status JobStatus, message string) error

UpdateJobStatus updates the status of a job

func (*PostgresDB) UpdateWordFrequencies

func (p *PostgresDB) UpdateWordFrequencies(docID string) error

UpdateWordFrequencies updates word frequencies after document ingestion. This should be called incrementally as documents are added.

type Repository

// Repository defines the database operations the rest of the application
// relies on. BunDB and PostgresDB both provide this method set.
type Repository interface {
	// Close releases the underlying database connection.
	Close() error

	// Document methods. Documents are addressed by numeric ID, ULID string,
	// file path, or hash.
	SaveDocument(doc *Document) error
	GetDocumentByID(id int) (*Document, error)
	GetDocumentByULID(ulid string) (*Document, error)
	GetDocumentByPath(path string) (*Document, error)
	GetDocumentByHash(hash string) (*Document, error)
	GetNewestDocuments(limit int) ([]Document, error)
	// NOTE(review): the int result is presumably the total document count — confirm against the implementation.
	GetNewestDocumentsWithPagination(page int, pageSize int) ([]Document, int, error)
	GetAllDocuments() ([]Document, error)
	GetDocumentsByFolder(folder string) ([]Document, error)
	DeleteDocument(ulid string) error
	UpdateDocumentURL(ulid string, url string) error
	UpdateDocumentFolder(ulid string, folder string) error

	// Server configuration methods
	SaveConfig(config *config.ServerConfig) error
	GetConfig() (*config.ServerConfig, error)

	// Search methods. ReindexSearchDocuments returns the number of documents reindexed.
	SearchDocuments(searchTerm string) ([]Document, error)
	ReindexSearchDocuments() (int, error)
	// Word cloud methods
	GetTopWords(limit int) ([]WordFrequency, error)
	GetWordCloudMetadata() (*WordCloudMetadata, error)
	RecalculateAllWordFrequencies() error
	UpdateWordFrequencies(docID string) error
	// Job tracking methods
	CreateJob(jobType JobType, message string) (*Job, error)
	UpdateJobProgress(jobID ulid.ULID, progress int, currentStep string) error
	UpdateJobStatus(jobID ulid.ULID, status JobStatus, message string) error
	UpdateJobError(jobID ulid.ULID, errorMsg string) error
	CompleteJob(jobID ulid.ULID, result string) error
	GetJob(jobID ulid.ULID) (*Job, error)
	GetRecentJobs(limit, offset int) ([]Job, error)
	GetActiveJobs() ([]Job, error)
	// NOTE(review): the int result is presumably the number of jobs deleted — confirm against the implementation.
	DeleteOldJobs(olderThan time.Duration) (int, error)
}

Repository defines database operations

type WordCloudMetadata

// WordCloudMetadata tracks word cloud calculation status in JSON-serializable
// form; BunWordCloudMetadata is its ORM counterpart.
type WordCloudMetadata struct {
	// LastCalculation is a plain time.Time here, whereas the ORM model's
	// LastFullCalculation is a nullable *time.Time.
	LastCalculation    time.Time `json:"lastCalculation"`
	TotalDocsProcessed int       `json:"totalDocsProcessed"`
	TotalWordsIndexed  int       `json:"totalWordsIndexed"`
	Version            int       `json:"version"`
}

WordCloudMetadata tracks word cloud calculation status

type WordFrequency

// WordFrequency represents a word and its frequency count in
// JSON-serializable form; BunWordFrequency is its ORM counterpart.
type WordFrequency struct {
	Word      string    `json:"word"`
	Frequency int       `json:"frequency"`
	// NOTE(review): presumably populated from BunWordFrequency.LastUpdated — confirm in ToWordFrequency.
	Updated   time.Time `json:"updated"`
}

WordFrequency represents a word and its frequency count

type WordTokenizer

// WordTokenizer handles text processing for the word cloud.
// All of its state is unexported; create one with NewWordTokenizer and use
// TokenizeAndCount to get per-word counts from a text.
type WordTokenizer struct {
	// contains filtered or unexported fields
}

WordTokenizer handles text processing for word cloud

func NewWordTokenizer

func NewWordTokenizer() *WordTokenizer

NewWordTokenizer creates a new word tokenizer

func (*WordTokenizer) TokenizeAndCount

func (wt *WordTokenizer) TokenizeAndCount(text string) map[string]int

TokenizeAndCount extracts words from text and counts frequencies

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL