Documentation
¶
Index ¶
- Variables
- func CalculateUUID(time time.Time) (ulid.ULID, error)
- func DeleteDocument(docULIDSt string, db Repository) error
- func FetchAllDocuments(db Repository) (*[]Document, error)
- func FetchConfigFromDB(db Repository) (config.ServerConfig, error)
- func UpdateDocumentField(docULIDSt string, field string, newValue interface{}, db Repository) (int, error)
- func WriteConfigToDB(serverConfig config.ServerConfig, db Repository)
- type BunDB
- func (b *BunDB) AddTagToDocument(documentID int, tagID int) error
- func (b *BunDB) Close() error
- func (b *BunDB) CompleteJob(jobID ulid.ULID, result string) error
- func (b *BunDB) CreateJob(jobType JobType, message string) (*Job, error)
- func (b *BunDB) CreateTag(tag *Tag) error
- func (b *BunDB) DeleteDocument(ulidStr string) error
- func (b *BunDB) DeleteOldJobs(olderThan time.Duration) (int, error)
- func (b *BunDB) DeleteTag(id int) error
- func (b *BunDB) GetActiveJobs() ([]Job, error)
- func (b *BunDB) GetAllDimensions() ([]Dimension, error)
- func (b *BunDB) GetAllDocuments() ([]Document, error)
- func (b *BunDB) GetAllTags() ([]Tag, error)
- func (b *BunDB) GetConfig() (*config.ServerConfig, error)
- func (b *BunDB) GetDimensionByID(id int) (*Dimension, error)
- func (b *BunDB) GetDimensionByName(name string) (*Dimension, error)
- func (b *BunDB) GetDimensionValueByValue(dimensionID int, value string) (*DimensionValue, error)
- func (b *BunDB) GetDimensionValues(dimensionID int) ([]DimensionValue, error)
- func (b *BunDB) GetDocumentByHash(hash string) (*Document, error)
- func (b *BunDB) GetDocumentByID(id int) (*Document, error)
- func (b *BunDB) GetDocumentByPath(path string) (*Document, error)
- func (b *BunDB) GetDocumentByULID(ulidStr string) (*Document, error)
- func (b *BunDB) GetDocumentDimensions(documentID int) (map[string]DimensionValue, error)
- func (b *BunDB) GetDocumentsByFolder(folder string) ([]Document, error)
- func (b *BunDB) GetJob(jobID ulid.ULID) (*Job, error)
- func (b *BunDB) GetNewestDocuments(limit int) ([]Document, error)
- func (b *BunDB) GetNewestDocumentsWithPagination(page int, pageSize int) ([]Document, int, error)
- func (b *BunDB) GetRecentJobs(limit, offset int) ([]Job, error)
- func (b *BunDB) GetTagByID(id int) (*Tag, error)
- func (b *BunDB) GetTagByName(name string) (*Tag, error)
- func (b *BunDB) GetTagsForDocument(documentID int) ([]Tag, error)
- func (b *BunDB) GetTopWords(limit int) ([]WordFrequency, error)
- func (b *BunDB) GetWordCloudMetadata() (*WordCloudMetadata, error)
- func (b *BunDB) RecalculateAllWordFrequencies() error
- func (b *BunDB) ReindexSearchDocuments() (int, error)
- func (b *BunDB) RemoveDocumentDimension(documentID int, dimensionID int) error
- func (b *BunDB) RemoveTagFromDocument(documentID int, tagID int) error
- func (b *BunDB) SaveConfig(cfg *config.ServerConfig) error
- func (b *BunDB) SaveDocument(doc *Document) error
- func (b *BunDB) SearchDocuments(searchTerm string) ([]Document, error)
- func (b *BunDB) SetDocumentDimension(documentID int, dimensionID int, dimensionValueID int) error
- func (b *BunDB) UpdateDocumentFolder(ulidStr string, folder string) error
- func (b *BunDB) UpdateDocumentURL(ulidStr string, url string) error
- func (b *BunDB) UpdateJobError(jobID ulid.ULID, errorMsg string) error
- func (b *BunDB) UpdateJobProgress(jobID ulid.ULID, progress int, currentStep string) error
- func (b *BunDB) UpdateJobStatus(jobID ulid.ULID, status JobStatus, message string) error
- func (b *BunDB) UpdateTag(tag *Tag) error
- func (b *BunDB) UpdateWordFrequencies(docID string) error
- type BunDocument
- type BunJob
- type BunServerConfig
- type BunWordCloudMetadata
- type BunWordFrequency
- type Dimension
- type DimensionValue
- type DimensionWithValues
- type Document
- func AddNewDocument(filePath string, fullText string, db Repository) (*Document, error)
- func FetchDocument(docULIDSt string, db Repository) (Document, int, error)
- func FetchDocumentFromPath(path string, db Repository) (Document, error)
- func FetchDocuments(docULIDSt []string, db Repository) ([]Document, int, error)
- func FetchFolder(folderName string, db Repository) ([]Document, error)
- func FetchNewestDocuments(numberOf int, db Repository) ([]Document, error)
- type DocumentDimension
- type DocumentTag
- type DocumentTagsAndDimensions
- type DocumentWithTagsAndDimensions
- type EphemeralPostgresDB
- type Job
- type JobStatus
- type JobSummary
- type JobType
- type PostgresDB
- func (p *PostgresDB) AddTagToDocument(documentID int, tagID int) error
- func (p *PostgresDB) Close() error
- func (p *PostgresDB) CompleteJob(jobID ulid.ULID, result string) error
- func (p *PostgresDB) CreateJob(jobType JobType, message string) (*Job, error)
- func (p *PostgresDB) CreateTag(tag *Tag) error
- func (p *PostgresDB) DeleteDocument(ulidStr string) error
- func (p *PostgresDB) DeleteOldJobs(olderThan time.Duration) (int, error)
- func (p *PostgresDB) DeleteTag(id int) error
- func (p *PostgresDB) GetActiveJobs() ([]Job, error)
- func (p *PostgresDB) GetAllDimensions() ([]Dimension, error)
- func (p *PostgresDB) GetAllDocuments() ([]Document, error)
- func (p *PostgresDB) GetAllTags() ([]Tag, error)
- func (p *PostgresDB) GetConfig() (*config.ServerConfig, error)
- func (p *PostgresDB) GetDimensionByID(id int) (*Dimension, error)
- func (p *PostgresDB) GetDimensionByName(name string) (*Dimension, error)
- func (p *PostgresDB) GetDimensionValueByValue(dimensionID int, value string) (*DimensionValue, error)
- func (p *PostgresDB) GetDimensionValues(dimensionID int) ([]DimensionValue, error)
- func (p *PostgresDB) GetDocumentByHash(hash string) (*Document, error)
- func (p *PostgresDB) GetDocumentByID(id int) (*Document, error)
- func (p *PostgresDB) GetDocumentByPath(path string) (*Document, error)
- func (p *PostgresDB) GetDocumentByULID(ulidStr string) (*Document, error)
- func (p *PostgresDB) GetDocumentDimensions(documentID int) (map[string]DimensionValue, error)
- func (p *PostgresDB) GetDocumentsByFolder(folder string) ([]Document, error)
- func (p *PostgresDB) GetJob(jobID ulid.ULID) (*Job, error)
- func (p *PostgresDB) GetNewestDocuments(limit int) ([]Document, error)
- func (p *PostgresDB) GetNewestDocumentsWithPagination(page int, pageSize int) ([]Document, int, error)
- func (p *PostgresDB) GetRecentJobs(limit, offset int) ([]Job, error)
- func (p *PostgresDB) GetTagByID(id int) (*Tag, error)
- func (p *PostgresDB) GetTagByName(name string) (*Tag, error)
- func (p *PostgresDB) GetTagsForDocument(documentID int) ([]Tag, error)
- func (p *PostgresDB) GetTopWords(limit int) ([]WordFrequency, error)
- func (p *PostgresDB) GetWordCloudMetadata() (*WordCloudMetadata, error)
- func (p *PostgresDB) RecalculateAllWordFrequencies() error
- func (p *PostgresDB) ReindexSearchDocuments() (int, error)
- func (p *PostgresDB) RemoveDocumentDimension(documentID int, dimensionID int) error
- func (p *PostgresDB) RemoveTagFromDocument(documentID int, tagID int) error
- func (p *PostgresDB) SaveConfig(cfg *config.ServerConfig) error
- func (p *PostgresDB) SaveDocument(doc *Document) error
- func (p *PostgresDB) SearchDocuments(searchTerm string) ([]Document, error)
- func (p *PostgresDB) SetDocumentDimension(documentID int, dimensionID int, dimensionValueID int) error
- func (p *PostgresDB) UpdateDocumentFolder(ulidStr string, folder string) error
- func (p *PostgresDB) UpdateDocumentURL(ulidStr string, url string) error
- func (p *PostgresDB) UpdateJobError(jobID ulid.ULID, errorMsg string) error
- func (p *PostgresDB) UpdateJobProgress(jobID ulid.ULID, progress int, currentStep string) error
- func (p *PostgresDB) UpdateJobStatus(jobID ulid.ULID, status JobStatus, message string) error
- func (p *PostgresDB) UpdateTag(tag *Tag) error
- func (p *PostgresDB) UpdateWordFrequencies(docID string) error
- type Repository
- type Tag
- type TagWithCount
- type WordCloudMetadata
- type WordFrequency
- type WordTokenizer
Constants ¶
This section is empty.
Variables ¶
var Logger *slog.Logger
Logger is global since we will need it everywhere
Functions ¶
func CalculateUUID ¶
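CalculateUUID generates the identifier for the incoming file from the supplied time. Despite the name, the value returned is a ULID (ulid.ULID), not a UUID.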
func DeleteDocument ¶
func DeleteDocument(docULIDSt string, db Repository) error
DeleteDocument deletes the requested document, identified by its ULID string
func FetchAllDocuments ¶
func FetchAllDocuments(db Repository) (*[]Document, error)
FetchAllDocuments fetches all the documents in the database
func FetchConfigFromDB ¶
func FetchConfigFromDB(db Repository) (config.ServerConfig, error)
FetchConfigFromDB pulls the server config from the database
func UpdateDocumentField ¶
func UpdateDocumentField(docULIDSt string, field string, newValue interface{}, db Repository) (int, error)
UpdateDocumentField updates a single field in a document
func WriteConfigToDB ¶
func WriteConfigToDB(serverConfig config.ServerConfig, db Repository)
WriteConfigToDB writes the server config to the database for later retrieval. Note that it returns no error value, so callers cannot detect a failed write from the return.
Types ¶
type BunDB ¶
// BunDB implements Repository using the Bun ORM.
// All of its state is unexported; values are obtained via NewRepository.
type BunDB struct {
// contains filtered or unexported fields
}
BunDB implements Repository using Bun ORM
func NewRepository ¶
func NewRepository(config config.ServerConfig) *BunDB
NewRepository initializes the database based on configuration
func (*BunDB) AddTagToDocument ¶ added in v0.21.4
AddTagToDocument associates a tag with a document
func (*BunDB) CompleteJob ¶
CompleteJob marks a job as completed with optional result data
func (*BunDB) DeleteDocument ¶
DeleteDocument deletes a document by ULID
func (*BunDB) DeleteOldJobs ¶
DeleteOldJobs deletes completed jobs older than the specified duration
func (*BunDB) DeleteTag ¶ added in v0.21.4
DeleteTag deletes a tag (will also remove all document associations due to CASCADE)
func (*BunDB) GetActiveJobs ¶
GetActiveJobs retrieves all running or pending jobs
func (*BunDB) GetAllDimensions ¶ added in v0.21.4
GetAllDimensions returns all dimension definitions
func (*BunDB) GetAllDocuments ¶
GetAllDocuments retrieves all documents
func (*BunDB) GetAllTags ¶ added in v0.21.4
GetAllTags returns all tags
func (*BunDB) GetConfig ¶
func (b *BunDB) GetConfig() (*config.ServerConfig, error)
GetConfig retrieves server configuration
func (*BunDB) GetDimensionByID ¶ added in v0.21.4
GetDimensionByID returns a dimension by its ID
func (*BunDB) GetDimensionByName ¶ added in v0.21.4
GetDimensionByName returns a dimension by its name
func (*BunDB) GetDimensionValueByValue ¶ added in v0.21.4
func (b *BunDB) GetDimensionValueByValue(dimensionID int, value string) (*DimensionValue, error)
GetDimensionValueByValue returns a dimension value by dimension ID and value string
func (*BunDB) GetDimensionValues ¶ added in v0.21.4
func (b *BunDB) GetDimensionValues(dimensionID int) ([]DimensionValue, error)
GetDimensionValues returns all possible values for a dimension
func (*BunDB) GetDocumentByHash ¶
GetDocumentByHash retrieves a document by hash
func (*BunDB) GetDocumentByID ¶
GetDocumentByID retrieves a document by ID
func (*BunDB) GetDocumentByPath ¶
GetDocumentByPath retrieves a document by file path
func (*BunDB) GetDocumentByULID ¶
GetDocumentByULID retrieves a document by ULID
func (*BunDB) GetDocumentDimensions ¶ added in v0.21.4
func (b *BunDB) GetDocumentDimensions(documentID int) (map[string]DimensionValue, error)
GetDocumentDimensions returns all dimension values assigned to a document. Returns a map of dimension_name -> DimensionValue.
func (*BunDB) GetDocumentsByFolder ¶
GetDocumentsByFolder retrieves documents in a specific folder
func (*BunDB) GetNewestDocuments ¶
GetNewestDocuments retrieves the newest documents
func (*BunDB) GetNewestDocumentsWithPagination ¶
GetNewestDocumentsWithPagination retrieves documents with pagination support
func (*BunDB) GetRecentJobs ¶
GetRecentJobs retrieves the most recent jobs with pagination
func (*BunDB) GetTagByID ¶ added in v0.21.4
GetTagByID returns a tag by its ID
func (*BunDB) GetTagByName ¶ added in v0.21.4
GetTagByName returns a tag by its name
func (*BunDB) GetTagsForDocument ¶ added in v0.21.4
GetTagsForDocument returns all tags associated with a document
func (*BunDB) GetTopWords ¶
func (b *BunDB) GetTopWords(limit int) ([]WordFrequency, error)
GetTopWords retrieves the top N most frequent words. It is one of the word cloud methods.
func (*BunDB) GetWordCloudMetadata ¶
func (b *BunDB) GetWordCloudMetadata() (*WordCloudMetadata, error)
GetWordCloudMetadata retrieves metadata about the word cloud
func (*BunDB) RecalculateAllWordFrequencies ¶
RecalculateAllWordFrequencies performs a full recalculation of word frequencies
func (*BunDB) ReindexSearchDocuments ¶
ReindexSearchDocuments reindexes all documents to populate the full_text_search column
func (*BunDB) RemoveDocumentDimension ¶ added in v0.21.4
RemoveDocumentDimension removes a dimension value from a document
func (*BunDB) RemoveTagFromDocument ¶ added in v0.21.4
RemoveTagFromDocument removes a tag association from a document
func (*BunDB) SaveConfig ¶
func (b *BunDB) SaveConfig(cfg *config.ServerConfig) error
SaveConfig saves server configuration
func (*BunDB) SaveDocument ¶
SaveDocument saves or updates a document
func (*BunDB) SearchDocuments ¶
SearchDocuments performs full-text search
func (*BunDB) SetDocumentDimension ¶ added in v0.21.4
SetDocumentDimension sets a dimension value for a document (replaces existing if present)
func (*BunDB) UpdateDocumentFolder ¶
UpdateDocumentFolder updates the Folder field of a document
func (*BunDB) UpdateDocumentURL ¶
UpdateDocumentURL updates the URL field of a document
func (*BunDB) UpdateJobError ¶
UpdateJobError updates a job with an error
func (*BunDB) UpdateJobProgress ¶
UpdateJobProgress updates the progress of a job
func (*BunDB) UpdateJobStatus ¶
UpdateJobStatus updates the status of a job
func (*BunDB) UpdateWordFrequencies ¶
UpdateWordFrequencies updates word frequencies after document ingestion
type BunDocument ¶
// BunDocument is the Bun ORM row model for the "documents" table
// (query alias "d"). Convert to and from the storage-agnostic Document
// with FromDocument and ToDocument.
type BunDocument struct {
bun.BaseModel `bun:"table:documents,alias:d"`
// ID is the auto-incrementing primary key.
ID int `bun:"id,pk,autoincrement"`
Name string `bun:"name,notnull"`
// Path carries a unique constraint: at most one row per path.
Path string `bun:"path,notnull,unique"`
// IngressTime defaults to the database's current timestamp.
IngressTime time.Time `bun:"ingress_time,notnull,default:current_timestamp"`
Folder string `bun:"folder,notnull"`
Hash string `bun:"hash,notnull"`
// ULID is unique per row; Document holds the same value as a ulid.ULID.
ULID string `bun:"ulid,notnull,unique"` // Stored as string in DB
DocumentType string `bun:"document_type,notnull"`
// FullText and URL are tagged nullzero: per Bun's tag semantics an empty
// string is written as SQL NULL rather than ''.
FullText string `bun:"full_text,nullzero"`
URL string `bun:"url,nullzero"`
// FullTextSearch maps to a tsvector column.
// NOTE(review): how this column is populated is not visible here; see
// ReindexSearchDocuments — confirm.
FullTextSearch string `bun:"full_text_search,type:tsvector,nullzero"` // PostgreSQL-specific
// CreatedAt and UpdatedAt default to the database's current timestamp.
CreatedAt time.Time `bun:"created_at,notnull,default:current_timestamp"`
UpdatedAt time.Time `bun:"updated_at,notnull,default:current_timestamp"`
}
BunDocument represents the documents table for Bun ORM
func FromDocument ¶
func FromDocument(doc *Document) *BunDocument
FromDocument converts Document to BunDocument
func (*BunDocument) ToDocument ¶
func (bd *BunDocument) ToDocument() (*Document, error)
ToDocument converts BunDocument to Document
type BunJob ¶
// BunJob is the Bun ORM row model for the "jobs" table (query alias "j").
// It mirrors Job field-for-field, with the ID, type and status held as
// plain strings.
type BunJob struct {
bun.BaseModel `bun:"table:jobs,alias:j"`
// ID is the primary key; it is not auto-generated by the database.
ID string `bun:"id,pk"` // ULID as string
Type string `bun:"type,notnull"`
// Status defaults to 'pending' at the column level.
Status string `bun:"status,default:'pending'"`
// Progress, CurrentStep, TotalSteps and Message all have zero-like
// column defaults (0 or '').
Progress int `bun:"progress,default:0"`
CurrentStep string `bun:"current_step,default:''"`
TotalSteps int `bun:"total_steps,default:0"`
Message string `bun:"message,default:''"`
// Error and Result are tagged nullzero: per Bun's tag semantics an empty
// string is written as SQL NULL.
Error string `bun:"error,nullzero"`
Result string `bun:"result,nullzero"`
CreatedAt time.Time `bun:"created_at,notnull,default:current_timestamp"`
UpdatedAt time.Time `bun:"updated_at,notnull,default:current_timestamp"`
// StartedAt and CompletedAt are pointers so that nil maps to SQL NULL.
StartedAt *time.Time `bun:"started_at,nullzero"`
CompletedAt *time.Time `bun:"completed_at,nullzero"`
}
BunJob represents the jobs table for Bun ORM
type BunServerConfig ¶
// BunServerConfig is the Bun ORM row model for the "server_config" table
// (query alias "sc"). It is the persisted form of config.ServerConfig.
type BunServerConfig struct {
bun.BaseModel `bun:"table:server_config,alias:sc"`
// ID is the primary key; note it is NOT tagged autoincrement.
// NOTE(review): presumably a single fixed-ID row is used — confirm.
ID int `bun:"id,pk"`
ListenAddrIP string `bun:"listen_addr_ip,default:''"`
// ListenAddrPort is stored as a string and defaults to '8000'.
ListenAddrPort string `bun:"listen_addr_port,notnull,default:'8000'"`
IngressPath string `bun:"ingress_path,notnull,default:''"`
// IngressDelete defaults to false and IngressPreserve to true.
IngressDelete bool `bun:"ingress_delete,notnull,default:false"`
IngressMoveFolder string `bun:"ingress_move_folder,notnull,default:''"`
IngressPreserve bool `bun:"ingress_preserve,notnull,default:true"`
DocumentPath string `bun:"document_path,notnull,default:''"`
NewDocumentFolder string `bun:"new_document_folder,default:''"`
NewDocumentFolderRel string `bun:"new_document_folder_rel,default:''"`
WebUIPass bool `bun:"web_ui_pass,notnull,default:false"`
ClientUsername string `bun:"client_username,default:''"`
// NOTE(review): ClientPassword and PushBulletToken are plain string
// columns; whether they are hashed or encrypted before storage is not
// visible here — confirm.
ClientPassword string `bun:"client_password,default:''"`
PushBulletToken string `bun:"pushbullet_token,default:''"`
TesseractPath string `bun:"tesseract_path,default:''"`
UseReverseProxy bool `bun:"use_reverse_proxy,notnull,default:false"`
BaseURL string `bun:"base_url,default:''"`
// IngressInterval defaults to 10. NOTE(review): unit not shown — confirm.
IngressInterval int `bun:"ingress_interval,notnull,default:10"`
// NewDocumentNumber defaults to 5.
NewDocumentNumber int `bun:"new_document_number,notnull,default:5"`
ServerAPIURL string `bun:"server_api_url,default:''"`
CreatedAt time.Time `bun:"created_at,notnull,default:current_timestamp"`
UpdatedAt time.Time `bun:"updated_at,notnull,default:current_timestamp"`
}
BunServerConfig represents the server_config table for Bun ORM
type BunWordCloudMetadata ¶
// BunWordCloudMetadata is the Bun ORM row model for the
// "word_cloud_metadata" table (query alias "wcm"). Convert it to the
// storage-agnostic form with ToWordCloudMetadata.
type BunWordCloudMetadata struct {
bun.BaseModel `bun:"table:word_cloud_metadata,alias:wcm"`
// ID is the primary key; it is not tagged autoincrement.
ID int `bun:"id,pk"`
// LastFullCalculation is a pointer so that nil maps to SQL NULL.
LastFullCalculation *time.Time `bun:"last_full_calculation,nullzero"`
// TotalDocsProcessed maps to the column "total_documents_processed";
// the Go name is abbreviated, the column name is not.
TotalDocsProcessed int `bun:"total_documents_processed,default:0"`
TotalWordsIndexed int `bun:"total_words_indexed,default:0"`
// Version defaults to 1.
Version int `bun:"version,default:1"`
CreatedAt time.Time `bun:"created_at,notnull,default:current_timestamp"`
UpdatedAt time.Time `bun:"updated_at,notnull,default:current_timestamp"`
}
BunWordCloudMetadata represents the word_cloud_metadata table for Bun ORM
func (*BunWordCloudMetadata) ToWordCloudMetadata ¶
func (bwcm *BunWordCloudMetadata) ToWordCloudMetadata() *WordCloudMetadata
ToWordCloudMetadata converts BunWordCloudMetadata to WordCloudMetadata
type BunWordFrequency ¶
// BunWordFrequency is the Bun ORM row model for the "word_frequencies"
// table (query alias "wf"). Convert it with ToWordFrequency.
type BunWordFrequency struct {
bun.BaseModel `bun:"table:word_frequencies,alias:wf"`
// Word is the primary key, so each distinct word has exactly one row.
Word string `bun:"word,pk"`
// Frequency defaults to 1.
Frequency int `bun:"frequency,default:1"`
// LastUpdated defaults to the database's current timestamp.
LastUpdated time.Time `bun:"last_updated,default:current_timestamp"`
}
BunWordFrequency represents the word_frequencies table for Bun ORM
func (*BunWordFrequency) ToWordFrequency ¶
func (bwf *BunWordFrequency) ToWordFrequency() *WordFrequency
ToWordFrequency converts BunWordFrequency to WordFrequency
type Dimension ¶ added in v0.21.4
// Dimension represents a structured metadata category (e.g., Person,
// Location). Fields carry both JSON and `db` tags.
// NOTE(review): the consumer of the `db` tags is not visible here — confirm.
type Dimension struct {
ID int `json:"id" db:"id"`
Name string `json:"name" db:"name"`
DisplayName string `json:"display_name" db:"display_name"`
// Description is omitted from JSON output when empty.
Description string `json:"description,omitempty" db:"description"`
DimensionType string `json:"dimension_type" db:"dimension_type"` // 'single' or 'multiple'
IsRequired bool `json:"is_required" db:"is_required"`
CreatedAt time.Time `json:"created_at" db:"created_at"`
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
}
Dimension represents a structured metadata category (e.g., Person, Location)
type DimensionValue ¶ added in v0.21.4
// DimensionValue represents an allowed value for a dimension.
type DimensionValue struct {
ID int `json:"id" db:"id"`
// DimensionID identifies the Dimension this value belongs to.
DimensionID int `json:"dimension_id" db:"dimension_id"`
Value string `json:"value" db:"value"`
DisplayName string `json:"display_name" db:"display_name"`
// Description is omitted from JSON output when empty.
Description string `json:"description,omitempty" db:"description"`
// NOTE(review): the expected format of Color (hex, CSS name, ...) is not
// visible here — confirm.
Color string `json:"color" db:"color"`
SortOrder int `json:"sort_order" db:"sort_order"`
CreatedAt time.Time `json:"created_at" db:"created_at"`
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
}
DimensionValue represents an allowed value for a dimension
type DimensionWithValues ¶ added in v0.21.4
// DimensionWithValues includes the dimension and its possible values.
type DimensionWithValues struct {
// Dimension is embedded, so its fields are promoted and appear at the
// top level of the JSON object alongside "values".
Dimension
Values []DimensionValue `json:"values"`
}
DimensionWithValues includes the dimension and its possible values
type Document ¶
// Document is all of the document information stored in the database.
// It is the storage-agnostic form; the fields carry no struct tags.
type Document struct {
StormID int // ID field (kept as StormID for backward compatibility)
Name string
Path string // full path to the file
IngressTime time.Time
Folder string
Hash string
ULID ulid.ULID // An ID shorter than the hash that can be used in URLs; intended to speed things up
DocumentType string // type of document (pdf, txt, etc)
FullText string
URL string
}
Document is all of the document information stored in the database
func AddNewDocument ¶
func AddNewDocument(filePath string, fullText string, db Repository) (*Document, error)
AddNewDocument adds a new document to the database
func FetchDocument ¶
func FetchDocument(docULIDSt string, db Repository) (Document, int, error)
FetchDocument fetches the requested document by ULID
func FetchDocumentFromPath ¶
func FetchDocumentFromPath(path string, db Repository) (Document, error)
FetchDocumentFromPath fetches the document by document path
func FetchDocuments ¶
func FetchDocuments(docULIDSt []string, db Repository) ([]Document, int, error)
FetchDocuments fetches a slice of documents given their ULID strings. TODO: check whether this function is still needed.
func FetchFolder ¶
func FetchFolder(folderName string, db Repository) ([]Document, error)
FetchFolder grabs all of the documents contained in a folder
func FetchNewestDocuments ¶
func FetchNewestDocuments(numberOf int, db Repository) ([]Document, error)
FetchNewestDocuments fetches the documents that were added last
type DocumentDimension ¶ added in v0.21.4
// DocumentDimension represents a dimension value assigned to a document.
// It links three IDs: the document, the dimension, and the chosen value.
type DocumentDimension struct {
ID int `json:"id" db:"id"`
DocumentID int `json:"document_id" db:"document_id"`
DimensionID int `json:"dimension_id" db:"dimension_id"`
DimensionValueID int `json:"dimension_value_id" db:"dimension_value_id"`
CreatedAt time.Time `json:"created_at" db:"created_at"`
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
}
DocumentDimension represents a dimension value assigned to a document
type DocumentTag ¶ added in v0.21.4
// DocumentTag represents the many-to-many relationship between documents
// and tags. It has no ID field of its own, only the two foreign IDs.
type DocumentTag struct {
DocumentID int `json:"document_id" db:"document_id"`
TagID int `json:"tag_id" db:"tag_id"`
CreatedAt time.Time `json:"created_at" db:"created_at"`
}
DocumentTag represents the many-to-many relationship between documents and tags
type DocumentTagsAndDimensions ¶ added in v0.21.4
// DocumentTagsAndDimensions is a helper struct for JSON sidecar files.
// Unlike DocumentWithTagsAndDimensions it holds plain strings rather than
// Tag and DimensionValue records.
type DocumentTagsAndDimensions struct {
Tags []string `json:"tags"`
Dimensions map[string]string `json:"dimensions"` // dimension_name -> value
}
DocumentTagsAndDimensions is a helper struct for JSON sidecar files
type DocumentWithTagsAndDimensions ¶ added in v0.21.4
// DocumentWithTagsAndDimensions extends Document with its tags and
// dimensions.
type DocumentWithTagsAndDimensions struct {
// Document is embedded. It has no JSON tags, so encoding/json emits its
// promoted fields under their Go names (e.g. "StormID", "Name").
Document
Tags []Tag `json:"tags"`
Dimensions map[string]DimensionValue `json:"dimensions"` // dimension_name -> value
}
DocumentWithTagsAndDimensions extends Document with its tags and dimensions
type EphemeralPostgresDB ¶
// EphemeralPostgresDB implements Repository using ephemeral PostgreSQL.
// Create one with SetupEphemeralPostgresDatabase.
type EphemeralPostgresDB struct {
// The embedded *PostgresDB supplies the Repository methods by promotion;
// EphemeralPostgresDB declares its own Close, which shadows the promoted one.
*PostgresDB
// contains filtered or unexported fields
}
EphemeralPostgresDB implements Repository using ephemeral PostgreSQL
func SetupEphemeralPostgresDatabase ¶
func SetupEphemeralPostgresDatabase() (*EphemeralPostgresDB, error)
SetupEphemeralPostgresDatabase creates an ephemeral PostgreSQL instance
func (*EphemeralPostgresDB) Close ¶
func (e *EphemeralPostgresDB) Close() error
Close closes the database connection and cleans up the ephemeral server
type Job ¶
// Job represents a background job or operation. It is the
// storage-agnostic, JSON-serialisable form; JSON keys are camelCase.
type Job struct {
ID ulid.ULID `json:"id"`
Type JobType `json:"type"`
Status JobStatus `json:"status"`
Progress int `json:"progress"` // 0-100
CurrentStep string `json:"currentStep"` // Human-readable current step
TotalSteps int `json:"totalSteps"` // Total number of steps
Message string `json:"message"` // Status message
Error string `json:"error,omitempty"` // Error message if failed
Result string `json:"result,omitempty"` // JSON result data
CreatedAt time.Time `json:"createdAt"`
UpdatedAt time.Time `json:"updatedAt"`
// StartedAt and CompletedAt are pointers tagged omitempty, so they are
// left out of the JSON output while nil.
StartedAt *time.Time `json:"startedAt,omitempty"`
CompletedAt *time.Time `json:"completedAt,omitempty"`
}
Job represents a background job or operation
type JobSummary ¶
// JobSummary provides summary statistics for a job.
// NOTE(review): presumably serialised into Job.Result — confirm against callers.
type JobSummary struct {
FilesProcessed int `json:"filesProcessed"`
FilesTotal int `json:"filesTotal"`
// BytesProcessed is an int64, unlike the other counters, which are int.
BytesProcessed int64 `json:"bytesProcessed"`
Errors int `json:"errors"`
// Details is omitted from JSON output when empty.
Details string `json:"details,omitempty"`
}
JobSummary provides summary statistics for a job
type PostgresDB ¶
// PostgresDB implements Repository for PostgreSQL.
// All of its state is unexported; create one with SetupPostgresDatabase.
type PostgresDB struct {
// contains filtered or unexported fields
}
PostgresDB implements Repository for PostgreSQL
func SetupPostgresDatabase ¶
func SetupPostgresDatabase(connectionString string) (*PostgresDB, error)
SetupPostgresDatabase initializes the PostgreSQL database with migrations. If connectionString is empty, it will use ephemeral PostgreSQL.
func (*PostgresDB) AddTagToDocument ¶ added in v0.21.4
func (p *PostgresDB) AddTagToDocument(documentID int, tagID int) error
AddTagToDocument associates a tag with a document
func (*PostgresDB) Close ¶
func (p *PostgresDB) Close() error
Close closes the database connection and stops embedded server if running
func (*PostgresDB) CompleteJob ¶
func (p *PostgresDB) CompleteJob(jobID ulid.ULID, result string) error
CompleteJob marks a job as completed with optional result data
func (*PostgresDB) CreateJob ¶
func (p *PostgresDB) CreateJob(jobType JobType, message string) (*Job, error)
CreateJob creates a new job in the database
func (*PostgresDB) CreateTag ¶ added in v0.21.4
func (p *PostgresDB) CreateTag(tag *Tag) error
CreateTag creates a new tag
func (*PostgresDB) DeleteDocument ¶
func (p *PostgresDB) DeleteDocument(ulidStr string) error
DeleteDocument deletes a document by ULID
func (*PostgresDB) DeleteOldJobs ¶
func (p *PostgresDB) DeleteOldJobs(olderThan time.Duration) (int, error)
DeleteOldJobs deletes completed jobs older than the specified duration
func (*PostgresDB) DeleteTag ¶ added in v0.21.4
func (p *PostgresDB) DeleteTag(id int) error
DeleteTag deletes a tag
func (*PostgresDB) GetActiveJobs ¶
func (p *PostgresDB) GetActiveJobs() ([]Job, error)
GetActiveJobs retrieves all running or pending jobs
func (*PostgresDB) GetAllDimensions ¶ added in v0.21.4
func (p *PostgresDB) GetAllDimensions() ([]Dimension, error)
GetAllDimensions returns all dimension definitions
func (*PostgresDB) GetAllDocuments ¶
func (p *PostgresDB) GetAllDocuments() ([]Document, error)
GetAllDocuments retrieves all documents
func (*PostgresDB) GetAllTags ¶ added in v0.21.4
func (p *PostgresDB) GetAllTags() ([]Tag, error)
GetAllTags returns all tags
func (*PostgresDB) GetConfig ¶
func (p *PostgresDB) GetConfig() (*config.ServerConfig, error)
GetConfig retrieves server configuration
func (*PostgresDB) GetDimensionByID ¶ added in v0.21.4
func (p *PostgresDB) GetDimensionByID(id int) (*Dimension, error)
GetDimensionByID returns a dimension by its ID
func (*PostgresDB) GetDimensionByName ¶ added in v0.21.4
func (p *PostgresDB) GetDimensionByName(name string) (*Dimension, error)
GetDimensionByName returns a dimension by its name
func (*PostgresDB) GetDimensionValueByValue ¶ added in v0.21.4
func (p *PostgresDB) GetDimensionValueByValue(dimensionID int, value string) (*DimensionValue, error)
GetDimensionValueByValue returns a dimension value by dimension ID and value string
func (*PostgresDB) GetDimensionValues ¶ added in v0.21.4
func (p *PostgresDB) GetDimensionValues(dimensionID int) ([]DimensionValue, error)
GetDimensionValues returns all possible values for a dimension
func (*PostgresDB) GetDocumentByHash ¶
func (p *PostgresDB) GetDocumentByHash(hash string) (*Document, error)
GetDocumentByHash retrieves a document by hash
func (*PostgresDB) GetDocumentByID ¶
func (p *PostgresDB) GetDocumentByID(id int) (*Document, error)
GetDocumentByID retrieves a document by ID
func (*PostgresDB) GetDocumentByPath ¶
func (p *PostgresDB) GetDocumentByPath(path string) (*Document, error)
GetDocumentByPath retrieves a document by file path
func (*PostgresDB) GetDocumentByULID ¶
func (p *PostgresDB) GetDocumentByULID(ulidStr string) (*Document, error)
GetDocumentByULID retrieves a document by ULID
func (*PostgresDB) GetDocumentDimensions ¶ added in v0.21.4
func (p *PostgresDB) GetDocumentDimensions(documentID int) (map[string]DimensionValue, error)
GetDocumentDimensions returns all dimension values assigned to a document
func (*PostgresDB) GetDocumentsByFolder ¶
func (p *PostgresDB) GetDocumentsByFolder(folder string) ([]Document, error)
GetDocumentsByFolder retrieves documents in a specific folder
func (*PostgresDB) GetJob ¶
func (p *PostgresDB) GetJob(jobID ulid.ULID) (*Job, error)
GetJob retrieves a job by ID
func (*PostgresDB) GetNewestDocuments ¶
func (p *PostgresDB) GetNewestDocuments(limit int) ([]Document, error)
GetNewestDocuments retrieves the newest documents
func (*PostgresDB) GetNewestDocumentsWithPagination ¶
func (p *PostgresDB) GetNewestDocumentsWithPagination(page int, pageSize int) ([]Document, int, error)
GetNewestDocumentsWithPagination retrieves documents with pagination support
func (*PostgresDB) GetRecentJobs ¶
func (p *PostgresDB) GetRecentJobs(limit, offset int) ([]Job, error)
GetRecentJobs retrieves the most recent jobs with pagination
func (*PostgresDB) GetTagByID ¶ added in v0.21.4
func (p *PostgresDB) GetTagByID(id int) (*Tag, error)
GetTagByID returns a tag by its ID
func (*PostgresDB) GetTagByName ¶ added in v0.21.4
func (p *PostgresDB) GetTagByName(name string) (*Tag, error)
GetTagByName returns a tag by its name
func (*PostgresDB) GetTagsForDocument ¶ added in v0.21.4
func (p *PostgresDB) GetTagsForDocument(documentID int) ([]Tag, error)
GetTagsForDocument returns all tags associated with a document
func (*PostgresDB) GetTopWords ¶
func (p *PostgresDB) GetTopWords(limit int) ([]WordFrequency, error)
GetTopWords retrieves the top N most frequent words
func (*PostgresDB) GetWordCloudMetadata ¶
func (p *PostgresDB) GetWordCloudMetadata() (*WordCloudMetadata, error)
GetWordCloudMetadata retrieves metadata about the word cloud
func (*PostgresDB) RecalculateAllWordFrequencies ¶
func (p *PostgresDB) RecalculateAllWordFrequencies() error
RecalculateAllWordFrequencies performs a full recalculation of word frequencies. This should be called during database cleaning or on-demand.
func (*PostgresDB) ReindexSearchDocuments ¶
func (p *PostgresDB) ReindexSearchDocuments() (int, error)
ReindexSearchDocuments reindexes all documents to populate the full_text_search column. Returns the number of documents reindexed.
func (*PostgresDB) RemoveDocumentDimension ¶ added in v0.21.4
func (p *PostgresDB) RemoveDocumentDimension(documentID int, dimensionID int) error
RemoveDocumentDimension removes a dimension value from a document
func (*PostgresDB) RemoveTagFromDocument ¶ added in v0.21.4
func (p *PostgresDB) RemoveTagFromDocument(documentID int, tagID int) error
RemoveTagFromDocument removes a tag association from a document
func (*PostgresDB) SaveConfig ¶
func (p *PostgresDB) SaveConfig(cfg *config.ServerConfig) error
SaveConfig saves server configuration
func (*PostgresDB) SaveDocument ¶
func (p *PostgresDB) SaveDocument(doc *Document) error
SaveDocument saves or updates a document
func (*PostgresDB) SearchDocuments ¶
func (p *PostgresDB) SearchDocuments(searchTerm string) ([]Document, error)
SearchDocuments performs full-text search using PostgreSQL's native search capabilities. Supports both prefix matching and phrase search.
func (*PostgresDB) SetDocumentDimension ¶ added in v0.21.4
func (p *PostgresDB) SetDocumentDimension(documentID int, dimensionID int, dimensionValueID int) error
SetDocumentDimension sets a dimension value for a document
func (*PostgresDB) UpdateDocumentFolder ¶
func (p *PostgresDB) UpdateDocumentFolder(ulidStr string, folder string) error
UpdateDocumentFolder updates the Folder field of a document
func (*PostgresDB) UpdateDocumentURL ¶
func (p *PostgresDB) UpdateDocumentURL(ulidStr string, url string) error
UpdateDocumentURL updates the URL field of a document
func (*PostgresDB) UpdateJobError ¶
func (p *PostgresDB) UpdateJobError(jobID ulid.ULID, errorMsg string) error
UpdateJobError updates a job with an error
func (*PostgresDB) UpdateJobProgress ¶
func (p *PostgresDB) UpdateJobProgress(jobID ulid.ULID, progress int, currentStep string) error
UpdateJobProgress updates the progress of a job
func (*PostgresDB) UpdateJobStatus ¶
func (p *PostgresDB) UpdateJobStatus(jobID ulid.ULID, status JobStatus, message string) error
UpdateJobStatus updates the status of a job
func (*PostgresDB) UpdateTag ¶ added in v0.21.4
func (p *PostgresDB) UpdateTag(tag *Tag) error
UpdateTag updates an existing tag
func (*PostgresDB) UpdateWordFrequencies ¶
func (p *PostgresDB) UpdateWordFrequencies(docID string) error
UpdateWordFrequencies updates word frequencies after document ingestion. This should be called incrementally as documents are added.
type Repository ¶
// Repository is the storage abstraction of this package. Its methods are
// grouped below by the kind of data they operate on: documents, configuration,
// search, word cloud, jobs, tags and dimensions.
type Repository interface {
// Lifecycle
Close() error
// Document methods
// A document can be looked up by integer id, ULID string, path or hash.
SaveDocument(doc *Document) error
GetDocumentByID(id int) (*Document, error)
GetDocumentByULID(ulid string) (*Document, error)
GetDocumentByPath(path string) (*Document, error)
GetDocumentByHash(hash string) (*Document, error)
GetNewestDocuments(limit int) ([]Document, error)
// NOTE(review): the int result is presumably the total number of documents
// available for paging — confirm against the implementations.
GetNewestDocumentsWithPagination(page int, pageSize int) ([]Document, int, error)
GetAllDocuments() ([]Document, error)
GetDocumentsByFolder(folder string) ([]Document, error)
// Deletion and the field updates below address the document by ULID string.
DeleteDocument(ulid string) error
UpdateDocumentURL(ulid string, url string) error
UpdateDocumentFolder(ulid string, folder string) error
// Configuration methods
SaveConfig(config *config.ServerConfig) error
GetConfig() (*config.ServerConfig, error)
// Search methods
SearchDocuments(searchTerm string) ([]Document, error)
// NOTE(review): the int result is presumably the number of documents
// reindexed — confirm against the implementations.
ReindexSearchDocuments() (int, error)
// Word cloud methods
GetTopWords(limit int) ([]WordFrequency, error)
GetWordCloudMetadata() (*WordCloudMetadata, error)
RecalculateAllWordFrequencies() error
// NOTE(review): docID is a string here although GetDocumentByID takes an int;
// presumably this is the document's ULID — confirm before relying on it.
UpdateWordFrequencies(docID string) error
// Job tracking methods
// Jobs are identified by a ulid.ULID value.
CreateJob(jobType JobType, message string) (*Job, error)
UpdateJobProgress(jobID ulid.ULID, progress int, currentStep string) error
UpdateJobStatus(jobID ulid.ULID, status JobStatus, message string) error
UpdateJobError(jobID ulid.ULID, errorMsg string) error
CompleteJob(jobID ulid.ULID, result string) error
GetJob(jobID ulid.ULID) (*Job, error)
GetRecentJobs(limit, offset int) ([]Job, error)
GetActiveJobs() ([]Job, error)
// NOTE(review): the int result is presumably the number of jobs deleted — confirm.
DeleteOldJobs(olderThan time.Duration) (int, error)
// Tag methods
// Tags and documents are both referenced by integer id in this group.
CreateTag(tag *Tag) error
GetAllTags() ([]Tag, error)
GetTagByID(id int) (*Tag, error)
GetTagByName(name string) (*Tag, error)
UpdateTag(tag *Tag) error
DeleteTag(id int) error
GetTagsForDocument(documentID int) ([]Tag, error)
AddTagToDocument(documentID int, tagID int) error
RemoveTagFromDocument(documentID int, tagID int) error
// Dimension methods
// Dimensions, their values and documents are all referenced by integer id.
GetAllDimensions() ([]Dimension, error)
GetDimensionByID(id int) (*Dimension, error)
GetDimensionByName(name string) (*Dimension, error)
GetDimensionValues(dimensionID int) ([]DimensionValue, error)
GetDimensionValueByValue(dimensionID int, value string) (*DimensionValue, error)
// NOTE(review): the map key is presumably the dimension name — confirm.
GetDocumentDimensions(documentID int) (map[string]DimensionValue, error)
SetDocumentDimension(documentID int, dimensionID int, dimensionValueID int) error
RemoveDocumentDimension(documentID int, dimensionID int) error
}
Repository defines database operations
type Tag ¶ added in v0.21.4
// Tag is a free-form label that can be attached to documents.
// Every field carries a `json` tag (the key used by encoding/json) and a `db`
// tag; NOTE(review): the `db` tags presumably name the table columns — confirm
// with the database mapper in use.
type Tag struct {
// ID is the integer identifier; JSON key "id".
ID int `json:"id" db:"id"`
// Name is the tag's name; JSON key "name".
Name string `json:"name" db:"name"`
// Color is stored as a string; JSON key "color".
// NOTE(review): the expected format (e.g. hex code) is not visible here.
Color string `json:"color" db:"color"`
// Description is left out of the JSON output when empty (omitempty).
Description string `json:"description,omitempty" db:"description"`
// CreatedAt is serialized under the JSON key "created_at".
CreatedAt time.Time `json:"created_at" db:"created_at"`
// UpdatedAt is serialized under the JSON key "updated_at".
UpdatedAt time.Time `json:"updated_at" db:"updated_at"`
}
Tag represents a free-form tag that can be applied to documents
type TagWithCount ¶ added in v0.21.4
TagWithCount includes the count of documents using this tag
type WordCloudMetadata ¶
// WordCloudMetadata records the status of the word cloud calculation.
// All fields are serialized to JSON with camelCase keys.
type WordCloudMetadata struct {
// LastCalculation is a timestamp; JSON key "lastCalculation".
// NOTE(review): presumably the time of the most recent calculation — confirm.
LastCalculation time.Time `json:"lastCalculation"`
// TotalDocsProcessed is an integer counter; JSON key "totalDocsProcessed".
TotalDocsProcessed int `json:"totalDocsProcessed"`
// TotalWordsIndexed is an integer counter; JSON key "totalWordsIndexed".
TotalWordsIndexed int `json:"totalWordsIndexed"`
// Version is an integer; JSON key "version".
// NOTE(review): what the version number versions is not visible here.
Version int `json:"version"`
}
WordCloudMetadata tracks word cloud calculation status
type WordFrequency ¶
// WordFrequency pairs a word with its frequency count.
type WordFrequency struct {
// Word is the word itself; JSON key "word".
Word string `json:"word"`
// Frequency is the count for Word; JSON key "frequency".
Frequency int `json:"frequency"`
// Updated is a timestamp; JSON key "updated".
// NOTE(review): presumably when this entry was last written — confirm.
Updated time.Time `json:"updated"`
}
WordFrequency represents a word and its frequency count
type WordTokenizer ¶
// WordTokenizer handles text processing for the word cloud.
// Its fields are not shown on this page, so they cannot be set from outside
// the package; the page lists NewWordTokenizer as its constructor.
type WordTokenizer struct {
// contains filtered or unexported fields
}
WordTokenizer handles text processing for word cloud
func NewWordTokenizer ¶
func NewWordTokenizer() *WordTokenizer
NewWordTokenizer creates a new word tokenizer
func (*WordTokenizer) TokenizeAndCount ¶
func (wt *WordTokenizer) TokenizeAndCount(text string) map[string]int
TokenizeAndCount extracts words from text and counts frequencies