analysis

package
v0.0.8 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 20, 2025 License: MIT Imports: 13 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AnalysisResult

// AnalysisResult contains the overall results of a directory analysis run.
//
// Every field is exported to JSON under the key given in its struct tag.
// FileEntries, DuplicateSets and OverallErrors carry `omitempty`, so they are
// left out of the encoded document when empty; the remaining fields are
// always emitted. Note that Config is serialized under the key "config_used",
// not "config".
//
// NOTE(review): ToolStatus is presumably keyed by tool name (compare
// ToolInfo.Name) — confirm against AddToolStatus.
type AnalysisResult struct {
	RootDir        string                 `json:"root_dir"`
	ScanStartTime  time.Time              `json:"scan_start_time"`
	ScanEndTime    time.Time              `json:"scan_end_time"`
	TotalFiles     int                    `json:"total_files"`
	TotalDirs      int                    `json:"total_dirs"`
	TotalSize      int64                  `json:"total_size"`
	FileEntries    []*FileEntry           `json:"file_entries,omitempty"` // May be omitted in summary-only reports
	TypeSummary    map[string]*TypeStats  `json:"type_summary"`           // Keyed by type label
	DuplicateSets  []*DuplicateSet        `json:"duplicate_sets,omitempty"`
	MonthlySummary map[string]*MonthStats `json:"monthly_summary"` // Keyed by "YYYY-MM"
	ToolStatus     map[string]*ToolInfo   `json:"tool_status"`
	OverallErrors  []string               `json:"overall_errors,omitempty"`
	Config         *config.Config         `json:"config_used"` // Include the config used for this run
}

AnalysisResult contains the overall results of a directory analysis run.

func NewAnalysisResult

func NewAnalysisResult(rootDir string, cfg *config.Config) *AnalysisResult

NewAnalysisResult creates a new AnalysisResult with initialized maps and slices.

func (*AnalysisResult) AddError

func (r *AnalysisResult) AddError(err error)

AddError adds an error to the list of overall errors.

func (*AnalysisResult) AddFileEntry

func (r *AnalysisResult) AddFileEntry(entry *FileEntry)

AddFileEntry adds a FileEntry to the result and updates totals.

func (*AnalysisResult) AddToolStatus

func (r *AnalysisResult) AddToolStatus(name, path string, available bool, err error)

AddToolStatus records the status of an external tool.

type Analyzer

// Analyzer performs a specific analysis task.
//
// An implementation identifies itself by name, declares whether it works on
// individual files or on the full dataset (see AnalyzerType), and lists the
// analyzers it depends on by name.
type Analyzer interface {
	// Name returns a unique identifier for the analyzer (e.g., "MagikaTypeAnalyzer").
	// DependsOn refers to other analyzers by this name.
	Name() string

	// Type returns whether this is a file-level or aggregate analyzer.
	Type() AnalyzerType

	// DependsOn returns the names of analyzers that must run before this one.
	DependsOn() []string

	// Analyze performs the analysis.
	// For file-level analyzers (Type() == FileAnalyzer), it operates on a single FileEntry.
	// For aggregate analyzers (Type() == AggregateAnalyzer), the entry parameter is ignored.
	// Both types update the AnalysisResult as needed.
	// The ToolProvider gives access to configured external tool runners.
	//
	// NOTE(review): this listing does not say whether a returned error aborts
	// the run or is only recorded on the result/entry — confirm against Runner.Run.
	Analyze(ctx context.Context, cfg *config.Config, toolProvider ToolProvider, result *AnalysisResult, entry *FileEntry) error
}

Analyzer performs a specific analysis task.

type AnalyzerType

// AnalyzerType defines the type of analyzer (how it should be executed).
// The declared values are FileAnalyzer and AggregateAnalyzer.
type AnalyzerType int

AnalyzerType defines the type of analyzer (how it should be executed).

// Execution modes for an Analyzer. The values are assigned with iota, so
// FileAnalyzer is 0 (the zero value of AnalyzerType) and AggregateAnalyzer is 1.
const (
	// FileAnalyzer operates on individual files and can run concurrently
	FileAnalyzer AnalyzerType = iota
	// AggregateAnalyzer operates on the full dataset and runs sequentially
	AggregateAnalyzer
)

type DuplicateSet

// DuplicateSet represents a set of duplicate files.
//
// All fields are always present in the JSON encoding (none use omitempty).
// Per the field comments, Size is the size of a single member and
// WastedSpace is (Count-1) * Size.
type DuplicateSet struct {
	ID          string   `json:"id"`           // Hash or external tool ID
	FilePaths   []string `json:"file_paths"`   // Relative paths
	Size        int64    `json:"size"`         // Size of one file
	Count       int      `json:"count"`        // Number of files in the set
	WastedSpace int64    `json:"wasted_space"` // (Count-1) * Size
}

DuplicateSet represents a set of duplicate files.

type FileEntry

// FileEntry holds information about a scanned file system entry.
//
// FullPath and Mode are tagged `json:"-"` and therefore never appear in the
// JSON encoding. TypeInfo, Metadata, Hashes, Tags and Error carry `omitempty`
// and are left out when empty.
//
// NOTE(review): Path is documented as relative to "TargetDir", a name that
// does not appear elsewhere in this package listing (AnalysisResult exposes
// RootDir) — confirm both refer to the same directory.
type FileEntry struct {
	Path     string      `json:"path"` // Relative to TargetDir
	FullPath string      `json:"-"`    // Absolute path
	IsDir    bool        `json:"is_dir"`
	Size     int64       `json:"size"`
	ModTime  time.Time   `json:"mod_time"`
	Mode     fs.FileMode `json:"-"`

	TypeInfo map[string]any    `json:"type_info,omitempty"` // Source ("magika", "file"), Label, Group, MIME
	Metadata map[string]any    `json:"metadata,omitempty"`  // Source ("exiftool"), Key-Value pairs
	Hashes   map[string]string `json:"hashes,omitempty"`    // Algorithm ("md5", "sha256") -> Hash string
	Tags     []string          `json:"tags,omitempty"`      // "LargeFile", "RecentFile", "DuplicateSetID:xyz"
	Error    string            `json:"error,omitempty"`     // Record file-specific processing errors
}

FileEntry holds information about a scanned file system entry.

func NewFileEntry

func NewFileEntry(path, fullPath string, isDir bool, size int64, modTime time.Time, mode fs.FileMode) *FileEntry

NewFileEntry creates a new FileEntry with initialized maps.

func (*FileEntry) AddError

func (e *FileEntry) AddError(err error)

AddError records an error message on a FileEntry.

func (*FileEntry) AddTag

func (e *FileEntry) AddTag(tag string)

AddTag adds a tag to a FileEntry if it doesn't already exist.

func (*FileEntry) HasTag

func (e *FileEntry) HasTag(tag string) bool

HasTag checks if a FileEntry has a specific tag.

type GlobMatcher

// GlobMatcher implements the PathMatcher interface using filepath.Match.
// All of its fields are unexported, and this listing shows no exported
// constructor for it.
type GlobMatcher struct {
	// contains filtered or unexported fields
}

GlobMatcher implements the PathMatcher interface using filepath.Match.

func (*GlobMatcher) Match

func (g *GlobMatcher) Match(path string) bool

Match checks if a path matches the glob pattern.

type MagikaTypeAnalyzer

// MagikaTypeAnalyzer uses Magika to analyze file types.
// Its fields are unexported; create one with NewMagikaTypeAnalyzer.
type MagikaTypeAnalyzer struct {
	// contains filtered or unexported fields
}

MagikaTypeAnalyzer uses Magika to analyze file types.

func NewMagikaTypeAnalyzer

func NewMagikaTypeAnalyzer() *MagikaTypeAnalyzer

NewMagikaTypeAnalyzer creates a new Magika file type analyzer.

func (*MagikaTypeAnalyzer) Analyze

func (a *MagikaTypeAnalyzer) Analyze(ctx context.Context, cfg *config.Config, toolProvider ToolProvider, result *AnalysisResult, entry *FileEntry) error

Analyze performs file type analysis using Magika.

func (*MagikaTypeAnalyzer) DependsOn

func (a *MagikaTypeAnalyzer) DependsOn() []string

DependsOn returns analyzer dependencies.

func (*MagikaTypeAnalyzer) Name

func (a *MagikaTypeAnalyzer) Name() string

Name returns the analyzer name.

func (*MagikaTypeAnalyzer) Type

func (a *MagikaTypeAnalyzer) Type() AnalyzerType

Type returns the analyzer type.

type MonthStats

// MonthStats represents aggregated statistics for files modified in a
// specific month. YearMonth uses the "YYYY-MM" form.
type MonthStats struct {
	YearMonth string `json:"year_month"` // YYYY-MM
	Count     int    `json:"count"`
	Size      int64  `json:"size"`
}

MonthStats represents aggregated statistics for files modified in a specific month.

type PathMatcher

// PathMatcher is an interface for matching paths against patterns.
// GlobMatcher is the implementation shown in this package.
type PathMatcher interface {
	// Match reports whether path matches the matcher's pattern.
	Match(path string) bool
}

PathMatcher is an interface for matching paths against patterns.

type Registry

// Registry manages the available analyzers.
// Its fields are unexported; create one with NewRegistry.
type Registry struct {
	// contains filtered or unexported fields
}

Registry manages the available analyzers.

func NewRegistry

func NewRegistry() *Registry

NewRegistry creates a new analyzer registry.

func (*Registry) FilterAnalyzers

func (r *Registry) FilterAnalyzers(enabled, disabled []string) []Analyzer

FilterAnalyzers returns a filtered list of analyzers based on enabled/disabled lists. If enabled is empty, all analyzers except those in disabled are returned.

func (*Registry) GetAnalyzer

func (r *Registry) GetAnalyzer(name string) (Analyzer, bool)

GetAnalyzer retrieves an analyzer by name.

func (*Registry) ListAnalyzers

func (r *Registry) ListAnalyzers() []Analyzer

ListAnalyzers returns all registered analyzers.

func (*Registry) Register

func (r *Registry) Register(analyzer Analyzer)

Register adds an analyzer to the registry.

type Runner

// Runner orchestrates the analysis process.
// Its fields are unexported; create one with NewRunner. Its GetToolRunner
// method has the signature required by the ToolProvider interface.
type Runner struct {
	// contains filtered or unexported fields
}

Runner orchestrates the analysis process.

func NewRunner

func NewRunner(ctx context.Context, cfg *config.Config, registry *Registry) (*Runner, error)

NewRunner creates a new analysis runner.

func (*Runner) GetToolRunner

func (r *Runner) GetToolRunner(name string) (tools.Runner, bool)

GetToolRunner returns the tool runner for the specified tool. The boolean result reports whether a runner is available; the returned runner is nil if not.

func (*Runner) IsExcluded

func (r *Runner) IsExcluded(path string) bool

IsExcluded checks if a path should be excluded based on exclude patterns.

func (*Runner) Run

func (r *Runner) Run(ctx context.Context) (*AnalysisResult, error)

Run executes the analysis pipeline.

type ToolInfo

// ToolInfo holds information about an external tool's availability and status.
//
// Name and Available are always present in the JSON encoding; Path and Error
// carry `omitempty` and are left out when empty.
type ToolInfo struct {
	Name      string `json:"name"`
	Path      string `json:"path,omitempty"`
	Available bool   `json:"available"`
	Error     string `json:"error,omitempty"`
}

ToolInfo holds information about an external tool's availability and status.

type ToolProvider

// ToolProvider is an interface that gives analyzers access to tool runners.
type ToolProvider interface {
	// GetToolRunner looks up the runner for the named tool.
	// NOTE(review): the boolean presumably reports whether a runner is
	// available for that name — confirm against the implementation.
	GetToolRunner(name string) (tools.Runner, bool)
}

ToolProvider is an interface that gives analyzers access to tool runners.

type TypeStats

// TypeStats represents aggregated statistics for a specific file type.
//
// Paths is tagged `json:"-"`, so it is never written to the JSON encoding.
type TypeStats struct {
	Label string   `json:"label"`
	Count int      `json:"count"`
	Size  int64    `json:"size"`
	Paths []string `json:"-"` // Temporary storage during aggregation
}

TypeStats represents aggregated statistics for a specific file type.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL