Documentation
¶
Overview ¶
Package wordcounter provides tools for counting Chinese characters in text files and directories.
This package is designed primarily for Chinese text analysis, offering both single file and directory-based counting capabilities. It supports various export formats including ASCII tables, CSV, and Excel files.
Key features:
- Count lines, Chinese characters, non-Chinese characters, and total characters
- Support for single files and recursive directory scanning
- Flexible ignore patterns similar to .gitignore
- Multiple export formats (table, CSV, Excel)
- HTTP server mode for API access
- Concurrent processing for improved performance
- Comprehensive error handling with structured error types
Basic usage for single file:
	counter := wordcounter.NewFileCounter("document.md")
	if err := counter.Count(); err != nil {
		log.Fatal(err)
	}
	fmt.Println(counter.ExportTable())
Basic usage for directory:
	counter := wordcounter.NewDirCounter("./docs", "*.tmp", "node_modules")
	if err := counter.Count(); err != nil {
		log.Fatal(err)
	}
	fmt.Println(counter.ExportTable())
Index ¶
- Constants
- func DiscoverIgnoreFile() []string
- func ExportCounterCSV(c Countable, filename ...string) (string, error)
- func ExportCounterExcel(c Countable, filename ...string) error
- func ExportCounterTable(c Countable) string
- func ToAbsolutePath(path string) string
- func ValidateExportType(exportType string) error
- func ValidateMode(mode string) error
- func ValidatePath(path string) error
- type CharacterCounter
- type CountBody
- type Countable
- type Counter
- type CounterExporter
- type DirCounter
- func (dc *DirCounter) AddIgnorePattern(pattern string)
- func (dc *DirCounter) Count() error
- func (dc *DirCounter) EnableTotal()
- func (dc *DirCounter) ExportCSV(filename ...string) (string, error)
- func (dc *DirCounter) ExportExcel(filename ...string) error
- func (dc *DirCounter) ExportTable() string
- func (dc *DirCounter) GetFileCounters() []*FileCounter
- func (dc *DirCounter) GetHeader() Row
- func (dc *DirCounter) GetHeaderAndRows() []Row
- func (dc *DirCounter) GetIgnoreList() []string
- func (dc *DirCounter) GetRows() []Row
- func (dc *DirCounter) Ignore(pattern string)
- func (dc *DirCounter) IsIgnored(filename string) bool
- func (dc *DirCounter) IsIgnoredWithError(filename string) (bool, error)
- type ErrorType
- type ExportConfig
- type FileCounter
- func (fc *FileCounter) Count() error
- func (fc *FileCounter) ExportCSV(filename ...string) (string, error)
- func (fc *FileCounter) ExportExcel(filename ...string) error
- func (fc *FileCounter) ExportTable() string
- func (fc *FileCounter) GetHeader() Row
- func (fc *FileCounter) GetRow() Row
- func (fc *FileCounter) GetRows() []Row
- func (fc *FileCounter) GetStats() *Stats
- type IgnoreChecker
- type Row
- type Server
- type Stats
- type WordCounterError
- func NewError(errorType ErrorType, message string, cause error) *WordCounterError
- func NewExportError(operation string, cause error) *WordCounterError
- func NewFileNotFoundError(path string, cause error) *WordCounterError
- func NewFileReadError(path string, cause error) *WordCounterError
- func NewFileWriteError(path string, cause error) *WordCounterError
- func NewInvalidInputError(message string) *WordCounterError
- func NewInvalidPathError(path string, cause error) *WordCounterError
- func NewPatternMatchError(pattern string, cause error) *WordCounterError
- func NewServerError(message string, cause error) *WordCounterError
- type WordCounterServer
Constants ¶
const (
	ExportTypeTable = "table"
	ExportTypeCSV   = "csv"
	ExportTypeExcel = "excel"
)
Export types
const (
	ModeDir  = "dir"
	ModeFile = "file"
)
Mode types
const (
	DefaultExportPath = "counter.xlsx"
	DefaultHost       = "127.0.0.1"
	DefaultPort       = 8080
	DefaultMode       = ModeDir
	DefaultExportType = ExportTypeTable
)
Default values
const (
	ServerAppName = "WordCounter"
	APIVersion    = "v1"
	APIBasePath   = "/" + APIVersion + "/wordcounter"
	PingEndpoint  = APIBasePath + "/ping"
	CountEndpoint = APIBasePath + "/count"
)
Server configuration
const (
	// MinWorkers is the minimum number of workers in the pool
	MinWorkers = 1
	// MaxWorkers is the maximum number of workers in the pool
	MaxWorkers = 32
)
Worker pool configuration
const (
	IgnoreFileName = ".wcignore"
)
File patterns
Variables ¶
This section is empty.
Functions ¶
func DiscoverIgnoreFile ¶
func DiscoverIgnoreFile() []string
func ExportCounterCSV ¶ added in v0.2.0
ExportCounterCSV exports a Countable to CSV format
func ExportCounterExcel ¶ added in v0.2.0
ExportCounterExcel exports a Countable to Excel format
func ExportCounterTable ¶ added in v0.2.0
ExportCounterTable exports a Countable to table format
func ToAbsolutePath ¶
ToAbsolutePath returns path unchanged if it is already absolute; otherwise it converts it to an absolute path. If the conversion fails, the original path is returned.
func ValidateExportType ¶ added in v0.2.0
ValidateExportType checks whether an export type is supported
func ValidateMode ¶ added in v0.2.0
ValidateMode checks whether a mode is supported
func ValidatePath ¶ added in v0.2.0
ValidatePath checks whether a path exists
Types ¶
type CharacterCounter ¶ added in v0.2.0
type CharacterCounter interface {
	// Count counts characters in the given input
	Count(input any) error
	// CountBytes counts characters from byte slice
	CountBytes(data []byte) error
	// GetStats returns the counting statistics
	GetStats() *Stats
}
CharacterCounter defines the interface for character counting
type Countable ¶ added in v0.2.0
type Countable interface {
	// Count performs the counting operation
	Count() error
	// GetHeader returns the header row for export
	GetHeader() Row
	// GetRows returns the data rows for export
	GetRows() []Row
}
Countable defines the interface for counting operations
type Counter ¶ added in v0.2.0
type Counter struct {
	S *Stats // Statistics collected during counting
}
Counter provides character counting functionality for text content. It implements the CharacterCounter interface and tracks statistics including lines, Chinese characters, non-Chinese characters, and total characters.
func NewCounter ¶ added in v0.2.0
func NewCounter() *Counter
NewCounter creates a new Counter instance with initialized statistics. The returned counter is ready to use for counting operations.
func (*Counter) Count ¶ added in v0.2.0
Count analyzes the provided input and updates the character statistics. It accepts either string or []byte input and delegates to CountBytes for processing.
Supported input types:
- string: converted to []byte for processing
- []byte: processed directly
Returns an error if the input is empty or of an unsupported type.
func (*Counter) CountBytes ¶ added in v0.2.0
CountBytes efficiently counts characters from a byte slice with minimal memory allocation. This optimized version processes UTF-8 encoded text in a single pass and updates the following statistics:
- Lines: counted by scanning for newline characters (newlines + 1 for content)
- Chinese characters: identified using optimized Unicode range checks
- Non-Chinese characters: all other characters except newlines
- Total characters: sum of Chinese and non-Chinese characters (excluding newlines)
Performance optimizations:
- Single-pass processing (combines line counting and character analysis)
- Direct Unicode range checks instead of unicode.In() for better performance
- Minimal function call overhead
- Local variables to reduce struct field access overhead
Returns an error if the input data is empty.
type CounterExporter ¶ added in v0.2.0
type CounterExporter struct {
	// contains filtered or unexported fields
}
CounterExporter provides common export functionality for counters
func NewCounterExporter ¶ added in v0.2.0
func NewCounterExporter(counter interface {
	ExportCSV(filename ...string) (string, error)
	ExportExcel(filename ...string) error
	ExportTable() string
}, config ExportConfig) *CounterExporter
NewCounterExporter creates a new CounterExporter
func (*CounterExporter) Export ¶ added in v0.2.0
func (ce *CounterExporter) Export() error
Export performs the export operation based on configuration
type DirCounter ¶
type DirCounter struct {
	// contains filtered or unexported fields
}
func NewDirCounter ¶
func NewDirCounter(dirname string, ignores ...string) *DirCounter
func (*DirCounter) AddIgnorePattern ¶ added in v0.2.0
func (dc *DirCounter) AddIgnorePattern(pattern string)
AddIgnorePattern adds a new ignore pattern (implements IgnoreChecker interface)
func (*DirCounter) Count ¶
func (dc *DirCounter) Count() error
func (*DirCounter) EnableTotal ¶
func (dc *DirCounter) EnableTotal()
func (*DirCounter) ExportExcel ¶
func (dc *DirCounter) ExportExcel(filename ...string) error
func (*DirCounter) ExportTable ¶
func (dc *DirCounter) ExportTable() string
func (*DirCounter) GetFileCounters ¶ added in v0.2.0
func (dc *DirCounter) GetFileCounters() []*FileCounter
GetFileCounters returns the slice of FileCounter instances. This provides access to individual file counting results.
func (*DirCounter) GetHeader ¶ added in v0.2.0
func (dc *DirCounter) GetHeader() Row
GetHeader returns the header row (implements the Countable interface)
func (*DirCounter) GetHeaderAndRows ¶
func (dc *DirCounter) GetHeaderAndRows() []Row
func (*DirCounter) GetIgnoreList ¶ added in v0.2.0
func (dc *DirCounter) GetIgnoreList() []string
GetIgnoreList returns the current ignore patterns. This allows inspection of the configured ignore patterns.
func (*DirCounter) GetRows ¶
func (dc *DirCounter) GetRows() []Row
func (*DirCounter) Ignore ¶
func (dc *DirCounter) Ignore(pattern string)
Deprecated: use AddIgnorePattern instead.
func (*DirCounter) IsIgnored ¶
func (dc *DirCounter) IsIgnored(filename string) bool
func (*DirCounter) IsIgnoredWithError ¶ added in v0.2.0
func (dc *DirCounter) IsIgnoredWithError(filename string) (bool, error)
IsIgnoredWithError checks if a file should be ignored and returns any pattern matching errors
type ErrorType ¶ added in v0.2.0
type ErrorType int
ErrorType represents the category of error
const (
	// ErrorTypeFileNotFound indicates a file or directory was not found
	ErrorTypeFileNotFound ErrorType = iota
	// ErrorTypeFileRead indicates an error reading a file
	ErrorTypeFileRead
	// ErrorTypeFileWrite indicates an error writing a file
	ErrorTypeFileWrite
	// ErrorTypeInvalidInput indicates invalid input was provided
	ErrorTypeInvalidInput
	// ErrorTypeInvalidPath indicates an invalid file path
	ErrorTypeInvalidPath
	// ErrorTypePatternMatch indicates an error in pattern matching
	ErrorTypePatternMatch
	// ErrorTypeExport indicates an error during export operations
	ErrorTypeExport
	// ErrorTypeServer indicates a server-related error
	ErrorTypeServer
)
type ExportConfig ¶ added in v0.2.0
ExportConfig holds configuration for export operations
type FileCounter ¶
type FileCounter struct {
	FileName string // Absolute path to the file being analyzed
	// contains filtered or unexported fields
}
FileCounter provides character counting functionality for individual files. It implements the Countable interface and combines file I/O operations with text analysis capabilities.
func NewFileCounter ¶
func NewFileCounter(filename string) *FileCounter
NewFileCounter creates a new FileCounter instance for the specified file. The file path is automatically converted to an absolute path for consistency. The file is not read until Count() is called, allowing for lazy evaluation and better error handling.
Parameters:
- filename: path to the file to be analyzed (relative or absolute)
Returns a configured FileCounter ready for counting operations.
func (*FileCounter) Count ¶
func (fc *FileCounter) Count() error
Count reads the file and performs character analysis. This method opens the file, reads its entire content into memory, and delegates the character counting to the internal Counter.
The method uses io.ReadAll for optimal performance, reading the entire file at once to avoid issues with UTF-8 character boundaries that could occur with buffered reading.
Returns structured errors for different failure scenarios:
- FileNotFoundError: if the file doesn't exist
- FileReadError: if there are I/O errors during reading or counting
func (*FileCounter) ExportCSV ¶
func (fc *FileCounter) ExportCSV(filename ...string) (string, error)
func (*FileCounter) ExportExcel ¶
func (fc *FileCounter) ExportExcel(filename ...string) error
func (*FileCounter) ExportTable ¶
func (fc *FileCounter) ExportTable() string
func (*FileCounter) GetHeader ¶
func (fc *FileCounter) GetHeader() Row
func (*FileCounter) GetRow ¶
func (fc *FileCounter) GetRow() Row
func (*FileCounter) GetRows ¶ added in v0.2.0
func (fc *FileCounter) GetRows() []Row
GetRows returns the data rows (implements the Countable interface)
func (*FileCounter) GetStats ¶ added in v0.2.0
func (fc *FileCounter) GetStats() *Stats
GetStats returns the counting statistics from the internal Counter. This method provides access to the detailed character counting results after Count() has been called.
type IgnoreChecker ¶ added in v0.2.0
type IgnoreChecker interface {
	// IsIgnored checks if a file should be ignored
	IsIgnored(filename string) bool
	// IsIgnoredWithError checks if a file should be ignored and returns any errors
	IsIgnoredWithError(filename string) (bool, error)
	// AddIgnorePattern adds a new ignore pattern
	AddIgnorePattern(pattern string)
}
IgnoreChecker defines the interface for checking if files should be ignored
type Server ¶ added in v0.2.0
type Server interface {
	// Run starts the server on the specified port
	Run(port int) error
	// Count handles the count request
	Count(ctx any) error
}
Server defines the interface for server operations
type Stats ¶
type Stats struct {
	Lines           int `json:"lines,omitempty"`
	ChineseChars    int `json:"chinese_chars,omitempty"`
	NonChineseChars int `json:"non_chinese_chars,omitempty"`
	TotalChars      int `json:"total_chars,omitempty"`
}
func (*Stats) HeaderAndRows ¶
type WordCounterError ¶ added in v0.2.0
WordCounterError represents different types of errors that can occur in wordcounter
func NewError ¶ added in v0.2.0
func NewError(errorType ErrorType, message string, cause error) *WordCounterError
NewError creates a new WordCounterError
func NewExportError ¶ added in v0.2.0
func NewExportError(operation string, cause error) *WordCounterError
NewExportError creates an export error
func NewFileNotFoundError ¶ added in v0.2.0
func NewFileNotFoundError(path string, cause error) *WordCounterError
NewFileNotFoundError creates a file not found error
func NewFileReadError ¶ added in v0.2.0
func NewFileReadError(path string, cause error) *WordCounterError
NewFileReadError creates a file read error
func NewFileWriteError ¶ added in v0.2.0
func NewFileWriteError(path string, cause error) *WordCounterError
NewFileWriteError creates a file write error
func NewInvalidInputError ¶ added in v0.2.0
func NewInvalidInputError(message string) *WordCounterError
NewInvalidInputError creates an invalid input error
func NewInvalidPathError ¶ added in v0.2.0
func NewInvalidPathError(path string, cause error) *WordCounterError
NewInvalidPathError creates an invalid path error
func NewPatternMatchError ¶ added in v0.2.0
func NewPatternMatchError(pattern string, cause error) *WordCounterError
NewPatternMatchError creates a pattern matching error
func NewServerError ¶ added in v0.2.0
func NewServerError(message string, cause error) *WordCounterError
NewServerError creates a server error
func (*WordCounterError) Error ¶ added in v0.2.0
func (e *WordCounterError) Error() string
Error implements the error interface
func (*WordCounterError) Unwrap ¶ added in v0.2.0
func (e *WordCounterError) Unwrap() error
Unwrap returns the underlying error
func (*WordCounterError) WithContext ¶ added in v0.2.0
func (e *WordCounterError) WithContext(key string, value any) *WordCounterError
WithContext adds context information to the error
type WordCounterServer ¶ added in v0.1.8
func NewWordCounterServer ¶ added in v0.1.8
func NewWordCounterServer() *WordCounterServer
func (*WordCounterServer) Count ¶ added in v0.1.8
func (s *WordCounterServer) Count(c echo.Context) error
func (*WordCounterServer) Run ¶ added in v0.1.8
func (s *WordCounterServer) Run(port int) error