Versions in this module Expand all Collapse all v1 v1.0.0 Sep 20, 2024 Changes in this version + const ContiguousBehavior + const IsolatedBehavior + const MergedWithNextBehavior + const MergedWithPreviousBehavior + const NormalizedTarget + const OriginalTarget + const RemovedBehavior + func BytesToChar(s string, byteRange []int) (retVal []int) + func CharToBytes(s string, charRange []int) (retVal []int) + func FindAllStringIndex(re *re2.Regexp, s string, n int) [][]int + func IsBertPunctuation(c rune) bool + func IsBertWhitespace(c rune) bool + func IsChinese(c rune) bool + func IsPunctuation(c rune) bool + func IsWhitespace(c rune) bool + func RangeOf(s string, r []int) (retVal string) + type BertNormalizer struct + CleanText bool + HandleChineseChars bool + Lowercase bool + StripAccents bool + func NewBertNormalizer(cleanText, lowercase, handleChineseChars, stripAccents bool) *BertNormalizer + func (bn *BertNormalizer) Normalize(n *NormalizedString) (*NormalizedString, error) + type ChangeMap struct + Changes int + RuneVal string + type DefaultNormalizer struct + func NewDefaultNormalizer(opts ...DefaultOption) *DefaultNormalizer + func (dn *DefaultNormalizer) Normalize(n *NormalizedString) (*NormalizedString, error) + type DefaultOption func(*DefaultNormalizer) + func WithLowercase(lowercase bool) DefaultOption + func WithStrip(strip bool) DefaultOption + type FnPattern struct + func NewFnPattern(fn PatternFn) *FnPattern + func (fp *FnPattern) FindMatches(inside string) []OffsetsMatch + type IndexOn int + type Invert struct + Pattern Pattern + func NewInvertPattern(p Pattern) *Invert + func (i *Invert) FindMatches(inside string) []OffsetsMatch + type NFC struct + func NewNFC() *NFC + func (n *NFC) Normalize(norm *NormalizedString) (*NormalizedString, error) + type NFD struct + func NewNFD() *NFD + func (n *NFD) Normalize(norm *NormalizedString) (*NormalizedString, error) + type NFKC struct + func NewNFKC() *NFKC + func (n *NFKC) Normalize(norm *NormalizedString) (*NormalizedString, error) + type NFKD struct + func NewNFKD() *NFKD + func (n *NFKD) Normalize(norm *NormalizedString) (*NormalizedString, error) + type NormFn func(rune) rune + type NormalizedString struct + func NewNormalizedFrom(s string) (retVal *NormalizedString) + func NewNormalizedString(original, normalized string, alignments, alignmentsOriginal [][]int, ...) *NormalizedString + func (n *NormalizedString) Alignments() (retVal [][]int) + func (n *NormalizedString) AlignmentsOriginal() (retVal [][]int) + func (n *NormalizedString) Append(s string) (retVal *NormalizedString) + func (n *NormalizedString) Clear() + func (n *NormalizedString) ConvertOffset(inputRange *Range) (retVal *Range) + func (n *NormalizedString) Filter(fn func(rune) bool) (retVal *NormalizedString) + func (n *NormalizedString) ForEach(nfn NormFn) (retVal *NormalizedString) + func (n *NormalizedString) GetNormalized() string + func (n *NormalizedString) GetOriginal() string + func (n *NormalizedString) IsEmpty() bool + func (n *NormalizedString) LStrip() (retVal *NormalizedString) + func (n *NormalizedString) Len() int + func (n *NormalizedString) LenOriginal() int + func (n *NormalizedString) Lowercase() (retVal *NormalizedString) + func (n *NormalizedString) Map(nfn NormFn) (retVal *NormalizedString) + func (n *NormalizedString) NFC() (retVal *NormalizedString) + func (n *NormalizedString) NFD() (retVal *NormalizedString) + func (n *NormalizedString) NFKC() (retVal *NormalizedString) + func (n *NormalizedString) NFKD() (retVal *NormalizedString) + func (n *NormalizedString) OffsetsOriginal() []int + func (n *NormalizedString) Prepend(s string) (retVal *NormalizedString) + func (n *NormalizedString) RStrip() (retVal *NormalizedString) + func (n *NormalizedString) Range(r *Range) (retVal string) + func (n *NormalizedString) RangeOriginal(r *Range) (retVal string) + func (n *NormalizedString) RemoveAccents() (retVal *NormalizedString) + func (n *NormalizedString) Replace(pattern Pattern, content string) (retVal *NormalizedString) + func (n *NormalizedString) Shift() int + func (n *NormalizedString) Slice(inputRange *Range) (retVal *NormalizedString) + func (n *NormalizedString) Split(pattern Pattern, behavior SplitDelimiterBehavior) (retVal []NormalizedString) + func (n *NormalizedString) Strip() (retVal *NormalizedString) + func (n *NormalizedString) Transform(m []ChangeMap, initialOffset int) (retVal *NormalizedString) + func (n *NormalizedString) TransformRange(inputRange *Range, changeMap []ChangeMap, initialOffset int) (retVal *NormalizedString) + func (n *NormalizedString) Uppercase() (retVal *NormalizedString) + type Normalizer interface + Normalize func(normalized *NormalizedString) (*NormalizedString, error) + func Lowercase() Normalizer + func NewNormalizer(opts ...Option) Normalizer + type OffsetsMatch struct + Match bool + Offsets []int + type OffsetsRemove struct + Offsets []int + ShouldRemove bool + type Option func(*normalizer) + func WithBertNormalizer(cleanText, lowercase, handleChineseChars, stripAccents bool) Option + func WithUnicodeNormalizer(form norm.Form) Option + type Pattern interface + FindMatches func(inside string) []OffsetsMatch + type PatternFn func(rune) bool + type Precompiled struct + func (m *Precompiled) Normalize(normalized *NormalizedString) (*NormalizedString, error) + type Prepend struct + Prepend string + func NewPrepend(prepend string) *Prepend + func (p *Prepend) Normalize(normalized *NormalizedString) (*NormalizedString, error) + type Range struct + func NewRange(start int, end int, indexOn IndexOn) (retVal *Range) + func (r *Range) End() (retVal int) + func (r *Range) IntoFullRange(maxLen int) (retVal *Range) + func (r *Range) Len() int + func (r *Range) On() IndexOn + func (r *Range) Start() (retVal int) + func (r *Range) Values() []int + type RegexpPattern struct + func NewRegexpPattern(s string) *RegexpPattern + func (rp *RegexpPattern) FindMatches(inside string) []OffsetsMatch + type Replace struct + Content string + Pattern Pattern + PatternType ReplacePattern + func NewReplace(patternType ReplacePattern, pattern string, content string) *Replace + func (r *Replace) Decode(tokens []string) string + func (r *Replace) DecodeChain(tokens []string) []string + func (r *Replace) Normalize(normalized *NormalizedString) (*NormalizedString, error) + type ReplacePattern int + const Regex + const String + type RunePattern struct + func NewRunePattern(r rune) *RunePattern + func (r *RunePattern) FindMatches(inside string) []OffsetsMatch + type Sequence struct + Normalizers []Normalizer + func NewSequence(norms []Normalizer) *Sequence + func (s *Sequence) Normalize(normalized *NormalizedString) (*NormalizedString, error) + type SplitDelimiterBehavior int + type StringPattern struct + func NewStringPattern(s string) *StringPattern + func (s *StringPattern) FindMatches(inside string) []OffsetsMatch + type Strip struct + func NewStrip(stripLeft, stripRight bool) *Strip + func (s *Strip) Normalize(normalized *NormalizedString) (*NormalizedString, error) + type StripAccents struct + func NewStripAccents() *StripAccents + func (sa *StripAccents) Normalize(normalized *NormalizedString) (*NormalizedString, error) + type UnicodeNormalizer struct + Form norm.Form + func NewUnicodeNormalizer(form norm.Form) *UnicodeNormalizer + func (un *UnicodeNormalizer) Normalize(n *NormalizedString) (*NormalizedString, error)