Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AdvancedSentenceTokenizer ¶
type AdvancedSentenceTokenizer struct {
// contains filtered or unexported fields
}
AdvancedSentenceTokenizer provides robust, multilingual sentence boundary detection. It eliminates the need for the CGO-bound BlingFire C++ library in the Go parity implementation.
func NewAdvancedSentenceTokenizer ¶
func NewAdvancedSentenceTokenizer() *AdvancedSentenceTokenizer
func (*AdvancedSentenceTokenizer) Stream ¶
func (t *AdvancedSentenceTokenizer) Stream(language string) SentenceStream
type BasicSentenceTokenizer ¶
type BasicSentenceTokenizer struct{}
func NewBasicSentenceTokenizer ¶
func NewBasicSentenceTokenizer() *BasicSentenceTokenizer
func (*BasicSentenceTokenizer) Stream ¶
func (t *BasicSentenceTokenizer) Stream(language string) SentenceStream
type BasicWordTokenizer ¶
type BasicWordTokenizer struct{}
func NewBasicWordTokenizer ¶
func NewBasicWordTokenizer() *BasicWordTokenizer
func (*BasicWordTokenizer) Stream ¶
func (t *BasicWordTokenizer) Stream(language string) WordStream
type BufferedTokenStream ¶
type BufferedTokenStream struct {
// contains filtered or unexported fields
}
func NewBufferedTokenStream ¶
func NewBufferedTokenStream(fnc func(string) []string, minTokenLen, minCtxLen int) *BufferedTokenStream
func (*BufferedTokenStream) Close ¶
func (s *BufferedTokenStream) Close() error
func (*BufferedTokenStream) Flush ¶
func (s *BufferedTokenStream) Flush() error
func (*BufferedTokenStream) Next ¶
func (s *BufferedTokenStream) Next() (*TokenData, error)
func (*BufferedTokenStream) PushText ¶
func (s *BufferedTokenStream) PushText(text string) error
type SentenceStream ¶
type SentenceTokenizer ¶
type SentenceTokenizer interface {
Tokenize(text string, language string) []string
Stream(language string) SentenceStream
}
type WordStream ¶
type WordTokenizer ¶
type WordTokenizer interface {
Tokenize(text string, language string) []string
Stream(language string) WordStream
}
Click to show internal directories.
Click to hide internal directories.