Documentation
¶
Index ¶
Constants ¶
View Source
const (
	TOKEN_TYPE_NORMAL = iota + 1
	TOKEN_TYPE_UNKNOWN
	TOKEN_TYPE_CONTROL
	TOKEN_TYPE_USER_DEFINED
	TOKEN_TYPE_UNUSED
	TOKEN_TYPE_BYTE
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BytePairEncoding ¶
type BytePairEncoding struct {
// contains filtered or unexported fields
}
func NewBytePairEncoding ¶
func NewBytePairEncoding(vocab *Vocabulary, pretokenizer ...string) BytePairEncoding
func (BytePairEncoding) Encode ¶
func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error)
func (BytePairEncoding) Vocabulary ¶
func (bpe BytePairEncoding) Vocabulary() *Vocabulary
type SentencePiece ¶
type SentencePiece struct {
// contains filtered or unexported fields
}
func NewSentencePiece ¶
func NewSentencePiece(vocab *Vocabulary) SentencePiece
func (SentencePiece) Encode ¶
func (spm SentencePiece) Encode(s string, addSpecial bool) ([]int32, error)
func (SentencePiece) Vocabulary ¶
func (spm SentencePiece) Vocabulary() *Vocabulary
type Vocabulary ¶
type Vocabulary struct {
	Values []string
	Types  []int32
	Scores []float32
	Merges []string

	BOS, EOS       []int32
	AddBOS, AddEOS bool
	// contains filtered or unexported fields
}
func (*Vocabulary) Decode ¶
func (v *Vocabulary) Decode(id int32) string
func (*Vocabulary) Encode ¶
func (v *Vocabulary) Encode(s string) int32
func (*Vocabulary) Merge ¶
func (v *Vocabulary) Merge(left, right string) int
func (*Vocabulary) SpecialVocabulary ¶
func (v *Vocabulary) SpecialVocabulary() []string
type WordPiece ¶
type WordPiece struct {
// contains filtered or unexported fields
}
func NewWordPiece ¶
func NewWordPiece(vocab *Vocabulary, lowercase bool) WordPiece
func (WordPiece) Vocabulary ¶
func (wpm WordPiece) Vocabulary() *Vocabulary
Vocabulary implements Tokenizer.
Click to show internal directories.
Click to hide internal directories.