Documentation
¶
Index ¶
- type WordPiece
- func (wp WordPiece) Builder() (retVal WordPieceBuilder)
- func (wp WordPiece) GetVocab() (retVal map[string]int)
- func (wp WordPiece) GetVocabSize() (retVal int)
- func (wp WordPiece) IdToToken(id int) (retVal string, ok bool)
- func (wp WordPiece) ReadFiles(filename string) (retVal model.Vocab)
- func (wp WordPiece) Save(dir string, nameOpt ...string) (err error)
- func (wp WordPiece) TokenToId(token string) (retVal int, ok bool)
- func (wp WordPiece) Tokenize(sequence string) (retVal []tokenizer.Token, err error)
- type WordPieceBuilder
- func (wpb WordPieceBuilder) Build() (retVal WordPiece)
- func (wpb WordPieceBuilder) ContinuingSubwordPrefix(continueSubwordPrefix string) (retVal WordPieceBuilder)
- func (wpb WordPieceBuilder) Files(vocab string) (retVal WordPieceBuilder)
- func (wpb WordPieceBuilder) MaxInputCharsPerWord(maxInputCharsPerWord int) (retVal WordPieceBuilder)
- func (wpb WordPieceBuilder) UnkToken(unkToken string) (retVal WordPieceBuilder)
- func (wpb WordPieceBuilder) Vocab(vocab *model.Vocab) (retVal WordPieceBuilder)
- type WordPieceTrainer
- type WordPieceTrainerBuilder
- func (wptb WordPieceTrainerBuilder) Build() (retVal WordPieceTrainer)
- func (wptb WordPieceTrainerBuilder) ContinuingSubwordPrefix(prefix string) (retVal WordPieceTrainerBuilder)
- func (wptb WordPieceTrainerBuilder) EndOfWordSuffix(suffix string) (retVal WordPieceTrainerBuilder)
- func (wptb WordPieceTrainerBuilder) InitialAlphabet(alphabet bpe.CharSet) (retVal WordPieceTrainerBuilder)
- func (wptb WordPieceTrainerBuilder) LimitAlphabet(limit int) (retVal WordPieceTrainerBuilder)
- func (wptb WordPieceTrainerBuilder) MinFrequency(frequency int) (retVal WordPieceTrainerBuilder)
- func (wptb WordPieceTrainerBuilder) ShowProgress(show bool) (retVal WordPieceTrainerBuilder)
- func (wptb WordPieceTrainerBuilder) SpecialTokens(tokens []tokenizer.AddedToken) (retVal WordPieceTrainerBuilder)
- func (wptb WordPieceTrainerBuilder) VocabSize(size int) (retVal WordPieceTrainerBuilder)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type WordPiece ¶
type WordPiece struct {
// contains filtered or unexported fields
}
WordPiece is a WordPiece model. Ref: https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/37842.pdf
func NewWordPiece ¶
func NewWordPiece() (retVal WordPiece)
NewWordPiece creates a new WordPiece with default values.
func NewWordPieceFromBPE ¶
NewWordPieceFromBPE creates a WordPiece model from a BPE model.
func NewWordPieceFromFile ¶
func NewWordPieceFromFile(vocabFile string, unkToken string, maxInputCharsPerWordOpt ...int) (retVal WordPiece, err error)
NewWordPieceFromFile initializes a WordPiece model from a mapping file
func (WordPiece) Builder ¶
func (wp WordPiece) Builder() (retVal WordPieceBuilder)
Builder gets a WordPieceBuilder
func (WordPiece) GetVocabSize ¶
type WordPieceBuilder ¶
type WordPieceBuilder struct {
// contains filtered or unexported fields
}
WordPieceBuilder can be used to create a WordPiece model with a custom configuration
func NewWordPieceBuilder ¶
func NewWordPieceBuilder() (retVal WordPieceBuilder)
func NewWordPieceBuilderFromFile ¶
func NewWordPieceBuilderFromFile(filename string) (retVal WordPieceBuilder)
NewWordPieceBuilderFromFile initializes a WordPieceBuilder from a vocab mapping file
func (WordPieceBuilder) Build ¶
func (wpb WordPieceBuilder) Build() (retVal WordPiece)
Build constructs a `WordPiece` model that uses the `WordPieceBuilder`'s configuration.
func (WordPieceBuilder) ContinuingSubwordPrefix ¶
func (wpb WordPieceBuilder) ContinuingSubwordPrefix(continueSubwordPrefix string) (retVal WordPieceBuilder)
ContinuingSubwordPrefix sets the prefix for continuing subwords.
func (WordPieceBuilder) Files ¶
func (wpb WordPieceBuilder) Files(vocab string) (retVal WordPieceBuilder)
Files sets the input vocab file.
func (WordPieceBuilder) MaxInputCharsPerWord ¶
func (wpb WordPieceBuilder) MaxInputCharsPerWord(maxInputCharsPerWord int) (retVal WordPieceBuilder)
MaxInputCharsPerWord sets the maximum number of input characters per word.
func (WordPieceBuilder) UnkToken ¶
func (wpb WordPieceBuilder) UnkToken(unkToken string) (retVal WordPieceBuilder)
UnkToken sets the `UNK` token for the vocab.
func (WordPieceBuilder) Vocab ¶
func (wpb WordPieceBuilder) Vocab(vocab *model.Vocab) (retVal WordPieceBuilder)
Vocab sets the vocab (token -> ID) mapping.
type WordPieceTrainer ¶
type WordPieceTrainer struct {
// contains filtered or unexported fields
}
WordPieceTrainer is a trainer for WordPiece model
func (WordPieceTrainer) Builder ¶
func (wpt WordPieceTrainer) Builder() (retVal WordPieceTrainerBuilder)
Builder creates a WordPieceTrainerBuilder.
func (WordPieceTrainer) ProcessTokens ¶
func (wpt WordPieceTrainer) ProcessTokens(words map[string]int, tokens []string)
func (WordPieceTrainer) Train ¶
func (wpt WordPieceTrainer) Train(wordCounts map[string]int) (retVal tokenizer.Model)
func (WordPieceTrainer) WithProgressBar ¶
func (wpt WordPieceTrainer) WithProgressBar() (retVal bool)
type WordPieceTrainerBuilder ¶
type WordPieceTrainerBuilder struct {
// contains filtered or unexported fields
}
WordPieceTrainerBuilder can be used to create a `WordPieceTrainer` with a custom configuration.
func NewWordPieceTrainerBuilder ¶
func NewWordPieceTrainerBuilder() (retVal WordPieceTrainerBuilder)
NewWordPieceTrainerBuilder creates a new WordPieceTrainerBuilder.
func (WordPieceTrainerBuilder) Build ¶
func (wptb WordPieceTrainerBuilder) Build() (retVal WordPieceTrainer)
Build constructs the final WordPieceTrainer.
func (WordPieceTrainerBuilder) ContinuingSubwordPrefix ¶
func (wptb WordPieceTrainerBuilder) ContinuingSubwordPrefix(prefix string) (retVal WordPieceTrainerBuilder)
ContinuingSubwordPrefix sets the continuing_subword_prefix.
func (WordPieceTrainerBuilder) EndOfWordSuffix ¶
func (wptb WordPieceTrainerBuilder) EndOfWordSuffix(suffix string) (retVal WordPieceTrainerBuilder)
EndOfWordSuffix sets the end_of_word_suffix.
func (WordPieceTrainerBuilder) InitialAlphabet ¶
func (wptb WordPieceTrainerBuilder) InitialAlphabet(alphabet bpe.CharSet) (retVal WordPieceTrainerBuilder)
InitialAlphabet sets the initial alphabet.
func (WordPieceTrainerBuilder) LimitAlphabet ¶
func (wptb WordPieceTrainerBuilder) LimitAlphabet(limit int) (retVal WordPieceTrainerBuilder)
LimitAlphabet sets the limit applied to the alphabet.
func (WordPieceTrainerBuilder) MinFrequency ¶
func (wptb WordPieceTrainerBuilder) MinFrequency(frequency int) (retVal WordPieceTrainerBuilder)
MinFrequency sets the frequency threshold for the trainer.
func (WordPieceTrainerBuilder) ShowProgress ¶
func (wptb WordPieceTrainerBuilder) ShowProgress(show bool) (retVal WordPieceTrainerBuilder)
ShowProgress sets whether to show progress.
func (WordPieceTrainerBuilder) SpecialTokens ¶
func (wptb WordPieceTrainerBuilder) SpecialTokens(tokens []tokenizer.AddedToken) (retVal WordPieceTrainerBuilder)
SpecialTokens sets the special tokens.
func (WordPieceTrainerBuilder) VocabSize ¶
func (wptb WordPieceTrainerBuilder) VocabSize(size int) (retVal WordPieceTrainerBuilder)
VocabSize sets the vocabulary size.