wordpiece

package
v1.0.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 20, 2024 License: Apache-2.0 Imports: 10 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type WordPiece

type WordPiece struct {
	// contains filtered or unexported fields
}

WordPiece is a WordPiece model. Ref: https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/37842.pdf

func New

func New(
	vocab model.Vocab,
	opts *util.Params,
) (*WordPiece, error)

New creates a WordPiece model from input data.

func NewWordPiece

func NewWordPiece() (retVal WordPiece)

NewWordPiece initiates a new WordPiece with default values.

func NewWordPieceFromBPE

func NewWordPieceFromBPE(bpe bpe.BPE) (retVal WordPiece)

NewWordPieceFromBPE creates a WordPiece model from a BPE model.

func NewWordPieceFromFile

func NewWordPieceFromFile(vocabFile string, unkToken string, maxInputCharsPerWordOpt ...int) (retVal WordPiece, err error)

NewWordPieceFromFile initializes a WordPiece model from a mapping file

func (WordPiece) Builder

func (wp WordPiece) Builder() (retVal WordPieceBuilder)

Builder gets a WordPieceBuilder

func (WordPiece) GetVocab

func (wp WordPiece) GetVocab() (retVal map[string]int)

func (WordPiece) GetVocabSize

func (wp WordPiece) GetVocabSize() (retVal int)

func (WordPiece) IdToToken

func (wp WordPiece) IdToToken(id int) (retVal string, ok bool)

func (WordPiece) ReadFiles

func (wp WordPiece) ReadFiles(filename string) (retVal model.Vocab)

ReadFiles reads the given file to extract the vocab

func (WordPiece) Save

func (wp WordPiece) Save(dir string, nameOpt ...string) (err error)

func (WordPiece) TokenToId

func (wp WordPiece) TokenToId(token string) (retVal int, ok bool)

func (WordPiece) Tokenize

func (wp WordPiece) Tokenize(sequence string) (retVal []tokenizer.Token, err error)

type WordPieceBuilder

type WordPieceBuilder struct {
	// contains filtered or unexported fields
}

WordPieceBuilder can be used to create a WordPiece model with a custom configuration

func NewWordPieceBuilder

func NewWordPieceBuilder() (retVal WordPieceBuilder)

func NewWordPieceBuilderFromFile

func NewWordPieceBuilderFromFile(filename string) (retVal WordPieceBuilder)

NewWordPieceBuilderFromFile initializes a WordPieceBuilder from a vocab mapping file

func (WordPieceBuilder) Build

func (wpb WordPieceBuilder) Build() (retVal WordPiece)

Build constructs a `WordPiece` model that uses the `WordPieceBuilder`'s configuration.

func (WordPieceBuilder) ContinuingSubwordPrefix

func (wpb WordPieceBuilder) ContinuingSubwordPrefix(continueSubwordPrefix string) (retVal WordPieceBuilder)

ContinuingSubwordPrefix sets the prefix for continuing subwords.

func (WordPieceBuilder) Files

func (wpb WordPieceBuilder) Files(vocab string) (retVal WordPieceBuilder)

Files sets the input files

func (WordPieceBuilder) MaxInputCharsPerWord

func (wpb WordPieceBuilder) MaxInputCharsPerWord(maxInputCharsPerWord int) (retVal WordPieceBuilder)

MaxInputCharsPerWord sets the maximum number of input characters per word.

func (WordPieceBuilder) UnkToken

func (wpb WordPieceBuilder) UnkToken(unkToken string) (retVal WordPieceBuilder)

UnkToken sets the `UNK` token for the vocab.

func (WordPieceBuilder) Vocab

func (wpb WordPieceBuilder) Vocab(vocab *model.Vocab) (retVal WordPieceBuilder)

Vocab sets the vocab (token -> ID) mapping.

type WordPieceTrainer

type WordPieceTrainer struct {
	// contains filtered or unexported fields
}

WordPieceTrainer is a trainer for WordPiece model

func (WordPieceTrainer) Builder

func (wpt WordPieceTrainer) Builder() (retVal WordPieceTrainerBuilder)

Builder creates WordPieceTrainerBuilder

func (WordPieceTrainer) ProcessTokens

func (wpt WordPieceTrainer) ProcessTokens(words map[string]int, tokens []string)

func (WordPieceTrainer) Train

func (wpt WordPieceTrainer) Train(wordCounts map[string]int) (retVal tokenizer.Model)

func (WordPieceTrainer) WithProgressBar

func (wpt WordPieceTrainer) WithProgressBar() (retVal bool)

type WordPieceTrainerBuilder

type WordPieceTrainerBuilder struct {
	// contains filtered or unexported fields
}

WordPieceTrainerBuilder can be used to create a `WordPieceTrainer` with a custom configuration.

func NewWordPieceTrainerBuilder

func NewWordPieceTrainerBuilder() (retVal WordPieceTrainerBuilder)

NewWordPieceTrainerBuilder creates a new WordPieceTrainerBuilder.

func (WordPieceTrainerBuilder) Build

func (wptb WordPieceTrainerBuilder) Build() (retVal WordPieceTrainer)

Build constructs the final WordPieceTrainer.

func (WordPieceTrainerBuilder) ContinuingSubwordPrefix

func (wptb WordPieceTrainerBuilder) ContinuingSubwordPrefix(prefix string) (retVal WordPieceTrainerBuilder)

ContinuingSubwordPrefix sets the continuing_subword_prefix.

func (WordPieceTrainerBuilder) EndOfWordSuffix

func (wptb WordPieceTrainerBuilder) EndOfWordSuffix(suffix string) (retVal WordPieceTrainerBuilder)

EndOfWordSuffix sets the end_of_word_suffix.

func (WordPieceTrainerBuilder) InitialAlphabet

func (wptb WordPieceTrainerBuilder) InitialAlphabet(alphabet bpe.CharSet) (retVal WordPieceTrainerBuilder)

InitialAlphabet sets the initial alphabet.

func (WordPieceTrainerBuilder) LimitAlphabet

func (wptb WordPieceTrainerBuilder) LimitAlphabet(limit int) (retVal WordPieceTrainerBuilder)

LimitAlphabet sets the limit applied to the alphabet.

func (WordPieceTrainerBuilder) MinFrequency

func (wptb WordPieceTrainerBuilder) MinFrequency(frequency int) (retVal WordPieceTrainerBuilder)

MinFrequency sets the frequency threshold for the trainer.

func (WordPieceTrainerBuilder) ShowProgress

func (wptb WordPieceTrainerBuilder) ShowProgress(show bool) (retVal WordPieceTrainerBuilder)

ShowProgress sets whether to show progress.

func (WordPieceTrainerBuilder) SpecialTokens

func (wptb WordPieceTrainerBuilder) SpecialTokens(tokens []tokenizer.AddedToken) (retVal WordPieceTrainerBuilder)

SpecialTokens sets the special tokens.

func (WordPieceTrainerBuilder) VocabSize

func (wptb WordPieceTrainerBuilder) VocabSize(size int) (retVal WordPieceTrainerBuilder)

VocabSize sets the vocabulary size.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL