Documentation
¶
Index ¶
- type BertProcessing
- type ByteLevelProcessing
- type Piece
- type PostToken
- type RobertaProcessing
- func (rp *RobertaProcessing) AddPrefixSpace(addPrefixSpace bool)
- func (rp *RobertaProcessing) AddedTokens(isPair bool) int
- func (rp *RobertaProcessing) Process(encoding, pairEncoding *tokenizer.Encoding, addSpecialTokens bool) *tokenizer.Encoding
- func (rp *RobertaProcessing) TrimOffsets(trimOffsets bool)
- type Sequence
- type SequenceEnum
- type SequencePiece
- type SpecialToken
- type SpecialTokenPiece
- type Template
- type TemplateProcessing
- func (tp *TemplateProcessing) AddedTokens(isPair bool) int
- func (tp *TemplateProcessing) ApplyTemplate(template []Piece, encodings []tokenizer.Encoding, addSpecialTokens bool) []tokenizer.Encoding
- func (tp *TemplateProcessing) Builder() *TemplateProcessingBuilder
- func (tp *TemplateProcessing) Process(encoding, pairEncoding *tokenizer.Encoding, addSpecialTokens bool) *tokenizer.Encoding
- type TemplateProcessingBuilder
- func (tp *TemplateProcessingBuilder) Build() *TemplateProcessing
- func (b *TemplateProcessingBuilder) DefaultAdded(isSingle bool) int
- func (b *TemplateProcessingBuilder) NewPair(v interface{})
- func (b *TemplateProcessingBuilder) NewSingle(v interface{})
- func (b *TemplateProcessingBuilder) NewSpecialTokens(tokens []tokenizer.Token)
- func (b *TemplateProcessingBuilder) Validate() error
- type TemplateProcessingDeserializer
- type Tokens
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BertProcessing ¶
type BertProcessing struct {
// contains filtered or unexported fields
}
func NewBertProcessing ¶
func NewBertProcessing(sep, cls PostToken) (retVal *BertProcessing)
func (*BertProcessing) AddedTokens ¶
func (bp *BertProcessing) AddedTokens(isPair bool) (retVal int)
type ByteLevelProcessing ¶
type ByteLevelProcessing struct {
// contains filtered or unexported fields
}
func NewByteLevelProcessing ¶
func NewByteLevelProcessing(pretok *pretokenizer.ByteLevel) (retVal *ByteLevelProcessing)
func (*ByteLevelProcessing) AddedTokens ¶
func (blp *ByteLevelProcessing) AddedTokens(isPair bool) (retVal int)
type RobertaProcessing ¶
type RobertaProcessing struct {
// contains filtered or unexported fields
}
RobertaProcessing is a post-processor for the RoBERTa model.
func DefaultRobertaProcessing ¶
func DefaultRobertaProcessing() *RobertaProcessing
DefaultRobertaProcessing creates a RobertaProcessing with default values
func NewRobertaProcessing ¶
func NewRobertaProcessing(sep, cls PostToken, trimOffsets bool, addPrefixSpace bool) *RobertaProcessing
func (*RobertaProcessing) AddPrefixSpace ¶
func (rp *RobertaProcessing) AddPrefixSpace(addPrefixSpace bool)
AddPrefixSpace set whether the processor will add a prefix space
func (*RobertaProcessing) AddedTokens ¶
func (rp *RobertaProcessing) AddedTokens(isPair bool) int
func (*RobertaProcessing) Process ¶
func (rp *RobertaProcessing) Process(encoding, pairEncoding *tokenizer.Encoding, addSpecialTokens bool) *tokenizer.Encoding
Process post-processes input encoding(s) by adding special tokens if instructed to do so.
Specifically, if addSpecialTokens=true, it adds the following special-token patterns:
- Single encoding: <s> Sequence </s>
- Pair encoding: <s> SequenceA </s> </s> SequenceB </s>
func (*RobertaProcessing) TrimOffsets ¶
func (rp *RobertaProcessing) TrimOffsets(trimOffsets bool)
TrimOffsets set whether the processor will trim offsets
type Sequence ¶
type Sequence struct {
// contains filtered or unexported fields
}
func NewSequence ¶
func NewSequence(processors []tokenizer.PostProcessor) *Sequence
func (*Sequence) AddedTokens ¶
type SequencePiece ¶
type SequencePiece struct {
Id SequenceEnum `json:"id"`
TypeId int `json:"type_id"`
}
func NewSequencePiece ¶
func NewSequencePiece(id string, typeId int) *SequencePiece
func (*SequencePiece) WithTypeId ¶
func (p *SequencePiece) WithTypeId(v int)
SequencePiece implements the Piece interface.
type SpecialToken ¶
type SpecialToken struct {
// A unique id used to identify this SpecialToken in the template
Id string
// The list of associated ids
Ids []int
// The list of associated tokens
Tokens []string
}
Represents a bunch of tokens to be used in a template. Usually, special tokens have only one associated id/token but in some cases, it might be interesting to have multiple ids/tokens.
func NewSpecialToken ¶
func NewSpecialToken(id string, ids []int, tokens []string) *SpecialToken
func NewSpecialTokenFrom ¶
func NewSpecialTokenFrom(s string, id int) *SpecialToken
type SpecialTokenPiece ¶
func NewSpecialTokenPiece ¶
func NewSpecialTokenPiece(id string, typeId int) *SpecialTokenPiece
func (*SpecialTokenPiece) WithTypeId ¶
func (p *SpecialTokenPiece) WithTypeId(v int)
type Template ¶
type Template []Piece
func NewTemplate ¶
func NewTemplateFromMulti ¶
func NewTemplateFromOne ¶
type TemplateProcessing ¶
type TemplateProcessing struct {
Single Template
Pair Template
AddedSingle int
AddedPair int
SpecialTokens *Tokens
}
TemplateProcessing is a PostProcessor that processes each input Encoding by applying the corresponding template, before merging them into the final Encoding.

A Template is a sequence of Piece elements that are concatenated together in the given order. Each Piece represents either one of the input Encodings or a SpecialToken.

Example (from the original Rust implementation):

	use tokenizers::processors::template::TemplateProcessing;
	let template = TemplateProcessing::builder()
	    .try_single("[CLS] $A [SEP]").unwrap()
	    .try_pair("[CLS] $A [SEP] $B:1 [SEP]:1").unwrap()
	    .special_tokens(vec![("[CLS]", 1), ("[SEP]", 0)])
	    .build()
	    .unwrap();
func DefaultTemplateProcessing ¶
func DefaultTemplateProcessing() *TemplateProcessing
func NewTemplateProcessing ¶
func NewTemplateProcessing(single, pair Template, specialTokens *Tokens) *TemplateProcessing
func NewTemplateProcessingFrom ¶
func NewTemplateProcessingFrom(t *TemplateProcessingDeserializer) *TemplateProcessing
func (*TemplateProcessing) AddedTokens ¶
func (tp *TemplateProcessing) AddedTokens(isPair bool) int
func (*TemplateProcessing) ApplyTemplate ¶
func (*TemplateProcessing) Builder ¶
func (tp *TemplateProcessing) Builder() *TemplateProcessingBuilder
type TemplateProcessingBuilder ¶
type TemplateProcessingBuilder struct {
*TemplateProcessing
}
func (*TemplateProcessingBuilder) Build ¶
func (tp *TemplateProcessingBuilder) Build() *TemplateProcessing
func (*TemplateProcessingBuilder) DefaultAdded ¶
func (b *TemplateProcessingBuilder) DefaultAdded(isSingle bool) int
func (*TemplateProcessingBuilder) NewPair ¶
func (b *TemplateProcessingBuilder) NewPair(v interface{})
func (*TemplateProcessingBuilder) NewSingle ¶
func (b *TemplateProcessingBuilder) NewSingle(v interface{})
func (*TemplateProcessingBuilder) NewSpecialTokens ¶
func (b *TemplateProcessingBuilder) NewSpecialTokens(tokens []tokenizer.Token)
func (*TemplateProcessingBuilder) Validate ¶
func (b *TemplateProcessingBuilder) Validate() error
type Tokens ¶
type Tokens struct {
TokenMap map[string]SpecialToken // NOTE. HF is an ordered map
// contains filtered or unexported fields
}
A bunch of [`SpecialToken`] represented by their ID.
func DefaultTokens ¶
func DefaultTokens() *Tokens
func NewTokensFrom ¶
func NewTokensFrom(toks []SpecialToken) *Tokens
func NewTokensFromMap ¶
func NewTokensFromMap(m map[string]SpecialToken) *Tokens
func (*Tokens) GetItemByKey ¶
func (t *Tokens) GetItemByKey(id string) (SpecialToken, bool)
func (*Tokens) GetItemByOrder ¶
func (t *Tokens) GetItemByOrder(index int) (SpecialToken, bool)