Documentation ¶
Overview ¶
Package bart implements the transformer model introduced by Lewis et al., 2019: "BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension" https://arxiv.org/abs/1910.13461
Index ¶
- type Cache
- type Classifier
- type ClassifierConfig
- type Config
- type CrossAttentionBlock
- type CrossAttentionBlockConfig
- type Decoder
- type DecoderLayer
- type DecodingInput
- type DecodingOutput
- type Embeddings
- type Encoder
- type EncoderLayer
- type FeedForwardBlock
- type Model
- type ModelForConditionalGeneration
- type ModelForSequenceClassification
- type NewFeedForwardBlockConfig
- type PositionalEncoder
- type PositionalEncoderConfig
- type PostNormCrossAttentionBlock
- type PostNormFeedForwardBlock
- type PostNormSelfAttentionBlock
- type PreNormCrossAttentionBlock
- type PreNormFeedForwardBlock
- type PreNormSelfAttentionBlock
- type ResidualNormCrossAttention
- type ResidualNormFeedForward
- type ResidualNormSelfAttention
- type SelfAttentionBlock
- type SelfAttentionBlockConfig
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Cache ¶
type Cache [][2]multiheadattention.Cache
Cache contains the cache of each DecoderLayer. For each layer, the cache contains the queries, keys and values used by the self-attention at index 0 and cross-attention at index 1.
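For illustration, a minimal sketch of how a populated cache is indexed during decoding, assuming bart is this package's import name:
var cache bart.Cache // one [2]multiheadattention.Cache per DecoderLayer
if len(cache) > 0 {
	selfAttn := cache[0][0]  // self-attention cache of the first layer
	crossAttn := cache[0][1] // cross-attention cache of the first layer
	_, _ = selfAttn, crossAttn
}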
type Classifier ¶
type Classifier struct {
nn.Module
// Config is the configuration of the classifier.
Config ClassifierConfig
// Layers is the list of layers of the MLP.
Layers nn.ModuleList[nn.StandardModel]
}
Classifier implements a Bart head for sentence-level classification tasks.
func NewClassifier ¶
func NewClassifier[T float.DType](c ClassifierConfig) *Classifier
NewClassifier returns a new Classifier.
type ClassifierConfig ¶
type ClassifierConfig struct {
// InputSize is the input size of the classifier.
InputSize int
// HiddenSize is the hidden size of the classifier.
HiddenSize int
// OutputSize is the output size of the classifier.
OutputSize int
// PoolerDropout is the dropout rate for the classifier.
PoolerDropout float64
}
ClassifierConfig provides configuration settings for a Classifier, the Bart head for sentence-level classification.
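A minimal construction sketch; the sizes below are illustrative rather than taken from any released checkpoint:
clf := bart.NewClassifier[float32](bart.ClassifierConfig{
	InputSize:     1024,
	HiddenSize:    1024,
	OutputSize:    3, // e.g. contradiction/neutral/entailment
	PoolerDropout: 0.1,
})
_ = clf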
type Config ¶
type Config struct {
NumLabels int `json:"_num_labels,omitempty"`
ActivationDropout float64 `json:"activation_dropout,omitempty"`
ActivationFunction string `json:"activation_function,omitempty"`
BiasLogits bool `json:"add_bias_logits,omitempty"`
FinalLayerNorm bool `json:"add_final_layer_norm,omitempty"`
Architecture []string `json:"architectures,omitempty"`
AttentionDropout float64 `json:"attention_dropout,omitempty"`
BosTokenID int `json:"bos_token_id,omitempty"`
ClassifierDropout float64 `json:"classif_dropout,omitempty"`
DModel int `json:"d_model,omitempty"`
DecoderAttentionHeads int `json:"decoder_attention_heads,omitempty"`
DecoderFFNDim int `json:"decoder_ffn_dim,omitempty"`
DecoderLayerDrop float64 `json:"decoder_layerdrop,omitempty"`
DecoderLayers int `json:"decoder_layers,omitempty"`
DecoderStartTokenID int `json:"decoder_start_token_id,omitempty"`
Dropout float64 `json:"dropout,omitempty"`
EncoderAttentionHeads int `json:"encoder_attention_heads,omitempty"`
EncoderFFNDim int `json:"encoder_ffn_dim,omitempty"`
EncoderLayerDrop float64 `json:"encoder_layerdrop,omitempty"`
EncoderLayers int `json:"encoder_layers,omitempty"`
EosTokenID int `json:"eos_token_id,omitempty"`
FineTuningTask string `json:"finetuning_task,omitempty"`
ForceBosTokenToBeGenerated bool `json:"force_bos_token_to_be_generated,omitempty"`
ID2Label map[string]string `json:"id2label,omitempty"`
InitStd float64 `json:"init_std,omitempty"`
IsEncoderDecoder bool `json:"is_encoder_decoder,omitempty"`
Label2ID map[string]int `json:"label2id,omitempty"`
LengthPenalty float64 `json:"length_penalty,omitempty"`
MaxPositionEmbeddings int `json:"max_position_embeddings,omitempty"`
ModelType string `json:"model_type,omitempty"`
NormalizeBefore bool `json:"normalize_before,omitempty"`
NormalizeEmbedding bool `json:"normalize_embedding,omitempty"`
NumHiddenLayers int `json:"num_hidden_layers,omitempty"`
OutputPast bool `json:"output_past,omitempty"`
PadTokenID int `json:"pad_token_id,omitempty"`
ScaleEmbedding bool `json:"scale_embedding,omitempty"`
StaticPositionEmbeddings bool `json:"static_position_embeddings,omitempty"`
TotalFlos float64 `json:"total_flos,omitempty"`
VocabSize int `json:"vocab_size,omitempty"`
NumBeams int `json:"num_beams,omitempty"`
MaxLength int `json:"max_length,omitempty"`
MinLength int `json:"min_length,omitempty"`
BadWordsIDs [][]int `json:"bad_words_ids,omitempty"`
EarlyStopping bool `json:"early_stopping,omitempty"`
NoRepeatNGramSize int `json:"no_repeat_ngram_size,omitempty"`
ExtraSpecialTokens map[int]string `json:"extra_special_tokens,omitempty"`
Cybertron struct {
Training bool `json:"training,omitempty"`
PositionalEncoderOffset int `json:"positional_encoder_offset,omitempty"`
SharedEmbeddingsStoreName string `json:"shared_embeddings_store_name,omitempty"`
DecoderPositionalEncodingStoreName string `json:"decoder_positional_encoding_store_name,omitempty"`
EncoderPositionalEncodingStoreName string `json:"encoder_positional_encoding_store_name,omitempty"`
}
}
Config contains the global configuration of the Bart model and the heads for fine-tuning tasks. The configuration matches that of Hugging Face to facilitate compatibility between the two implementations.
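Because the struct carries the same JSON tags as a Hugging Face config.json, a configuration can also be decoded directly with encoding/json; ConfigFromFile (below) is the package's own loader. A minimal sketch, with an illustrative file path:
data, err := os.ReadFile("config.json")
if err != nil {
	log.Fatal(err)
}
var cfg bart.Config
if err := json.Unmarshal(data, &cfg); err != nil {
	log.Fatal(err)
}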
func ConfigFromFile ¶
ConfigFromFile loads a Bart model Config from file.
func (*Config) ContradictionID ¶
ContradictionID returns the ID of the `contradiction` label.
func (*Config) EntailmentID ¶
EntailmentID returns the ID of the `entailment` label.
type CrossAttentionBlock ¶
type CrossAttentionBlock struct {
nn.Module
Attention *multiheadattention.Model
Norm *layernorm.Model
}
CrossAttentionBlock implements a cross-attention block.
type CrossAttentionBlockConfig ¶
CrossAttentionBlockConfig provides configuration settings for a CrossAttentionBlock.
type Decoder ¶
type Decoder struct {
nn.Module
// Embeddings is the embedding module.
Embeddings *Embeddings
// Layers is the list of decoder layers.
Layers []*DecoderLayer
// LayerNorm is the layer normalization module.
LayerNorm *layernorm.Model
// Config is the configuration of the decoder.
Config Config
}
Decoder implements a Bart decoder.
func NewDecoder ¶
NewDecoder returns a new Decoder.
type DecoderLayer ¶
type DecoderLayer struct {
nn.Module
// SelfAttention is the self-attention block.
SelfAttention ResidualNormSelfAttention
// CrossAttention is the cross-attention block.
CrossAttention ResidualNormCrossAttention
// FF is the feed-forward block with normalization and residual connection.
FF ResidualNormFeedForward
// Config is the configuration of the decoder layer.
Config Config
}
DecoderLayer implements a Bart decoder layer.
func NewDecoderLayer ¶
func NewDecoderLayer[T float.DType](c Config) *DecoderLayer
NewDecoderLayer returns a new decoder layer.
func (*DecoderLayer) Forward ¶
func (m *DecoderLayer) Forward(cache [2]multiheadattention.Cache, seq1 []mat.Tensor, seq2 []mat.Tensor) ([]mat.Tensor, [2]multiheadattention.Cache)
Forward performs the forward pass.
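A sketch of one layer step, assuming layer is a *DecoderLayer, xs holds the decoder hidden states, and encoded the encoder output; the zero-valued cache is used on the first step and the returned cache is passed to the next one:
var layerCache [2]multiheadattention.Cache // zero value on the first step
ys, nextCache := layer.Forward(layerCache, xs, encoded)
_, _ = ys, nextCache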
type DecodingInput ¶
type DecodingInput struct {
// InputIDs are the input IDs for the decoder.
InputIDs []int
// CurLen is the current length of the generating sequence.
CurLen int
// Cache is the cache for the decoder.
Cache Cache
}
DecodingInput is the input for the decoding function of the model for conditional generation.
type DecodingOutput ¶
type DecodingOutput struct {
// LogProbRaw is the raw (not processed) log probability of the generated token.
LogProbRaw mat.Tensor
// LogProbValue is the post-processed log probability of the generated token.
LogProbValue mat.Matrix
// NextCache is the next cache.
NextCache Cache
}
DecodingOutput is the output of the decoding function of the model for conditional generation.
type Embeddings ¶
type Embeddings struct {
nn.Module
SharedEmbeddings embedding.Shared
// PositionalEncoder is the positional encoder module.
PositionalEncoder *PositionalEncoder
// Norm is the normalization module.
Norm *layernorm.Model
// ScaleFactor is the scaling factor for the shared embeddings.
ScaleFactor *nn.Buffer
// Config is the configuration of the embeddings.
Config Config
}
func NewEmbeddings ¶
NewEmbeddings returns a new Embeddings.
type Encoder ¶
type Encoder struct {
nn.Module
// Embeddings is the embedding layer.
Embeddings *Embeddings
// Layers is the list of encoder layers.
Layers nn.ModuleList[*EncoderLayer]
// LayerNorm is the layer normalization module.
LayerNorm *layernorm.Model
// Config is the configuration of the encoder.
Config Config
}
Encoder implements a Bart encoder.
func NewEncoder ¶
NewEncoder returns a new Encoder.
type EncoderLayer ¶
type EncoderLayer struct {
nn.Module
// SelfAttention is the self attention block.
SelfAttention ResidualNormSelfAttention
// FF is the feed-forward block with normalization and residual connection.
FF ResidualNormFeedForward
// Config is the configuration of the encoder layer.
Config Config
}
EncoderLayer implements a Bart encoder layer.
func NewEncoderLayer ¶
func NewEncoderLayer[T float.DType](c Config) *EncoderLayer
NewEncoderLayer returns a new encoder layer.
type FeedForwardBlock ¶
type FeedForwardBlock struct {
nn.Module
FFN nn.ModuleList[nn.StandardModel]
Norm *layernorm.Model
}
FeedForwardBlock is a feed-forward block with normalization and residual connection.
type Model ¶
type Model struct {
nn.Module
// Config is the model configuration.
Config Config
// Encoder is the encoder model.
Encoder *Encoder
// Decoder is the decoder model.
Decoder *Decoder
// Embeddings contains the embeddings shared between the encoder and the decoder.
Embeddings *embedding.Model
}
Model implements a base Bart encoder-decoder model without any head on top.
type ModelForConditionalGeneration ¶
type ModelForConditionalGeneration struct {
nn.Module
// Bart is the fine-tuned BART model.
Bart *Model
// Projection is the projection layer from the decoder output to the vocabulary.
Projection *linear.Model
// PadMask is the mask for the pad token.
PadMask *nn.Buffer
// EosMask is the mask for the EOS token.
EosMask *nn.Buffer
}
ModelForConditionalGeneration is a model for conditional generation tasks that embeds a fine-tuned Bart model.
func NewModelForConditionalGeneration ¶
func NewModelForConditionalGeneration[T float.DType](bart *Model) *ModelForConditionalGeneration
NewModelForConditionalGeneration returns a new model for conditional generation.
func (*ModelForConditionalGeneration) DecodingFunc ¶
func (m *ModelForConditionalGeneration) DecodingFunc(encoderInputIDs []int, scoreProc generationutils.ScoreProcessor, inference bool) func(batch []*DecodingInput) []*DecodingOutput
DecodingFunc returns a decoding function that works using the encoder states derived from the input. During inference, it adjusts the logits to avoid impossible tokens.
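A greedy-style sketch of a single decoding step, assuming m is a *ModelForConditionalGeneration, encoderInputIDs is the tokenized input, and scoreProc is a generationutils.ScoreProcessor supplied by the caller:
decode := m.DecodingFunc(encoderInputIDs, scoreProc, true)
out := decode([]*bart.DecodingInput{{
	InputIDs: []int{m.Bart.Config.DecoderStartTokenID},
	CurLen:   1,
	Cache:    nil, // empty on the first step
}})
next := out[0].NextCache // hand back via the next DecodingInput
_ = next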
type ModelForSequenceClassification ¶
type ModelForSequenceClassification struct {
nn.Module
// Bart is the Bart fine-tuned model.
Bart *Model
// Classifier is the final classifier layer.
Classifier *Classifier
}
ModelForSequenceClassification is a model for sequence classification tasks that embeds a fine-tuned Bart model.
func NewModelForSequenceClassification ¶
func NewModelForSequenceClassification[T float.DType](bart *Model) *ModelForSequenceClassification
NewModelForSequenceClassification returns a new model for sentence-level classification.
type NewFeedForwardBlockConfig ¶
type NewFeedForwardBlockConfig struct {
// Dim is the dimension of the input.
Dim int
// HiddenDim is the dimension of the hidden layer.
HiddenDim int
// Activation is the activation function.
Activation activation.Activation
// NormalizeBefore is whether to normalize the input before the MLP.
NormalizeBefore bool
}
NewFeedForwardBlockConfig is the configuration of a feed-forward block.
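A construction sketch via NewFeedForwardBlock (documented below); activation.GELU is assumed to be a constant of spaGO's activation package, and the sizes are illustrative:
ff := bart.NewFeedForwardBlock[float32](bart.NewFeedForwardBlockConfig{
	Dim:             1024,
	HiddenDim:       4096,
	Activation:      activation.GELU,
	NormalizeBefore: false, // post-norm variant
})
_ = ff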
type PositionalEncoder ¶
type PositionalEncoder struct {
nn.Module
// Embeddings contains the embeddings for each position.
Embeddings *embedding.Model
// Config contains the configuration settings.
Config PositionalEncoderConfig
}
PositionalEncoder contains positional embeddings fine-tuned during the training phase.
func NewPositionalEncoder ¶
func NewPositionalEncoder[T float.DType](config PositionalEncoderConfig) *PositionalEncoder
NewPositionalEncoder returns a new PositionalEncoder.
type PositionalEncoderConfig ¶
type PositionalEncoderConfig struct {
NumEmbeddings int
EmbeddingDim int
PaddingIDX int
Offset int
StoreName string
Trainable bool
}
PositionalEncoderConfig provides configuration settings for a PositionalEncoder model.
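A construction sketch with illustrative values; the Offset of 2 follows the usual BART convention of skipping the positions reserved for padding, and the StoreName is hypothetical:
pe := bart.NewPositionalEncoder[float32](bart.PositionalEncoderConfig{
	NumEmbeddings: 1024,
	EmbeddingDim:  1024,
	PaddingIDX:    1,
	Offset:        2,
	StoreName:     "encoder.embed_positions", // hypothetical
	Trainable:     true,
})
_ = pe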
type PostNormCrossAttentionBlock ¶
type PostNormCrossAttentionBlock struct {
*CrossAttentionBlock
}
PostNormCrossAttentionBlock embeds a cross-attention block to perform cross attention with post normalization.
func (PostNormCrossAttentionBlock) Forward ¶
func (m PostNormCrossAttentionBlock) Forward(cache multiheadattention.Cache, seq1 []mat.Tensor, seq2 []mat.Tensor) ([]mat.Tensor, multiheadattention.Cache)
Forward performs the forward pass.
type PostNormFeedForwardBlock ¶
type PostNormFeedForwardBlock struct {
*FeedForwardBlock
}
PostNormFeedForwardBlock is a feed-forward block with post-normalization.
type PostNormSelfAttentionBlock ¶
type PostNormSelfAttentionBlock struct {
*SelfAttentionBlock
}
PostNormSelfAttentionBlock embeds a self-attention block to perform self attention with post normalization.
func (PostNormSelfAttentionBlock) Forward ¶
func (m PostNormSelfAttentionBlock) Forward(cache multiheadattention.Cache, xs []mat.Tensor) ([]mat.Tensor, multiheadattention.Cache)
Forward performs the forward pass.
type PreNormCrossAttentionBlock ¶
type PreNormCrossAttentionBlock struct {
*CrossAttentionBlock
}
PreNormCrossAttentionBlock embeds a cross-attention block to perform cross attention with pre normalization.
func (PreNormCrossAttentionBlock) Forward ¶
func (m PreNormCrossAttentionBlock) Forward(cache multiheadattention.Cache, seq1 []mat.Tensor, seq2 []mat.Tensor) ([]mat.Tensor, multiheadattention.Cache)
Forward performs the forward pass.
type PreNormFeedForwardBlock ¶
type PreNormFeedForwardBlock struct {
*FeedForwardBlock
}
PreNormFeedForwardBlock is a feed-forward block with pre-normalization.
type PreNormSelfAttentionBlock ¶
type PreNormSelfAttentionBlock struct {
*SelfAttentionBlock
}
PreNormSelfAttentionBlock embeds a self-attention block to perform self attention with pre-normalization.
func (PreNormSelfAttentionBlock) Forward ¶
func (m PreNormSelfAttentionBlock) Forward(cache multiheadattention.Cache, xs []mat.Tensor) ([]mat.Tensor, multiheadattention.Cache)
Forward performs the forward pass.
type ResidualNormCrossAttention ¶
type ResidualNormCrossAttention interface {
// Forward performs the forward pass.
Forward(cache multiheadattention.Cache, seq1 []mat.Tensor, seq2 []mat.Tensor) ([]mat.Tensor, multiheadattention.Cache)
}
ResidualNormCrossAttention is a cross-attention block with residual connection.
func NewCrossAttentionBlock ¶
func NewCrossAttentionBlock[T float.DType](c CrossAttentionBlockConfig) ResidualNormCrossAttention
NewCrossAttentionBlock returns a new CrossAttentionBlock.
type ResidualNormFeedForward ¶
ResidualNormFeedForward is a feed-forward block with normalization and residual connection.
func NewFeedForwardBlock ¶
func NewFeedForwardBlock[T float.DType](c NewFeedForwardBlockConfig) ResidualNormFeedForward
NewFeedForwardBlock returns a new PreNormFeedForwardBlock or PostNormFeedForwardBlock depending on the configuration.
type ResidualNormSelfAttention ¶
type ResidualNormSelfAttention interface {
Forward(cache multiheadattention.Cache, xs []mat.Tensor) ([]mat.Tensor, multiheadattention.Cache)
}
ResidualNormSelfAttention is a self-attention block with normalization and residual connection.
func NewSelfAttentionBlock ¶
func NewSelfAttentionBlock[T float.DType](c SelfAttentionBlockConfig) ResidualNormSelfAttention
NewSelfAttentionBlock returns a new PreNormSelfAttentionBlock or PostNormSelfAttentionBlock depending on the configuration.
type SelfAttentionBlock ¶
type SelfAttentionBlock struct {
nn.Module
// Attention is the multi-head attention module.
Attention *multiheadattention.Model
// Norm is the layer normalization module.
Norm *layernorm.Model
}
SelfAttentionBlock implements a self-attention block.
type SelfAttentionBlockConfig ¶
type SelfAttentionBlockConfig struct {
// Dim is the dimension of the input and output.
Dim int
// NumOfHeads is the number of heads.
NumOfHeads int
// NormalizeBefore indicates whether the normalization is applied before or after the attention.
NormalizeBefore bool
// UseCausalMask indicates whether to use a causal mask.
UseCausalMask bool
}
SelfAttentionBlockConfig is the configuration of a self-attention block.
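A construction sketch via NewSelfAttentionBlock (see above); NormalizeBefore selects the pre-norm variant, and UseCausalMask is typically set for decoder self-attention. The sizes are illustrative:
sa := bart.NewSelfAttentionBlock[float32](bart.SelfAttentionBlockConfig{
	Dim:             1024,
	NumOfHeads:      16,
	NormalizeBefore: false, // post-norm, as in the original BART
	UseCausalMask:   true,  // decoder-style masking
})
_ = sa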
Source Files ¶
- bart.go
- bart_for_conditional_generation.go
- bart_for_sequence_classification.go
- classifier.go
- config.go
- crossattention.go
- crossattention_postnorm.go
- crossattention_prenorm.go
- decoder.go
- decoder_layer.go
- embeddings.go
- encoder.go
- encoder_layer.go
- feedforward.go
- feedforward_postnorm.go
- feedforward_prenorm.go
- positionalencoder.go
- selfattention.go
- selfattention_postnorm.go
- selfattention_prenorm.go