bert

package
v0.1.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 1, 2020 License: Apache-2.0 Imports: 18 Imported by: 1

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func BertJapaneseTokenizerFromPretrained

func BertJapaneseTokenizerFromPretrained(pretrainedModelNameOrPath string, customParams map[string]interface{}) *tokenizer.Tokenizer

BertJapaneseTokenizerFromPretrained initiates a BERT tokenizer for the Japanese language from a pretrained file.

Types

type BertAttention

type BertAttention struct {
	Bsa    *BertSelfAttention
	Output *BertSelfOutput
}

func NewBertAttention

func NewBertAttention(p nn.Path, config *BertConfig) *BertAttention

func (*BertAttention) ForwardT

func (ba *BertAttention) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask ts.Tensor, train bool) (retVal, RetValOpt ts.Tensor)

type BertConfig

type BertConfig struct {
	HiddenAct                 string           `json:"hidden_act"`
	AttentionProbsDropoutProb float64          `json:"attention_probs_dropout_prob"`
	HiddenDropoutProb         float64          `json:"hidden_dropout_prob"`
	HiddenSize                int64            `json:"hidden_size"`
	InitializerRange          float32          `json:"initializer_range"`
	IntermediateSize          int64            `json:"intermediate_size"`
	MaxPositionEmbeddings     int64            `json:"max_position_embeddings"`
	NumAttentionHeads         int64            `json:"num_attention_heads"`
	NumHiddenLayers           int64            `json:"num_hidden_layers"`
	TypeVocabSize             int64            `json:"type_vocab_size"`
	VocabSize                 int64            `json:"vocab_size"`
	OutputAttentions          bool             `json:"output_attentions"`
	OutputHiddenStates        bool             `json:"output_hidden_states"`
	IsDecoder                 bool             `json:"is_decoder"`
	Id2Label                  map[int64]string `json:"id_2_label"`
	Label2Id                  map[string]int64 `json:"label_2_id"`
	NumLabels                 int64            `json:"num_labels"`
}

BertConfig defines the BERT model architecture (i.e., number of layers, hidden layer size, label mapping...)

func ConfigFromFile

func ConfigFromFile(filename string) (*BertConfig, error)

func NewConfig

func NewConfig(customParams map[string]interface{}) *BertConfig

NewConfig initiates BertConfig with given input parameters or default values.

func (*BertConfig) GetVocabSize

func (c *BertConfig) GetVocabSize() int64

func (*BertConfig) Load

func (c *BertConfig) Load(modelNameOrPath string, params map[string]interface{}) error

Load loads model configuration from file or model name. It also updates default configuration parameters if provided. This method implements `pretrained.Config` interface.

type BertEmbedding

type BertEmbedding interface {
	ForwardT(inputIds, tokenTypeIds, positionIds, inputEmbeds ts.Tensor, train bool) (ts.Tensor, error)
}

BertEmbedding defines interface for BertModel or RoBertaModel

type BertEmbeddings

type BertEmbeddings struct {
	WordEmbeddings      *nn.Embedding
	PositionEmbeddings  *nn.Embedding
	TokenTypeEmbeddings *nn.Embedding
	LayerNorm           *nn.LayerNorm
	Dropout             *util.Dropout
}

func NewBertEmbeddings

func NewBertEmbeddings(p nn.Path, config *BertConfig) *BertEmbeddings

NewBertEmbeddings builds a new BertEmbeddings.

Params:

  • `p` - Varstore path for the root of the BertEmbeddings model
  • `config` - `BertConfig` object defining the model architecture and vocab/hidden size

func (*BertEmbeddings) ForwardT

func (be *BertEmbeddings) ForwardT(inputIds, tokenTypeIds, positionIds, inputEmbeds ts.Tensor, train bool) (retVal ts.Tensor, err error)

ForwardT implements BertEmbedding interface, passes through the embedding layer.

type BertEncoder

type BertEncoder struct {
	OutputAttentions   bool
	OutputHiddenStates bool
	Layers             []BertLayer
}

BertEncoder defines an encoder for BERT model

func NewBertEncoder

func NewBertEncoder(p nn.Path, config *BertConfig) *BertEncoder

func (*BertEncoder) ForwardT

func (be *BertEncoder) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask ts.Tensor, train bool) (retVal ts.Tensor, retValOpt1, retValOpt2 []ts.Tensor)

ForwardT forwards pass through the encoder.

type BertForMaskedLM

type BertForMaskedLM struct {
	// contains filtered or unexported fields
}

BertForMaskedLM is BERT for masked language model

Example
// Config
config := new(bert.BertConfig)
err := config.Load("../data/bert/config.json", nil)
if err != nil {
	log.Fatal(err)
}

// Model
device := gotch.CPU

model := new(bert.BertForMaskedLM)
err = model.Load("../data/bert/model.ot", config, nil, device)
if err != nil {
	log.Fatal(err)
}

tk := getBertTokenizer()
sentence1 := "Looks like one [MASK] is missing"
sentence2 := "It was a very nice and [MASK] day"

var input []tokenizer.EncodeInput
input = append(input, tokenizer.NewSingleEncodeInput(tokenizer.NewInputSequence(sentence1)))
input = append(input, tokenizer.NewSingleEncodeInput(tokenizer.NewInputSequence(sentence2)))

encodings, err := tk.EncodeBatch(input, true)
if err != nil {
	log.Fatal(err)
}

var maxLen int = 0
for _, en := range encodings {
	if len(en.Ids) > maxLen {
		maxLen = len(en.Ids)
	}
}

var tensors []ts.Tensor
for _, en := range encodings {
	var tokInput []int64 = make([]int64, maxLen)
	for i := 0; i < len(en.Ids); i++ {
		tokInput[i] = int64(en.Ids[i])
	}

	tensors = append(tensors, ts.TensorFrom(tokInput))
}

inputTensor := ts.MustStack(tensors, 0).MustTo(device, true)

var output ts.Tensor
ts.NoGrad(func() {
	output, _, _ = model.ForwardT(inputTensor, ts.None, ts.None, ts.None, ts.None, ts.None, ts.None, false)
})

index1 := output.MustGet(0).MustGet(4).MustArgmax(0, false, false).Int64Values()[0]
index2 := output.MustGet(1).MustGet(7).MustArgmax(0, false, false).Int64Values()[0]

got1, ok := tk.IdToToken(int(index1))
if !ok {
	fmt.Printf("Cannot find a corresponding word for the given id (%v) in vocab.\n", index1)
}
got2, ok := tk.IdToToken(int(index2))
if !ok {
	fmt.Printf("Cannot find a corresponding word for the given id (%v) in vocab.\n", index2)
}

fmt.Println(got1)
fmt.Println(got2)
/*
 *   // Output:
 *   // person
 *   // pleasant
 *  */

func NewBertForMaskedLM

func NewBertForMaskedLM(p nn.Path, config *BertConfig) *BertForMaskedLM

NewBertForMaskedLM creates BertForMaskedLM.

func (*BertForMaskedLM) ForwardT

func (mlm *BertForMaskedLM) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds, encoderHiddenStates, encoderMask ts.Tensor, train bool) (retVal1 ts.Tensor, optRetVal1, optRetVal2 []ts.Tensor)

ForwardT forwards pass through the model.

Params:

  • `inputIds` - Optional input tensor of shape (*batch size*, *sequenceLength*). If None, pre-computed embeddings must be provided (see *inputEmbeds*)
  • `mask` - Optional mask of shape (*batch size*, *sequenceLength*). Masked position have value 0, non-masked value 1. If None set to 1
  • `tokenTypeIds` - Optional segment id of shape (*batch size*, *sequenceLength*). Convention is value of 0 for the first sentence (incl. *[SEP]*) and 1 for the second sentence. If None set to 0.
  • `positionIds` - Optional position ids of shape (*batch size*, *sequenceLength*). If None, will be incremented from 0.
  • `inputEmbeds` - Optional pre-computed input embeddings of shape (*batch size*, *sequenceLength*, *hiddenSize*). If None, input ids must be provided (see *inputIds*)
  • `encoderHiddenStates` - Optional encoder hidden state of shape (*batch size*, *encoderSequenceLength*, *hiddenSize*). If the model is defined as a decoder and the *encoderHiddenStates* is not None, used in the cross-attention layer as keys and values (query from the decoder).
  • `encoderMask` - Optional encoder attention mask of shape (*batch size*, *encoderSequenceLength*). If the model is defined as a decoder and the *encoderHiddenStates* is not None, used to mask encoder values. Positions with value 0 will be masked.
  • `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output` - `Tensor` of shape (*batch size*, *sequenceLength*, *vocabSize*)
  • `hiddenStates` - `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)
  • `attentions` - `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)

func (*BertForMaskedLM) Load

func (mlm *BertForMaskedLM) Load(modelNameOrPath string, config interface{ pretrained.Config }, params map[string]interface{}, device gotch.Device) error

Load loads model from file or model name. It also updates default configuration parameters if provided. This method implements `PretrainedModel` interface.

type BertForMultipleChoice

type BertForMultipleChoice struct {
	// contains filtered or unexported fields
}

BertForMultipleChoice constructs multiple choices model using a BERT base model and a linear classifier. Input should be in the form `[CLS] Context [SEP] Possible choice [SEP]`. The choice is made along the batch axis, assuming all elements of the batch are alternatives to be chosen from for a given context.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `classifier`: Linear layer for multiple choices

func NewBertForMultipleChoice

func NewBertForMultipleChoice(p nn.Path, config *BertConfig) *BertForMultipleChoice

NewBertForMultipleChoice creates a new `BertForMultipleChoice`.

Params:

  • `p` - Variable store path for the root of the BertForMultipleChoice model
  • `config` - `BertConfig` object defining the model architecture

func (*BertForMultipleChoice) ForwardT

func (mc *BertForMultipleChoice) ForwardT(inputIds, mask, tokenTypeIds, positionIds ts.Tensor, train bool) (retVal ts.Tensor, retValOpt1, retValOpt2 []ts.Tensor)

ForwardT forwards pass through the model.

Params:

  • `inputIds` - Input tensor of shape (*batch size*, *sequenceLength*).
  • `mask` - Optional mask of shape (*batch size*, *sequenceLength*). Masked position have value 0, non-masked value 1. If None set to 1
  • `tokenTypeIds` - Optional segment id of shape (*batch size*, *sequenceLength*). Convention is value of 0 for the first sentence (incl. *[SEP]*) and 1 for the second sentence. If None set to 0.
  • `positionIds` - Optional position ids of shape (*batch size*, *sequenceLength*). If None, will be incremented from 0.
  • `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output` - `Tensor` of shape (*1*, *batch size*) containing the logits for each of the alternatives given
  • `hiddenStates` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)
  • `attentions` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)

type BertForQuestionAnswering

type BertForQuestionAnswering struct {
	// contains filtered or unexported fields
}

BertForQuestionAnswering constructs extractive question-answering model based on a BERT language model. Identifies the segment of a context that answers a provided question.

Please note that a significant amount of pre- and post-processing is required to perform end-to-end question answering. See the question answering pipeline (also provided in this crate) for more details.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `qa_outputs`: Linear layer for question answering

func NewForBertQuestionAnswering

func NewForBertQuestionAnswering(p nn.Path, config *BertConfig) *BertForQuestionAnswering

NewForBertQuestionAnswering creates a new `BertForQuestionAnswering`.

Params:

  • `p` - Variable store path for the root of the BertForQuestionAnswering model
  • `config` - `BertConfig` object defining the model architecture

func (*BertForQuestionAnswering) ForwardT

func (qa *BertForQuestionAnswering) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds ts.Tensor, train bool) (retVal1, retVal2 ts.Tensor, retValOpt1, retValOpt2 []ts.Tensor)

ForwardT forwards pass through the model.

Params:

  • `inputIds` - Optional input tensor of shape (*batch size*, *sequenceLength*). If None, pre-computed embeddings must be provided (see `inputEmbeds`)
  • `mask` - Optional mask of shape (*batch size*, *sequenceLength*). Masked position have value 0, non-masked value 1. If None set to 1
  • `tokenTypeIds` - Optional segment id of shape (*batch size*, *sequenceLength*). Convention is value of 0 for the first sentence (incl. *[SEP]*) and 1 for the second sentence. If None set to 0.
  • `positionIds` - Optional position ids of shape (*batch size*, *sequenceLength*). If None, will be incremented from 0.
  • `inputEmbeds` - Optional pre-computed input embeddings of shape (*batch size*, *sequenceLength*, *hiddenSize*). If None, input ids must be provided (see `inputIds`)
  • `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `startScores` - `Tensor` of shape (*batch size*, *sequenceLength*) containing the logits for start of the answer
  • `endScores` - `Tensor` of shape (*batch size*, *sequenceLength*) containing the logits for end of the answer
  • `hiddenStates` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)
  • `attentions` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)

type BertForSequenceClassification

type BertForSequenceClassification struct {
	// contains filtered or unexported fields
}

BertForSequenceClassification is Base BERT model with a classifier head to perform sentence or document-level classification.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `classifier`: BERT linear layer for classification

func NewBertForSequenceClassification

func NewBertForSequenceClassification(p nn.Path, config *BertConfig) *BertForSequenceClassification

NewBertForSequenceClassification creates a new `BertForSequenceClassification`.

Params:

  • `p` - Variable store path for the root of the BertForSequenceClassification model
  • `config` - `BertConfig` object defining the model architecture and number of classes

Example:

```go

device := gotch.CPU
vs := nn.NewVarStore(device)
config, err := bert.ConfigFromFile("path/to/config.json")
if err != nil {
	log.Fatal(err)
}
p := vs.Root()
model := bert.NewBertForSequenceClassification(p.Sub("bert"), config)

```

func (*BertForSequenceClassification) ForwardT

func (bsc *BertForSequenceClassification) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds ts.Tensor, train bool) (retVal ts.Tensor, retValOpt1, retValOpt2 []ts.Tensor)

ForwardT forwards pass through the model.

Params:

- `inputIds` - Optional input tensor of shape (*batch size*, *sequenceLength*). If None, pre-computed embeddings must be provided (see `inputEmbeds`)
- `mask` - Optional mask of shape (*batch size*, *sequenceLength*). Masked position have value 0, non-masked value 1. If None set to 1
- `tokenTypeIds` - Optional segment id of shape (*batch size*, *sequenceLength*). Convention is value of 0 for the first sentence (incl. *[SEP]*) and 1 for the second sentence. If None set to 0.
- `positionIds` - Optional position ids of shape (*batch size*, *sequenceLength*). If None, will be incremented from 0.
- `inputEmbeds` - Optional pre-computed input embeddings of shape (*batch size*, *sequenceLength*, *hiddenSize*). If None, input ids must be provided (see `inputIds`)
- `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `labels` - `Tensor` of shape (*batch size*, *numLabels*)
  • `hiddenStates` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)
  • `attentions` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)

type BertForTokenClassification

type BertForTokenClassification struct {
	// contains filtered or unexported fields
}

BertForTokenClassification constructs token-level classifier predicting a label for each token provided. Note that because of wordpiece tokenization, the labels predicted are not necessarily aligned with words in the sentence.

It is made of the following blocks:

  • `bert`: Base BertModel
  • `classifier`: Linear layer for token classification

func NewBertForTokenClassification

func NewBertForTokenClassification(p nn.Path, config *BertConfig) *BertForTokenClassification

NewBertForTokenClassification creates a new `BertForTokenClassification`

Params:

  • `p` - Variable store path for the root of the BertForTokenClassification model
  • `config` - `BertConfig` object defining the model architecture, number of output labels and label mapping

func (*BertForTokenClassification) ForwardT

func (tc *BertForTokenClassification) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds ts.Tensor, train bool) (retVal ts.Tensor, retValOpt1, retValOpt2 []ts.Tensor)

ForwardT forwards pass through the model.

Params:

  • `inputIds` - Optional input tensor of shape (*batch size*, *sequenceLength*). If None, pre-computed embeddings must be provided (see `inputEmbeds`)
  • `mask` - Optional mask of shape (*batch size*, *sequenceLength*). Masked position have value 0, non-masked value 1. If None set to 1
  • `tokenTypeIds` - Optional segment id of shape (*batch size*, *sequenceLength*). Convention is value of 0 for the first sentence (incl. *[SEP]*) and 1 for the second sentence. If None set to 0.
  • `positionIds` - Optional position ids of shape (*batch size*, *sequenceLength*). If None, will be incremented from 0.
  • `inputEmbeds` - Optional pre-computed input embeddings of shape (*batch size*, *sequenceLength*, *hiddenSize*). If None, input ids must be provided (see `inputIds`)
  • `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output` - `Tensor` of shape (*batch size*, *sequenceLength*, *numLabels*) containing the logits for each of the input tokens and classes
  • `hiddenStates` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)
  • `attentions` - Optional `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)

type BertIntermediate

type BertIntermediate struct {
	Lin        *nn.Linear
	Activation util.ActivationFn // interface
}

func NewBertIntermediate

func NewBertIntermediate(p nn.Path, config *BertConfig) *BertIntermediate

func (*BertIntermediate) Forward

func (bi *BertIntermediate) Forward(hiddenStates ts.Tensor) (retVal ts.Tensor)

type BertLMPredictionHead

type BertLMPredictionHead struct {
	Transform *BertPredictionHeadTransform
	Decoder   *util.LinearNoBias
	Bias      ts.Tensor
}

BertLMPredictionHead constructs layers for BERT prediction head.

func NewBertLMPredictionHead

func NewBertLMPredictionHead(p nn.Path, config *BertConfig) *BertLMPredictionHead

NewBertLMPredictionHead creates BertLMPredictionHead.

func (*BertLMPredictionHead) Forward

func (ph *BertLMPredictionHead) Forward(hiddenState ts.Tensor) ts.Tensor

Forward forwards through the model.

type BertLayer

type BertLayer struct {
	Attention      *BertAttention
	IsDecoder      bool
	CrossAttention *BertAttention
	Intermediate   *BertIntermediate
	Output         *BertOutput
}

BertLayer defines a layer in BERT encoder

func NewBertLayer

func NewBertLayer(p nn.Path, config *BertConfig) *BertLayer

func (*BertLayer) ForwardT

func (bl *BertLayer) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask ts.Tensor, train bool) (retVal, retValOpt1, retValOpt2 ts.Tensor)

type BertModel

type BertModel struct {
	Embeddings *BertEmbeddings
	Encoder    *BertEncoder
	Pooler     *BertPooler
	IsDecoder  bool
}

BertModel defines base architecture for BERT models. Task-specific models can be built from this base model.

Fields:

  • Embeddings: for `token`, `position` and `segment` embeddings
  • Encoder: a vector of layers. Each layer is composed of a `self-attention`, an `intermediate` (linear) and an output (linear + layer norm) sub-layer.

  • Pooler: linear layer applied to the first element of the sequence (`[CLS]` token)
  • IsDecoder: whether the model is used as a decoder. If set to `true`, a causal mask will be applied to hide future positions that should not be attended to.

func NewBertModel

func NewBertModel(p nn.Path, config *BertConfig) *BertModel

NewBertModel builds a new `BertModel`.

Params:

  • `p`: Variable store path for the root of the BERT Model
  • `config`: BertConfig configuration for model architecture and decoder status

func (*BertModel) ForwardT

func (b *BertModel) ForwardT(inputIds, mask, tokenTypeIds, positionIds, inputEmbeds, encoderHiddenStates, encoderMask ts.Tensor, train bool) (retVal1, retVal2 ts.Tensor, retValOpt1, retValOpt2 []ts.Tensor, err error)

ForwardT forwards pass through the model.

Params:

  • `inputIds` - Optional input tensor of shape (*batch size*, *sequenceLength*). If None, pre-computed embeddings must be provided (see `inputEmbeds`)
  • `mask` - Optional mask of shape (*batch size*, *sequenceLength*). Masked position have value 0, non-masked value 1. If None set to 1
  • `tokenTypeIds` - Optional segment id of shape (*batch size*, *sequenceLength*). Convention is value of 0 for the first sentence (incl. *[SEP]*) and 1 for the second sentence. If None set to 0.
  • `positionIds` - Optional position ids of shape (*batch size*, *sequenceLength*). If None, will be incremented from 0.
  • `inputEmbeds` - Optional pre-computed input embeddings of shape (*batch size*, *sequenceLength*, *hiddenSize*). If None, input ids must be provided (see `inputIds`)
  • `encoderHiddenStates` - Optional encoder hidden state of shape (*batch size*, *encoderSequenceLength*, *hiddenSize*). If the model is defined as a decoder and the `encoderHiddenStates` is not None, used in the cross-attention layer as keys and values (query from the decoder).
  • `encoderMask` - Optional encoder attention mask of shape (*batch size*, *encoderSequenceLength*). If the model is defined as a decoder and the `encoderHiddenStates` is not None, used to mask encoder values. Positions with value 0 will be masked.
  • `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.

Returns:

  • `output` - `Tensor` of shape (*batch size*, *sequenceLength*, *hiddenSize*)
  • `pooledOutput` - `Tensor` of shape (*batch size*, *hiddenSize*)
  • `hiddenStates` - `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)
  • `attentions` - `[]ts.Tensor` of length *numHiddenLayers* with shape (*batch size*, *sequenceLength*, *hiddenSize*)

type BertOutput

type BertOutput struct {
	Lin       *nn.Linear
	LayerNorm *nn.LayerNorm
	Dropout   *util.Dropout
}

func NewBertOutput

func NewBertOutput(p nn.Path, config *BertConfig) *BertOutput

func (*BertOutput) ForwardT

func (bo *BertOutput) ForwardT(hiddenStates, inputTensor ts.Tensor, train bool) (retVal ts.Tensor)

type BertPooler

type BertPooler struct {
	Lin *nn.Linear
}

BertPooler defines a linear layer which can be applied to the first element of the sequence (`[CLS]` token).

func NewBertPooler

func NewBertPooler(p nn.Path, config *BertConfig) *BertPooler

func (*BertPooler) Forward

func (bp *BertPooler) Forward(hiddenStates ts.Tensor) (retVal ts.Tensor)

type BertPredictionHeadTransform

type BertPredictionHeadTransform struct {
	Dense      *nn.Linear
	Activation util.ActivationFn
	LayerNorm  *nn.LayerNorm
}

BertPredictionHeadTransform holds layers of BERT prediction head transform.

func NewBertPredictionHeadTransform

func NewBertPredictionHeadTransform(p nn.Path, config *BertConfig) *BertPredictionHeadTransform

NewBertPredictionHeadTransform creates BertPredictionHeadTransform.

func (*BertPredictionHeadTransform) Forward

func (bpht *BertPredictionHeadTransform) Forward(hiddenStates ts.Tensor) (retVal ts.Tensor)

Forward forwards through the model.

type BertSelfAttention

type BertSelfAttention struct {
	NumAttentionHeads int64
	AttentionHeadSize int64
	Dropout           *util.Dropout
	OutputAttentions  bool
	Query             *nn.Linear
	Key               *nn.Linear
	Value             *nn.Linear
}

func NewBertSelfAttention

func NewBertSelfAttention(p nn.Path, config *BertConfig) *BertSelfAttention

NewBertSelfAttention creates a new `BertSelfAttention`

func (*BertSelfAttention) ForwardT

func (bsa *BertSelfAttention) ForwardT(hiddenStates, mask, encoderHiddenStates, encoderMask ts.Tensor, train bool) (retVal, retValOpt ts.Tensor)

ForwardT implements ModuleT interface for BertSelfAttention

NOTE: mask, encoderHiddenStates and encoderMask are optional tensors; for a `None` value, `ts.None` can be used.

type BertSelfOutput

type BertSelfOutput struct {
	Linear    *nn.Linear
	LayerNorm *nn.LayerNorm
	Dropout   *util.Dropout
}

func NewBertSelfOutput

func NewBertSelfOutput(p nn.Path, config *BertConfig) *BertSelfOutput

func (*BertSelfOutput) ForwardT

func (bso *BertSelfOutput) ForwardT(hiddenStates ts.Tensor, inputTensor ts.Tensor, train bool) (retVal ts.Tensor)

type BertTokenizerFast

type BertTokenizerFast = tokenizer.Tokenizer

type TensorOpt

type TensorOpt func() *ts.Tensor

func EncoderHiddenStateTensorOpt

func EncoderHiddenStateTensorOpt(t *ts.Tensor) TensorOpt

func EncoderMaskTensorOpt

func EncoderMaskTensorOpt(t *ts.Tensor) TensorOpt

func MaskTensorOpt

func MaskTensorOpt(t *ts.Tensor) TensorOpt

type Tokenizer

type Tokenizer struct {
	*tokenizer.Tokenizer
}

func NewTokenizer

func NewTokenizer() *Tokenizer

func (*Tokenizer) Load

func (bt *Tokenizer) Load(modelNameOrPath string, params map[string]interface{}) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL