pipeline

package
v0.2.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 11, 2023 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Config

type Config interface{}

type ConfigOption

type ConfigOption struct {
	// contains filtered or unexported fields
}

ConfigOption holds a model configuration

func ConfigOptionFromFile

func ConfigOptionFromFile(modelType ModelType, path string) *ConfigOption

ConfigOptionFromFile loads the configuration for the corresponding model type from a file.

func NewBertConfigOption

func NewBertConfigOption(config bert.BertConfig) *ConfigOption

func (*ConfigOption) GetLabelMapping

func (co *ConfigOption) GetLabelMapping() map[int64]string

GetLabelMapping returns the label mapping for the corresponding model type.

type Entity

type Entity struct {
	// String representation of the Entity
	Word string
	// Confidence score
	Score float64
	// Entity label (e.g. ORG, LOC...)
	Label string
}

Entity holds entity data generated by NERModel

type ModelOption

type ModelOption struct {
	// contains filtered or unexported fields
}

type ModelType

type ModelType int

ModelType is an enum-like type identifying the type of model.

const (
	Bert ModelType = iota
	DistilBert
	Roberta
	XLMRoberta
	Electra
	Marian
	T5
	Albert
)

type NERModel

type NERModel struct {
	// contains filtered or unexported fields
}

NERModel is a model to extract entities

func NewNERModel

func NewNERModel(config TokenClassificationModel) *NERModel

NewNERModel creates a NERModel from input config

func (*NERModel) Predict

func (nm *NERModel) Predict(input []string) []Entity

Predict extracts entities from input text and returns slice of entities with score

type TokenClassificationModel

type TokenClassificationModel struct{}

type TokenizerOption

type TokenizerOption struct {
	// contains filtered or unexported fields
}

TokenizerOption specifies a tokenizer

func TokenizerOptionFromFile

func TokenizerOptionFromFile(modelType ModelType, path string) *TokenizerOption

TokenizerOptionFromFile loads TokenizerOption from file corresponding to model type.

func (*TokenizerOption) AddSpecialTokens

func (tk *TokenizerOption) AddSpecialTokens(tokens []string)

AddSpecialTokens adds special tokens to tokenizer

func (*TokenizerOption) EncodeList

func (tk *TokenizerOption) EncodeList(sentences []string) ([]tokenizer.Encoding, error)

EncodeList encodes a slice of input strings.

func (*TokenizerOption) ModelType

func (tk *TokenizerOption) ModelType() ModelType

ModelType returns chosen model type

func (*TokenizerOption) PadId

func (tk *TokenizerOption) PadId() (id int64, ok bool)

PadId returns a PAD id if any.

func (*TokenizerOption) SepId

func (tk *TokenizerOption) SepId(sepOpt ...string) (id int64, ok bool)

SepId returns a SEP id if any. If the optional sepOpt is not specified, the default value is "[SEP]".

func (*TokenizerOption) Tokenize

func (tk *TokenizerOption) Tokenize(sentence string) ([]string, error)

Tokenize tokenizes input string

func (*TokenizerOption) TokensToIds

func (tk *TokenizerOption) TokensToIds(tokens []string) (ids []int64, ok bool)

TokensToIds converts a slice of tokens to corresponding Ids.

type TokenizerType

type TokenizerType int
const (
	BertTokenizer TokenizerType = iota
	RobertaTokenizer
	XLMRobertaTokenizer
	MarianTokenizer
	T5Tokenizer
	AlbertTokenizer
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL