verifier

package
v0.0.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 21, 2020 License: MIT Imports: 1 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CurationLevel

// CurationLevel classifies how well the DataSources that returned a match
// are curated.
type CurationLevel int

CurationLevel tells whether the matched result was returned by at least one DataSource in one of the following categories.

// Curation levels. Values are assigned with iota in declaration order, so
// NotCurated is the zero value of CurationLevel.
const (
	// NotCurated means that none of the DataSources where the name-string was
	// matched is curated sufficiently.
	NotCurated CurationLevel = iota

	// AutoCurated means that at least one of the returned DataSources invested
	// significantly in curating its data by scripts.
	AutoCurated

	// Curated means that at least one DataSource is marked as sufficiently
	// curated. It does not mean that the particular match was manually
	// checked, though.
	Curated
)

func (CurationLevel) String

func (c CurationLevel) String() string

type DataSource

// DataSource provides metadata for an externally collected data-set.
type DataSource struct {
	// ID is a DataSource ID.
	ID int `json:"id"`

	// UUID is generated by GlobalNames and associated with the DataSource.
	UUID string `json:"uuid,omitempty"`

	// Title is a full title of a DataSource.
	Title string `json:"title"`

	// TitleShort is a shortened/abbreviated title of a DataSource.
	TitleShort string `json:"titleShort"`

	// Version of the data-set for a DataSource.
	Version string `json:"version,omitempty"`

	// RevisionDate of a data-set from a data-provider.
	// It follows the format 'year-month-day', 'year-month' or 'year'.
	// Note that it is serialized under the JSON key "releaseDate".
	RevisionDate string `json:"releaseDate,omitempty"`

	// DOI of a DataSource.
	DOI string `json:"doi,omitempty"`

	// Citation representing a DataSource.
	Citation string `json:"citation,omitempty"`

	// Authors associated with the DataSource.
	Authors string `json:"authors,omitempty"`

	// Description of the DataSource.
	Description string `json:"description,omitempty"`

	// WebsiteURL is a homepage of a DataSource.
	// Note that it is serialized under the snake_case JSON key "home_url",
	// unlike the camelCase keys of the other fields.
	WebsiteURL string `json:"home_url,omitempty"`

	// CurationLevel determines how much manual or programmatic work is put
	// into assuring the quality of the data.
	CurationLevel `json:"curationLevel,omitempty"`

	// RecordCount tells how many entries are in a data source.
	RecordCount int `json:"recordCount,omitempty"`

	// UpdatedAt is the last import time and date.
	UpdatedAt time.Time `json:"updatedAt"`
}

DataSource provides metadata for an externally collected data-set.

type DataSourcesOpts

// DataSourcesOpts are options to send to the DataSources method.
type DataSourcesOpts struct {
	// DataSourceID restricts the results to the data source with this ID.
	// If it is not given, results for all data sources are returned.
	DataSourceID int `json:"dataSourceId"`
}

DataSourcesOpts are options to send to DataSources method.

type MatchType

// MatchType describes how a name-string matched a name in the gnames
// database.
type MatchType int

MatchType describes how a name-string matched a name in gnames database.

// Match types. Values are assigned with iota in declaration order, so NoMatch
// is the zero value of MatchType.
const (
	// NoMatch means that verification failed.
	NoMatch MatchType = iota

	// Exact means either the canonical form, or the whole name-string matched
	// perfectly.
	Exact

	// Fuzzy means that matches were not exact due to similarity of name-strings,
	// OCR or typing errors. Take these results with more suspicion than
	// Exact matches. Fuzzy match is never done on uninomials due to the
	// high rate of false positives.
	Fuzzy

	// PartialExact means that GNames failed to match the full name-string, and
	// the match happened after removing either middle species epithets, or by
	// chopping the 'tail' words of the input name-string canonical form.
	PartialExact

	// PartialFuzzy is the same as PartialExact, but also the match was not
	// exact. We never do fuzzy matches for uninomials, due to the high rate of
	// false positives.
	PartialFuzzy
)

func (MatchType) String

func (mt MatchType) String() string

type ResultData

// ResultData is the data returned for the "best" or a "preferred" result of
// name verification.
type ResultData struct {
	// DataSourceID is the ID of a matched DataSource.
	DataSourceID *int `json:"dataSourceId"`

	// DataSrouceTitleShort is a shortened/abbreviated title of the data source.
	// NOTE(review): the field name is misspelled ("Srouce"); it is exported, so
	// renaming it would change the package API. The JSON key is spelled
	// correctly.
	DataSrouceTitleShort string `json:"dataSourceTitleShort"`

	// CurationLevel of the data source. It is excluded from JSON output;
	// CurationLevelString carries its string form instead.
	CurationLevel `json:"-"`

	// CurationLevelString is a string representation of CurationLevel.
	CurationLevelString string `json:"curationLevel"`

	// ID from a data source. We try our best to return an ID that corresponds
	// to dwc:taxonID of a DataSource. If such an ID is not provided, this ID
	// will be auto-generated. Auto-generated IDs will have a 'gn_' prefix.
	ID string `json:"id"`

	// GlobalID that is exposed globally by a DataSource. Such IDs are usually
	// self-resolved, like for example LSID, pURL, DOI etc.
	GlobalID string `json:"globalId,omitempty"`

	// LocalID used by a DataSource internally. If an Outlink field is provided,
	// LocalID serves as a 'dynamic' component of the URL.
	LocalID string `json:"localId,omitempty"`

	// Outlink to the record in the DataSource. It consists of a 'stable'
	// URL and an appended 'dynamic' LocalID.
	Outlink string `json:"outlink,omitempty"`

	// EntryDate is a timestamp created on entry of the data.
	EntryDate string `json:"entryDate"`

	// Score reflects how well the match worked. It is used to determine the
	// best match overall, and the best match for every data-source. It is
	// excluded from JSON output.
	Score uint32 `json:"-"`

	// MatchedName is a name-string from the DataSource that was matched
	// by the GNames algorithm.
	MatchedName string `json:"matchedName"`

	// MatchedCardinality is the cardinality of the returned name:
	// 0 - No match, virus or hybrid formula,
	// 1 - Uninomial, 2 - Binomial, 3 - Trinomial etc.
	MatchedCardinality *int `json:"matchedCardinality"`

	// MatchedCanonicalSimple is a simplified canonical form without ranks for
	// names lower than species, and with omitted hybrid signs for named hybrids.
	// Quite often the simple canonical is the same as the full canonical.
	// Hybrid signs are preserved for hybrid formulas.
	MatchedCanonicalSimple string `json:"matchedCanonicalSimple,omitempty"`

	// MatchedCanonicalFull is a canonical form that preserves hybrid signs
	// and infraspecific ranks.
	MatchedCanonicalFull string `json:"matchedCanonicalFull,omitempty"`

	// MatchedAuthors is a list of authors mentioned in the name. It is
	// excluded from JSON output.
	MatchedAuthors []string `json:"-"`

	// MatchedYear is a year mentioned in the name. Multiple years or
	// approximate years are ignored. It is excluded from JSON output.
	MatchedYear int `json:"-"`

	// CurrentRecordID is the ID of the currently accepted name given by
	// the data-source.
	CurrentRecordID string `json:"currentRecordID"`

	// CurrentName is a currently accepted name (it is only provided by
	// DataSources with taxonomic data).
	CurrentName string `json:"currentName"`

	// CurrentCardinality is the cardinality of the accepted name.
	// It might differ from the matched name cardinality.
	CurrentCardinality *int `json:"currentCardinality"`

	// CurrentCanonicalSimple is a canonical form for the currently accepted name.
	CurrentCanonicalSimple string `json:"currentCanonicalSimple"`

	// CurrentCanonicalFull is a full version of the canonical form for the
	// currently accepted name.
	CurrentCanonicalFull string `json:"currentCanonicalFull"`

	// IsSynonym is true if there is an indication in the DataSource that the
	// name is not a currently accepted name for one or another reason.
	IsSynonym bool `json:"isSynonym"`

	// ClassificationPath to the name (if provided by the DataSource).
	// Classification path consists of a hierarchy of name-strings.
	ClassificationPath string `json:"classificationPath,omitempty"`

	// ClassificationRanks of the classification path. They follow the
	// same order as the classification path.
	ClassificationRanks string `json:"classificationRanks,omitempty"`

	// ClassificationIDs of the name-strings. They always correspond to
	// the "id" field.
	ClassificationIDs string `json:"classificationIds,omitempty"`

	// EditDistance is a Levenshtein edit distance between the canonical form
	// of the input name-string and the matched canonical form. If match type
	// is "EXACT", edit-distance will be 0.
	EditDistance *int `json:"editDistance"`

	// StemEditDistance is a Levenshtein edit distance after removing suffixes
	// from specific epithets from canonical forms.
	StemEditDistance *int `json:"stemEditDistance"`

	// MatchType describes what kind of a match happened to a name-string.
	MatchType `json:"matchType"`

	// Vernaculars are vernacular names that correspond to the matched name.
	// (Will be implemented later.)
	Vernaculars []Vernacular `json:"vernaculars,omitempty"`
}

ResultData is the data returned for the "best" or a "preferred" result of name verification.

type Verification

// Verification is a result returned by the Verify method.
type Verification struct {
	// InputID is a UUIDv5 generated out of the Input string.
	// NOTE(review): the JSON key "input_id" is snake_case while the other keys
	// of this struct are camelCase — confirm this is intended.
	InputID string `json:"input_id"`
	// Input is a verified name-string.
	Input string `json:"input"`
	// MatchType is the best available match.
	// NOTE(review): unlike the embedded MatchType in ResultData, this field
	// carries no JSON tag — confirm the intended JSON key.
	MatchType
	// BestResult is the best result according to GNames scoring.
	BestResult *ResultData `json:"bestResult,omitempty"`

	// PreferredResults contain all detected matches from preferred data
	// sources provided by the user.
	PreferredResults []*ResultData `json:"preferredResults,omitempty"`

	// DataSourcesNum is the number of data sources that matched an
	// input name-string.
	DataSourcesNum int `json:"dataSourcesNum"`

	// CurationLevel estimates reliability of matched data sources. It tells
	// whether matches are returned by at least one manually curated data
	// source, by an automatically curated data source, or only by sources
	// that are not significantly curated. It is excluded from JSON output;
	// CurationLevelString carries its string form instead.
	CurationLevel `json:"-"`

	// CurationLevelString is a string representation of CurationLevel.
	CurationLevelString string `json:"curationLevel"`

	// Error provides an error message, if any. If error is not empty, the match
	// failed because of a bug in the service.
	Error string `json:"error"`
}

Verification is a result returned by Verify method.

type Verifier

// Verifier describes methods for verifying name-strings and for retrieving
// metadata of data-sources.
type Verifier interface {
	// Verify takes name-strings and options (both carried by VerifyParams) and
	// returns verification results.
	Verify(VerifyParams) []*Verification

	// DataSources takes options and returns data-source metadata. If no
	// data-source ID is provided in the options, it returns metadata for all
	// data-sources.
	DataSources(DataSourcesOpts) []*DataSource
}

type VerifyParams

// VerifyParams are options/parameters for the Verify method.
type VerifyParams struct {
	// NameStrings is a list of name-strings to verify.
	NameStrings []string `json:"nameStrings"`
	// PreferredSources contain IDs of DataSources whose matches will be
	// returned besides the best result. See the PreferredResults field in
	// Verification.
	PreferredSources []int `json:"preferredSources"`
	// WithVernaculars indicates if corresponding vernacular results will be
	// returned as well.
	WithVernaculars bool `json:"withVernaculars"`
}

VerifyParams are options/parameters for the Verify method.

type Vernacular

// Vernacular represents a vernacular name.
type Vernacular struct {
	// Name is the vernacular name-string.
	Name string `json:"name"`
	// Language of the name, hopefully in ISO form.
	Language string `json:"language,omitempty"`
	// Locality is the geographic place(s) where the name is used.
	Locality string `json:"locality,omitempty"`
}

Vernacular represents a vernacular name.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL