Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func SetCollation ¶
Types ¶
type Abbr ¶
type Abbr struct {
// Abbr is the abbreviation string. It is the primary key of the record and
// is stored as varchar(10).
Abbr string `gorm:"type:varchar(10);primary_key"`
}
Abbr contains all abbreviation strings generated from titles.
type AbbrTitle ¶
type AbbrTitle struct {
// Abbr is the abbreviation string. It is tagged as a primary key together
// with TitleID.
Abbr string `gorm:"type:varchar(10);primary_key"`
// TitleID is the ID of the title the abbreviation maps to. It is tagged as
// a primary key together with Abbr.
// NOTE(review): presumably corresponds to the ID field in Title — confirm.
TitleID uint `gorm:"primary_key"`
}
AbbrTitle maps abbreviations to title IDs.
type ColBhlRef ¶
// ColBhlRef connects a Catalogue of Life name record with a BHL location
// (item, part, page) where a matching scientific name was found, together
// with an estimate of the match quality.
// NOTE(review): summary inferred from the field comments below — confirm.
type ColBhlRef struct {
// ColNameID matches the ID field in ColName.
ColNameID uint `gorm:"index:col_name_id"`
// RecordID is the Catalogue of Life identifier of a name-string.
RecordID string `gorm:"type:varchar(100);index:record_id_bhl"`
// MatchedName is a scientific name match from the reference's text.
MatchedName string `gorm:"type:varchar(255);collate:C.UTF-8;not null"`
// ItemID is an automatically generated identifier from the BHL database.
// It corresponds to the ID field in Item.
ItemID uint
// PartID is an automatically generated identifier from the BHL database.
PartID uint
// PageID is the identifier autogenerated by the BHL database.
PageID uint
// RefMatchQuality is the probability that a reference is 'real':
// 0 - nothing is found
// 1 - 15% (Odds > 0.01)
// 2 - 50% (Odds > 0.1)
// 3 - 80% (Odds > 1)
// 4 - 98% (Odds > 10)
RefMatchQuality int
// Odds is the odds value calculated by a Naive Bayes algorithm. Only odds
// of 0.01 and higher are considered. The value stored here is the odds of
// the best result.
Odds float64
}
type ColBhlResult ¶
// ColBhlResult stores the serialized nomenclatural event search result for
// a Catalogue of Life name record.
type ColBhlResult struct {
// ColNameID matches the ID field in ColName.
ColNameID uint `gorm:"index:col_name_id"`
// RecordID is the Catalogue of Life identifier of a name-string.
RecordID string `gorm:"type:varchar(100);index:record_id_bhl"`
// Result contains a serialized version of the nomenclatural event
// search result.
// NOTE(review): the serialization format is not visible here — confirm
// against the code that writes this field.
Result []byte
}
type ColName ¶
type ColName struct {
// ID is automatically generated. It is tagged as a primary key together
// with RecordID.
ID uint `gorm:"primary_key"`
// RecordID is the Catalogue of Life identifier of a name-string. It is
// tagged as a primary key together with ID and is not auto-incremented.
RecordID string `gorm:"type:varchar(100);primary_key;auto_increment:false"`
// Name is the verbatim name-string from the CoL.
Name string `gorm:"type:varchar(500);not null"`
// Ref is a nomenclatural reference from the Catalogue of Life.
Ref string
// Kingdom is the kingdom name of the record.
Kingdom string `gorm:"type:varchar(100)"`
// Phylum is the phylum name of the record.
Phylum string `gorm:"type:varchar(100)"`
// Class is the class name of the record.
Class string `gorm:"type:varchar(100)"`
// Ordr is the order name of the record.
// NOTE(review): the field is presumably spelled Ordr to avoid a column
// named after the SQL keyword ORDER — confirm.
Ordr string `gorm:"type:varchar(100)"`
// Family is the family name of the record.
Family string `gorm:"type:varchar(100);index"`
// Genus is the genus name of the record.
Genus string `gorm:"type:varchar(100);index"`
// CanonicalSimple is a canonical form without hybrid signs, ranks etc.
CanonicalSimple string `gorm:"type:varchar(255);index:canonical_simple;not null"`
// CanonicalStem is a canonical form after removal of suffixes and
// substitution of some characters.
CanonicalStem string `gorm:"type:varchar(255);index:canonical_stem;not null"`
}
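ColName contains a name-string record from the Catalogue of Life together with its nomenclatural reference, higher classification (kingdom to genus), and canonical forms.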
type Item ¶
type Item struct {
// ID is the identifier autogenerated by the BHL database.
ID uint `gorm:"primary_key;auto_increment:false"`
// BarCode is the identifier generated by Internet Archive for the Item.
BarCode string `gorm:"type:varchar(100);unique_index;not null"`
// Vol contains the non-normalized volume field from the BHL database.
Vol string `gorm:"type:varchar(100);not null;default:''"`
// YearStart contains the earliest year of publication. For a journal
// volume it would be the publication year of the first journal issue; for
// a book it would be the date of publication.
YearStart sql.NullInt32
// YearEnd contains the latest year of publication. The field is often
// empty if the Item was published at once.
YearEnd sql.NullInt32
// TitleID contains the automatically generated ID of the parent title of
// the item.
TitleID uint `gorm:"not null"`
// TitleDOI is the DOI of the item's title.
// NOTE(review): the original comment said "DOI of an item"; the field name
// and the DOI field of Title suggest it is the title's DOI — confirm.
TitleDOI string `gorm:"type:varchar(100);not null;default:''"`
// TitleName is the name of a journal or a book.
TitleName string `gorm:"type:varchar(255);not null;default:''"`
// TitleAbbr1 is an acronym of a title where the first letter of each
// word is used.
TitleAbbr1 string `gorm:"type:varchar(10);not null;default:''"`
// TitleAbbr2 is an acronym of a title where 'common' words like 'and',
// 'the' etc. are omitted.
TitleAbbr2 string `gorm:"type:varchar(10);not null;default:''"`
// TitleYearStart is the first year when a title was published.
TitleYearStart sql.NullInt32
// TitleYearEnd is the last year when a title was published.
TitleYearEnd sql.NullInt32
// TitleLang is the most prevalent language of a title.
TitleLang string `gorm:"type:varchar(20);not null;default:''"`
}
Item is a physical entity digitized and aggregated by Internet Archive and BHL. It can be a volume of a journal, a book, etc.
type ItemStats ¶
type ItemStats struct {
// ID is the Item identifier autogenerated by the BHL database.
ID uint `gorm:"primary_key;auto_increment:false"`
// NamesTotal is the number of unique names in the item (rank genus and
// lower) verified by the Catalogue of Life and used in statistics
// calculations.
NamesTotal uint `gorm:"not null"`
// MainTaxon is the taxon containing more than 50% of all taxa in the item.
MainTaxon string `gorm:"type:varchar(100);index:main_taxon;not null;default:''"`
// MainTaxonRank is the rank of the MainTaxon.
MainTaxonRank string `gorm:"type:varchar(100);not null;default:''"`
// MainTaxonPercent is the percentage of taxa belonging to the MainTaxon.
MainTaxonPercent uint
// MainKingdom is the kingdom that contains most of the taxa in the item.
MainKingdom string `gorm:"type:varchar(100);not null;default:''"`
// MainKingdomPercent is the percentage of taxa associated with the
// MainKingdom.
MainKingdomPercent uint
// AnimaliaNum is the number of unique names in the item associated with
// Animalia by the Catalogue of Life.
AnimaliaNum uint `gorm:"not null"`
// PlantaeNum is the number of unique names in the item associated with
// Plantae by the Catalogue of Life.
PlantaeNum uint `gorm:"not null"`
// FungiNum is the number of unique names in the item associated with Fungi
// by the Catalogue of Life.
FungiNum uint `gorm:"not null"`
// BacteriaNum is the number of unique names in the item associated with
// Bacteria by the Catalogue of Life.
BacteriaNum uint `gorm:"not null"`
// MainPhylum is the phylum that contains most of the taxa in the item.
MainPhylum string `gorm:"type:varchar(100);not null;default:''"`
// MainPhylumPercent is the percentage of taxa associated with the
// MainPhylum.
MainPhylumPercent uint
// MainClass is the class that contains most of the taxa in the item.
MainClass string `gorm:"type:varchar(100);not null;default:''"`
// MainClassPercent is the percentage of taxa associated with the
// MainClass.
MainClassPercent uint
// MainOrder is the order that contains most of the taxa in the item.
MainOrder string `gorm:"type:varchar(100);not null;default:''"`
// MainOrderPercent is the percentage of taxa associated with the
// MainOrder.
MainOrderPercent uint
// MainFamily is the family that contains most of the taxa in the item.
MainFamily string `gorm:"type:varchar(100);not null;default:''"`
// MainFamilyPercent is the percentage of taxa associated with the
// MainFamily.
MainFamilyPercent uint
// MainGenus is the genus that contains most of the taxa in the item.
MainGenus string `gorm:"type:varchar(100);not null;default:''"`
// MainGenusPercent is the percentage of taxa associated with the
// MainGenus.
MainGenusPercent uint
}
ItemStats contains taxonomical statistics for items.
type NameOccurrence ¶
type NameOccurrence struct {
// NameStringID corresponds to the ID field in NameString.
// It is a UUID v5 generated from the normalized version of
// a detected name.
NameStringID string `gorm:"type:uuid;index:name_string"`
// PageID corresponds to the ID field in Page. It is a number automatically
// generated by the BHL database.
PageID uint
// OffsetStart is the starting position of a detected name on the page.
// It is calculated using UTF-8 characters.
OffsetStart uint
// OffsetEnd is the ending position of a detected name on the page.
// It is calculated using UTF-8 characters.
OffsetEnd uint
// OddsLog10 is the base-10 logarithm of the odds that a detected string is
// actually a scientific name according to a Naive Bayes algorithm.
OddsLog10 float64
// AnnotNomen is a normalized nomenclatural annotation detected in the
// vicinity of the occurrence. Examples of annotations are `NO_ANNOT`,
// `SP_NOV` etc.
AnnotNomen string `gorm:"type:varchar(50);index:annot"`
}
NameOccurrence is the occurrence of a name-string in BHL.
type NameString ¶
type NameString struct {
// ID is a UUID v5 generated from the Name field. There is always a
// 1:1 relationship between Name and ID.
ID string `gorm:"type:uuid;primary_key"`
// Name is the normalized version of a name detected in BHL.
Name string `gorm:"type:varchar(255);not null"`
// RecordID is the Catalogue of Life identifier of a matched taxon.
RecordID string `gorm:"type:varchar(100);index:record_id"`
// MatchType describes the resulting kind of a name-string match.
// The following match types are possible:
//
// NoMatch - GNverifier did not find a match for the name-string.
// Exact - Canonical form of a name matched exactly.
// PartialExact - Canonical form matched exactly after removal of some words.
// Fuzzy - Canonical form matched, but with some differences.
// PartialFuzzy - Canonical form matched with differences after removal of some words.
// Virus - Name-string matched as a virus name.
MatchType string `gorm:"type:varchar(100)"`
// MatchSortOrder is used when verification has not only the best result, but
// all results. The best match always has MatchSortOrder = 0; the higher the
// number, the lower the quality assigned to the match.
MatchSortOrder int
// EditDistance shows how much difference exists between a name-string and
// its match according to the Levenshtein algorithm.
EditDistance uint
// StemEditDistance shows how much difference exists between a name-string
// and its match according to the Levenshtein algorithm.
// NOTE(review): presumably calculated on the stemmed canonical forms, as
// opposed to EditDistance — confirm.
StemEditDistance uint
// MatchedName provides the complete matched name-string.
MatchedName string `gorm:"type:varchar(255)"`
// MatchedCanonical provides the canonical form of the matched name-string.
MatchedCanonical string `gorm:"type:varchar(255);index:canonical;not null"`
// CurrentName is the full currently accepted name of the match
// provided by the data-source.
CurrentName string `gorm:"type:varchar(255)"`
// CurrentCanonical is the canonical form of the currently accepted name of
// the match.
CurrentCanonical string `gorm:"type:varchar(255);index:current_canonical;not null"`
// Classification contains the classification of the name provided by the
// Catalogue of Life.
Classification string
// ClassificationRanks provides rank information for the classification path.
ClassificationRanks string
// ClassificationIDs provides IDs for the classification path.
ClassificationIDs string
// DataSourceId is the ID of the data-source according to GNverifier.
// The mapping of IDs to data-sources can be found at
// https://verifier.globalnames.org/data_sources
// In this case it should always be 1.
DataSourceId sql.NullInt32
// DataSourceTitle provides the title of the data-source that matched the
// name-string. In this case it should always be `The Catalogue of Life`.
DataSourceTitle string `gorm:"type:varchar(255)"`
// DataSourcesNumber is the number of data-sources that matched the name.
DataSourcesNumber uint
// Curation provides information about the level of curation according to
// GNverifier. GNverifier supports the following categories:
//
// NotCurated -- None of data-sources that matched a name-string are marked as curated.
// Curated -- Some data-sources with a match are marked as curated.
// AutoCurated -- Some data-sources have automatic quality control, but not much human curation.
//
// NOTE(review): the field is a bool, so it cannot distinguish the three
// categories listed above; presumably true means the match is curated —
// confirm against the code that populates it.
Curation bool `gorm:"index:curation"`
// Occurences is the number of times this name appeared in BHL texts.
Occurences uint
// OddsLog10 is the base-10 logarithm of the odds that a detected string is
// actually a scientific name according to a Naive Bayes algorithm.
OddsLog10 float32
// Error contains an error that happened during verification. If this field
// is empty then verification was completed successfully for the name-string.
Error string `gorm:"type:varchar(255)"`
}
NameString is a unique normalized name-string that has been matched, at least partially, to the Catalogue of Life.
type Page ¶
type Page struct {
// ID is the identifier autogenerated by the BHL database.
ID uint `gorm:"primary_key;auto_increment:false"`
// ItemID is an automatically generated identifier from the BHL database.
// It corresponds to the ID field in Item.
ItemID uint `gorm:"index:item;not null"`
// SequenceOrder corresponds to the ordered position of a page in an item.
// For example, an item page that is preceded by 4 other pages should
// have SequenceOrder 5.
SequenceOrder uint `gorm:"not null"`
// PageNum corresponds to the page number/label assigned by the publisher
// of the item. It is nullable.
PageNum sql.NullInt64
}
Page contains metadata about a page file from the BHL archive.
type PagePart ¶
type PagePart struct {
// PageID is the page identifier provided by the BHL database. It is tagged
// as a primary key together with PartID.
PageID uint `gorm:"primary_key;auto_increment:false"`
// PartID is the part identifier provided by the BHL database. It is tagged
// as a primary key together with PageID.
PartID uint `gorm:"primary_key;auto_increment:false"`
}
PagePart is a many-to-many relationship between Page and Part.
type Part ¶
type Part struct {
// ID is an automatically generated identifier from the BHL database.
ID uint `gorm:"primary_key;auto_increment:false"`
// PageID is an automatically generated identifier for a page. It comes
// from the BHL database.
PageID sql.NullInt32
// ItemID is an automatically generated identifier for an item. It comes
// from the BHL database.
ItemID sql.NullInt32
// Length is the length of a part in pages.
Length sql.NullInt32
// DOI is a DOI assigned to the part.
DOI string `gorm:"type:varchar(100)"`
// ContributorName is the name of a project/person which provided
// information about a part.
ContributorName string `gorm:"type:varchar(255)"`
// SequenceOrder is the sequential position of a part in the item. For
// example, the second scientific paper in a journal will have
// SequenceOrder 2.
SequenceOrder sql.NullInt32
// SegmentType describes the type of a part. For example chapter, article, etc.
SegmentType string `gorm:"type:varchar(100)"`
// Title is the title of the part.
Title string `gorm:"type:text"`
// ContainerTitle is the title of a parent unit (item's title?).
ContainerTitle string `gorm:"type:text"`
// PublicationDetails describes information about the publisher.
PublicationDetails string `gorm:"type:text"`
// Volume is the volume of a citation.
Volume string `gorm:"type:varchar(100)"`
// Series is the series of a citation.
Series string `gorm:"type:varchar(100)"`
// Issue is the issue of a citation.
Issue string `gorm:"type:varchar(100)"`
// Date is the date of the part publication.
Date string `gorm:"type:varchar(100)"`
// Year is the year of a part.
Year sql.NullInt32 `gorm:"index:year"`
// YearEnd is the year when a part finished its publication.
YearEnd sql.NullInt32
// Month is the month when a part was published.
Month sql.NullInt32
// Day is the day when a part was published.
Day sql.NullInt32
// PageNumStart is the page number where a part starts.
PageNumStart sql.NullInt32
// PageNumEnd is the page number where a part ends.
PageNumEnd sql.NullInt32
// Language is the prevalent language of a part.
Language string `gorm:"type:varchar(20)"`
}
Part is a distinct part of an item. It can be a chapter, an article, a scientific paper.
type Title ¶
type Title struct {
// ID is the identifier autogenerated by the BHL database.
ID int
// Name is the name of a journal or a book.
Name string
// YearStart contains the earliest year of publication.
YearStart sql.NullInt32
// YearEnd contains the latest year of publication.
YearEnd sql.NullInt32
// Language is the most prevalent language of a title.
Language string
// DOI is the DOI of a title.
DOI string
}
Title is a journal or a book that contains items. Title instances are transient and are stored in a database together with Item's data.