dwca

package
v0.5.11 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 2, 2026 License: MIT Imports: 8 Imported by: 2

Documentation

Index

Constants

View Source
// Namespace URI prefixes. A full term URI is formed by appending the short
// term name to one of these prefixes.
const (
	DwcNS  = "http://rs.tdwg.org/dwc/terms/" // Darwin Core (DwC) terms
	DcNS   = "http://purl.org/dc/terms/"     // Dublin Core (DC) terms
	GbifNS = "http://rs.gbif.org/terms/1.0/" // GBIF terms, version 1.0
)

DwC / DC / GBIF namespace prefixes.

View Source
// Row-type URIs, each built from a namespace prefix plus the row-type name.
// The Taxon row type lives in the DwC namespace; the VernacularName and
// Distribution row types live in the GBIF namespace.
const (
	TaxonRowType          = DwcNS + "Taxon"
	VernacularNameRowType = GbifNS + "VernacularName"
	DistributionRowType   = GbifNS + "Distribution"
)

Standard row-type URIs.

Variables

View Source
// CoreTerms lists, in column order, the terms of the Taxon core file. Every
// entry pairs the CSV header name with its full term URI; all terms live in
// the DwC namespace except the last two, which are Dublin Core terms.
var CoreTerms = []DwcTerm{
	{Name: "taxonID", URI: DwcNS + "taxonID"},
	{Name: "parentNameUsageID", URI: DwcNS + "parentNameUsageID"},
	{Name: "acceptedNameUsageID", URI: DwcNS + "acceptedNameUsageID"},
	{Name: "originalNameUsageID", URI: DwcNS + "originalNameUsageID"},
	{Name: "taxonomicStatus", URI: DwcNS + "taxonomicStatus"},
	{Name: "taxonRank", URI: DwcNS + "taxonRank"},
	{Name: "scientificName", URI: DwcNS + "scientificName"},
	{Name: "scientificNameAuthorship", URI: DwcNS + "scientificNameAuthorship"},
	{Name: "genericName", URI: DwcNS + "genericName"},
	{Name: "infragenericEpithet", URI: DwcNS + "infragenericEpithet"},
	{Name: "specificEpithet", URI: DwcNS + "specificEpithet"},
	{Name: "infraspecificEpithet", URI: DwcNS + "infraspecificEpithet"},
	{Name: "cultivarEpithet", URI: DwcNS + "cultivarEpithet"},
	{Name: "nomenclaturalCode", URI: DwcNS + "nomenclaturalCode"},
	{Name: "nomenclaturalStatus", URI: DwcNS + "nomenclaturalStatus"},
	{Name: "realm", URI: DwcNS + "realm"},
	{Name: "kingdom", URI: DwcNS + "kingdom"},
	{Name: "phylum", URI: DwcNS + "phylum"},
	{Name: "subphylum", URI: DwcNS + "subphylum"},
	{Name: "class", URI: DwcNS + "class"},
	{Name: "subclass", URI: DwcNS + "subclass"},
	{Name: "order", URI: DwcNS + "order"},
	{Name: "suborder", URI: DwcNS + "suborder"},
	{Name: "superfamily", URI: DwcNS + "superfamily"},
	{Name: "family", URI: DwcNS + "family"},
	{Name: "subfamily", URI: DwcNS + "subfamily"},
	{Name: "tribe", URI: DwcNS + "tribe"},
	{Name: "subtribe", URI: DwcNS + "subtribe"},
	{Name: "genus", URI: DwcNS + "genus"},
	{Name: "subgenus", URI: DwcNS + "subgenus"},
	{Name: "section", URI: DwcNS + "section"},
	{Name: "species", URI: DwcNS + "species"},
	{Name: "taxonRemarks", URI: DwcNS + "taxonRemarks"},
	{Name: "references", URI: DcNS + "references"},
	{Name: "modified", URI: DcNS + "modified"},
}

CoreTerms defines the ordered fields for the Taxon core file. Based on the Catalogue of Life DwCA (col.xml).

View Source
// DistributionTerms lists, in column order, the terms of the Distribution
// extension file. Each entry pairs the CSV header name with its full term
// URI; "source" is a Dublin Core term, the rest are DwC terms.
var DistributionTerms = []DwcTerm{
	{Name: "taxonID", URI: DwcNS + "taxonID"},
	{Name: "occurrenceStatus", URI: DwcNS + "occurrenceStatus"},
	{Name: "locationID", URI: DwcNS + "locationID"},
	{Name: "locality", URI: DwcNS + "locality"},
	{Name: "countryCode", URI: DwcNS + "countryCode"},
	{Name: "source", URI: DcNS + "source"},
}

DistributionTerms defines the ordered fields for the Distribution extension file.

View Source
// VernacularTerms lists, in column order, the terms of the VernacularName
// extension file. Each entry pairs the CSV header name with its full term
// URI; "language" and "source" are Dublin Core terms, the rest are DwC terms.
var VernacularTerms = []DwcTerm{
	{Name: "taxonID", URI: DwcNS + "taxonID"},
	{Name: "vernacularName", URI: DwcNS + "vernacularName"},
	{Name: "language", URI: DcNS + "language"},
	{Name: "countryCode", URI: DwcNS + "countryCode"},
	{Name: "source", URI: DcNS + "source"},
}

VernacularTerms defines the ordered fields for the VernacularName extension file.

Functions

func TermHeaders

func TermHeaders(terms []DwcTerm) []string

TermHeaders returns just the header names from a slice of DwcTerms.

Types

type Abstract

// Abstract carries the text of a single <para> child element. Dataset uses
// it for its <abstract> element.
type Abstract struct {
	Para string `xml:"para"`
}

type Address

// Address maps the <country>, <city> and <postalCode> child elements of an
// address. Every field is tagged omitempty, so empty values are left out of
// the marshaled XML.
type Address struct {
	Country    string `xml:"country,omitempty"`
	City       string `xml:"city,omitempty"`
	PostalCode string `xml:"postalCode,omitempty"`
}

type AltID

// AltID maps an <alternateIdentifier> element: System is its optional
// "system" attribute and Value is the element's character data.
type AltID struct {
	XMLName xml.Name `xml:"alternateIdentifier"`
	System  string   `xml:"system,attr,omitempty"`
	Value   string   `xml:",chardata"`
}

type Archive

// Archive is the complete DwCA interface. Its method set is the union of
// arch.Packager and this package's Reader and Writer interfaces.
type Archive interface {
	arch.Packager
	Reader
	Writer
}

Archive combines reading and writing capabilities for DwCA archives.

type AssociatedParty

// AssociatedParty maps one <associatedParty> entry of a Dataset: a person
// name, an organization name, an address, and any number of <role> values.
// The name and address fields are pointers and stay nil when the
// corresponding element is absent from the decoded XML.
type AssociatedParty struct {
	IndividualName   *IndividualName   `xml:"individualName"`
	OrganizationName *OrganizationName `xml:"organizationName"`
	Address          *Address          `xml:"address,omitempty"`
	Roles            []string          `xml:"role"`
}

type Attr

// Attr holds the XML attributes and child elements shared by Core and
// Extension, both of which embed *Attr. All attributes, including
// ignoreHeaderLines, are kept as strings. Files maps the <files> child
// element and Fields maps the repeated <field> child elements.
type Attr struct {
	Encoding           string  `xml:"encoding,attr"`
	FieldsTerminatedBy string  `xml:"fieldsTerminatedBy,attr"`
	LinesTerminatedBy  string  `xml:"linesTerminatedBy,attr"`
	FieldsEnclosedBy   string  `xml:"fieldsEnclosedBy,attr"`
	IgnoreHeaderLines  string  `xml:"ignoreHeaderLines,attr"`
	RowType            string  `xml:"rowType,attr"`
	Files              Files   `xml:"files"`
	Fields             []Field `xml:"field"`
}

Attr holds the common fields for Core and Extension.

type BoundingCoordinates

// BoundingCoordinates maps the west, east, south and north
// <...BoundingCoordinate> child elements as float64 values.
// NOTE(review): the values are presumably decimal degrees — confirm.
type BoundingCoordinates struct {
	WestBoundingCoordinate  float64 `xml:"westBoundingCoordinate"`
	EastBoundingCoordinate  float64 `xml:"eastBoundingCoordinate"`
	SouthBoundingCoordinate float64 `xml:"southBoundingCoordinate"`
	NorthBoundingCoordinate float64 `xml:"northBoundingCoordinate"`
}

type CalendarDate

// CalendarDate holds the character data of a date element as an unparsed
// string. TemporalCoverage uses it for <beginDate> and <endDate>.
type CalendarDate struct {
	Value string `xml:",chardata"`
}

type Contact

// Contact maps one <contact> entry of a Dataset: a person name, an
// organization name, an address and an e-mail address. The name and address
// fields are pointers and stay nil when the element is absent from the
// decoded XML.
type Contact struct {
	IndividualName        *IndividualName   `xml:"individualName"`
	OrganizationName      *OrganizationName `xml:"organizationName"`
	Address               *Address          `xml:"address,omitempty"`
	ElectronicMailAddress string            `xml:"electronicMailAddress"`
}

type Core

// Core maps the <core> element of Meta: its <id> child element plus the
// shared attributes and children promoted from the embedded *Attr. Attr is
// embedded by pointer, so it must be non-nil before any promoted field is
// accessed.
type Core struct {
	ID ID `xml:"id"`
	*Attr
}

Core embeds Attr (the fields shared with Extension) and adds the core-specific ID field.

type CoreData

// CoreData is a simplified, lookup-oriented view of the Core section of a
// DwCA archive. It is embedded in MetaSimple.
type CoreData struct {
	// Index is the field index of the Core's main term.
	Index int

	// Term is the field name of the Core's main term (the topic).
	Term string

	// TermFull is the URI of the Core's main term (the topic).
	TermFull string

	// Locations lists the locations of the Core files.
	Locations []string

	// FieldsData maps a field term to its FieldData.
	FieldsData map[string]FieldData

	// FieldsIdx maps a field index to its FieldData.
	FieldsIdx map[int]FieldData
}

CoreData is a simplified version of Core data of the DwCA archive.

type CoreID

// CoreID maps the <coreid> element of an Extension. Index is the verbatim
// "index" attribute; Idx is excluded from XML (tag "-").
// NOTE(review): Idx is presumably the parsed integer form of Index, as
// documented for Field — confirm where it is populated.
type CoreID struct {
	Index string `xml:"index,attr"`
	Idx   int    `xml:"-"`
}

CoreID holds the fields for the CoreID data.

type Coverage

// Coverage maps the <coverage> element of a Dataset, made of a
// <geographicCoverage> and a <temporalCoverage> child. Both are pointers
// and stay nil when the element is absent from the decoded XML.
type Coverage struct {
	GeographicCoverage *GeographicCoverage `xml:"geographicCoverage"`
	TemporalCoverage   *TemporalCoverage   `xml:"temporalCoverage"`
}

type Creator

// Creator maps one <creator> entry of a Dataset: the "id" and optional
// "scope" attributes, a person name, an organization name and an e-mail
// address. The name fields are pointers and stay nil when the element is
// absent from the decoded XML.
//
// IndividualName carries an explicit tag for consistency with Contact,
// AssociatedParty and MetadataProvider. The element name is unchanged,
// because the IndividualName type already fixes it through its XMLName field.
type Creator struct {
	ID                    string            `xml:"id,attr"`
	Scope                 string            `xml:"scope,attr,omitempty"`
	IndividualName        *IndividualName   `xml:"individualName"`
	OrganizationName      *OrganizationName `xml:"organizationName"`
	ElectronicMailAddress string            `xml:"electronicMailAddress"`
}

type Dataset

// Dataset maps the <dataset> element of an EML document: its "id"
// attribute, alternate identifier, title, creators, metadata providers,
// associated parties, publication date, language, abstract, keyword sets,
// intellectual rights, coverage and contacts.
//
// NOTE(review): the exported field KeywodSets is misspelled (it maps the
// <keywordSet> elements). Renaming it would break callers, so the name is
// kept as is.
type Dataset struct {
	ID                    string              `xml:"id,attr"`
	AlternativeIdentifier AltID               `xml:"alternateIdentifier"`
	Title                 string              `xml:"title"`
	Creators              []Creator           `xml:"creator"`
	MetadataProviders     []MetadataProvider  `xml:"metadataProvider"`
	AssociatedParties     []AssociatedParty   `xml:"associatedParty"`
	PubDate               string              `xml:"pubDate"`
	Language              string              `xml:"language"`
	Abstract              Abstract            `xml:"abstract"`
	KeywodSets            []KeywordSet        `xml:"keywordSet"`
	IntellectualRights    *IntellectualRights `xml:"intellectualRights"`
	Coverage              *Coverage           `xml:"coverage"`
	Contacts              []Contact           `xml:"contact"`
}

type DwcTerm

// DwcTerm pairs a short CSV column header with the full URI of the same
// term. CoreTerms, DistributionTerms and VernacularTerms are ordered slices
// of DwcTerm.
type DwcTerm struct {
	Name string // CSV column header (e.g. "taxonID")
	URI  string // full term URI for meta.xml
}

DwcTerm pairs a short CSV header name with its full term URI.

type EML

// EML maps the root <eml> element of the metadata document: the xml:lang
// and xsi:schemaLocation attributes plus the single <dataset> child element.
type EML struct {
	XMLName        xml.Name `xml:"eml"`
	Lang           string   `xml:"xml:lang,attr"`
	SchemaLocation string   `xml:"xsi:schemaLocation,attr"`
	Dataset        Dataset  `xml:"dataset"`
}

type Extension

// Extension maps one <extension> element of Meta: its <coreid> child
// element plus the shared attributes and children promoted from the
// embedded *Attr. Attr is embedded by pointer, so it must be non-nil before
// any promoted field is accessed.
type Extension struct {
	CoreID CoreID `xml:"coreid"`
	*Attr
}

Extension embeds Attr (the fields shared with Core) and adds the extension-specific CoreID field.

type ExtensionData

// ExtensionData is a simplified, lookup-oriented view of one Extension of a
// DwCA archive. MetaSimple keeps these in its ExtensionsData map.
type ExtensionData struct {
	// CoreIndex is the index, within the Extension, of the field that refers
	// to the Core's main field. It makes it possible to build a star schema
	// of the DwCA archive.
	CoreIndex int

	// Locations lists the locations of the Extension files.
	Locations []string

	// FieldsData maps a field term to its FieldData.
	FieldsData map[string]FieldData

	// FieldsIdx maps a field index to its FieldData.
	FieldsIdx map[int]FieldData
}

ExtensionData is a simplified version of Extensions data of the DwCA.

type Field

// Field maps one <field> element of a Core or Extension: the column index
// and the URI of the term stored in that column.
type Field struct {
	// Index is the verbatim value of the "index" attribute.
	Index string `xml:"index,attr"`

	// Idx is the int version of Index. It is excluded from XML (tag "-").
	Idx int `xml:"-"`

	// Term is the URI of the term, taken from the "term" attribute.
	Term string `xml:"term,attr"`
}

Field holds the fields of the data.

type FieldData

// FieldData is a simplified description of one field of the DwCA archive.
// It is the value type of the FieldsData and FieldsIdx maps in CoreData and
// ExtensionData.
type FieldData struct {
	// Index is the index of the field in the DwCA archive.
	Index int

	// Term is the short name of the field.
	Term string

	// TermFull is the full URI of the field's term.
	TermFull string
}

FieldData is a simplified version of a field in the DwCA archive.

type Files

// Files maps the <files> element of a Core or Extension and collects the
// values of its repeated <location> child elements.
type Files struct {
	// Locations holds one entry per <location> element; each entry is the
	// path to a file.
	Locations []string `xml:"location"`
}

Files holds the location of files.

type GeographicCoverage

// GeographicCoverage maps the <geographicCoverage> element: a free-text
// description and a bounding box. Both fields are tagged omitempty, so an
// empty description or nil bounding box is left out of the marshaled XML.
type GeographicCoverage struct {
	GeographicDescription string               `xml:"geographicDescription,omitempty"`
	BoundingCoordinates   *BoundingCoordinates `xml:"boundingCoordinates,omitempty"`
}

type ID

// ID maps the <id> element of the Core. Index is the verbatim "index"
// attribute, Term is the optional "term" attribute, and Idx is excluded
// from XML (tag "-").
// NOTE(review): Idx is presumably the parsed integer form of Index, as
// documented for Field — confirm where it is populated.
type ID struct {
	Index string `xml:"index,attr"`
	Idx   int    `xml:"-"`
	Term  string `xml:"term,attr,omitempty"`
}

ID holds the fields for the Core ID.

type IndividualName

// IndividualName maps an <individualName> element with its <givenName> and
// <surName> child elements. The XMLName field fixes the element name
// wherever the type is used.
type IndividualName struct {
	XMLName xml.Name `xml:"individualName"`

	GivenName string `xml:"givenName"`
	SurName   string `xml:"surName"`
}

type IntellectualRights

// IntellectualRights carries the text of a single <para> child element.
// Dataset uses it for its <intellectualRights> element.
type IntellectualRights struct {
	Para string `xml:"para"`
}

type Keyword

// Keyword holds the character data of one <keyword> element of a
// KeywordSet.
type Keyword struct {
	Value string `xml:",chardata"`
}

type KeywordSet

// KeywordSet maps one <keywordSet> element of a Dataset and collects its
// repeated <keyword> child elements.
type KeywordSet struct {
	Keywords []Keyword `xml:"keyword"`
}

type Meta

// Meta maps the archive descriptor that Writer.WriteMeta stores as
// meta.xml: the <archive> root element, its "metadata" attribute (EMLFile),
// one <core> element and any number of <extension> elements.
//
// NOTE(review): encoding/xml gives special treatment only to a field named
// XMLName. XMLNameStar is therefore handled as an ordinary xml.Name-typed
// field mapped to a <starArchive> child element — confirm this is intended.
type Meta struct {
	XMLName     xml.Name     `xml:"archive"`
	XMLNameStar xml.Name     `xml:"starArchive"`
	EMLFile     string       `xml:"metadata,attr"`
	Core        *Core        `xml:"core"`
	Extensions  []*Extension `xml:"extension"`
}

func BuildMeta

func BuildMeta(hasVern, hasDistr bool) *Meta

BuildMeta constructs a Meta struct describing the archive contents. The core (Taxon) is always included. Vernacular and Distribution extensions are included only when the corresponding flag is true.

func (*Meta) Simplify

func (m *Meta) Simplify() *MetaSimple

type MetaSimple

// MetaSimple is a simplified version of Meta used to look up metadata
// fields by name or by index. Meta.Simplify returns a *MetaSimple.
type MetaSimple struct {
	// CoreData is the simplified Core data of the DwCA archive. It is
	// embedded, so its fields are promoted to MetaSimple.
	CoreData

	// ExtensionsData holds the simplified data of the DwCA Extensions.
	// NOTE(review): the meaning of the string key is not visible here —
	// confirm against Simplify.
	ExtensionsData map[string]ExtensionData
}

MetaSimple is a simplified version of the Meta object that is used to access metadata fields by their names or indices.

type MetadataProvider

// MetadataProvider maps one <metadataProvider> entry of a Dataset: a person
// name, an organization name, an address, an e-mail address and an optional
// online URL. The name and address fields are pointers and stay nil when
// the element is absent from the decoded XML.
type MetadataProvider struct {
	IndividualName        *IndividualName   `xml:"individualName"`
	OrganizationName      *OrganizationName `xml:"organizationName"`
	Address               *Address          `xml:"address,omitempty"`
	ElectronicMailAddress string            `xml:"electronicMailAddress"`
	OnlineURL             string            `xml:"onlineUrl,omitempty"`
}

type OrganizationName

// OrganizationName maps an <organizationName> element; Value is the
// element's character data. The XMLName field fixes the element name
// wherever the type is used.
type OrganizationName struct {
	XMLName xml.Name `xml:"organizationName"`
	Value   string   `xml:",chardata"`
}

type Reader

// Reader groups the methods for consuming an existing DwCA.
type Reader interface {
	// Meta returns a pointer to the archive's Meta object.
	Meta() *Meta
	// EML returns a pointer to the archive's provenance EML object.
	EML() *EML
	// Diagnostics returns a pointer to the diagnostics that were run while
	// loading the DwCA data.
	Diagnostics() *diagn.Diagnostics
	// LoadCore reads the content of the core file, converts it to
	// coldp.NameUsage and coldp.Reference objects, and sends them to ch.
	LoadCore(
		ctx context.Context,
		ch chan<- coldp.Data,
	) error
	// LoadVernacular reads the content of a vernacular names file and
	// converts its rows to coldp.Vernacular objects, sent to ch in batches.
	// NOTE(review): idx presumably selects the extension by its index —
	// confirm against the implementation.
	LoadVernacular(
		ctx context.Context,
		idx int,
		ch chan<- []coldp.Vernacular,
	) error
	// LoadDistribution reads the content of a distribution file and
	// converts its rows to coldp.Distribution objects, sent to ch.
	// NOTE(review): idx presumably selects the extension by its index —
	// confirm against the implementation.
	LoadDistribution(
		ctx context.Context,
		idx int,
		ch chan<- coldp.Data,
	) error
	// CoreSlice takes an offset and a number of rows of the core file and
	// returns the requested rows. It returns an error if something went
	// wrong.
	CoreSlice(offset, limit int) ([][]string, error)
	// CoreStream fills a channel with rows from the core file. It returns
	// the number of rows read, or an error if something went wrong.
	CoreStream(
		ctx context.Context,
		chCore chan<- []string,
	) (int, error)
	// ExtensionSlice takes the index of an extension, an offset and a number
	// of rows. It returns the corresponding rows from the extension file, or
	// an error if something went wrong.
	ExtensionSlice(index, offset, limit int) ([][]string, error)
	// ExtensionStream feeds the rows of an extension to the provided
	// channel. It returns an error if something went wrong.
	// NOTE(review): the int result is presumably the number of rows read, as
	// for CoreStream — confirm against the implementation.
	ExtensionStream(
		ctx context.Context,
		index int,
		ch chan<- []string,
	) (int, error)
}

Reader groups all methods for consuming an existing DwCA.

type TemporalCoverage

// TemporalCoverage maps the <temporalCoverage> element: a <beginDate> and
// an <endDate>, each held as an unparsed CalendarDate.
type TemporalCoverage struct {
	BeginDate CalendarDate `xml:"beginDate"`
	EndDate   CalendarDate `xml:"endDate"`
}

type Writer

// Writer groups the methods for creating a new DwCA. Each method writes its
// output to a file on disk.
type Writer interface {
	// WriteMeta marshals the Meta struct to meta.xml on disk.
	WriteMeta(*Meta) error
	// WriteEML converts coldp.Meta to EML and writes eml.xml on disk.
	// If the argument is nil or has no title, a synthetic EML is generated.
	WriteEML(*coldp.Meta) error
	// WriteCore writes core taxon rows received from a NameUsage channel
	// to Taxon.tsv on disk.
	WriteCore(ctx context.Context, ch <-chan coldp.NameUsage) error
	// WriteVernaculars writes vernacular name extension rows received
	// from ch to VernacularName.tsv on disk.
	WriteVernaculars(
		ctx context.Context,
		ch <-chan coldp.Vernacular,
	) error
	// WriteDistributions writes distribution extension rows received
	// from ch to Distribution.tsv on disk.
	WriteDistributions(
		ctx context.Context,
		ch <-chan coldp.Distribution,
	) error
}

Writer groups all methods for creating a new DwCA. Every method writes its output to files inside the archive's rootDir (set by Create). Export then only packages those cached files into a zip.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL