Documentation ¶
Overview ¶
Package types defines DataHub domain types for the MCP server. These types represent entities, schemas, lineage, and other metadata returned by DataHub's GraphQL API.
Index ¶
- type ColumnLineage
- type ColumnLineageMapping
- type Dashboard
- type DataProduct
- type Dataset
- type Deprecation
- type Domain
- type Entity
- type ForeignKey
- type GlossaryTerm
- type GlossaryTermRelation
- type LineageEdge
- type LineageNode
- type LineageResult
- type MatchedField
- type Owner
- type OwnershipType
- type ParsedURN
- type Pipeline
- type Query
- type QueryList
- type SchemaField
- type SchemaMetadata
- type SearchEntity
- type SearchResult
- type Tag
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ColumnLineage ¶ added in v0.4.0
// ColumnLineage is the column-level lineage payload for a single dataset:
// the dataset's URN plus the list of column mappings. Neither JSON tag uses
// omitempty, so both keys are always present in the encoded output.
type ColumnLineage struct {
// DatasetURN is the URN of the dataset this lineage is for.
DatasetURN string `json:"dataset_urn"`
// Mappings contains the column-level lineage mappings.
// Because the tag has no omitempty, encoding/json writes a nil slice as
// null rather than [].
Mappings []ColumnLineageMapping `json:"mappings"`
}
ColumnLineage represents fine-grained column-level lineage for a dataset.
type ColumnLineageMapping ¶ added in v0.4.0
// ColumnLineageMapping links one downstream column to one upstream column of
// a given upstream dataset. The three identifying fields are always emitted
// in JSON; Transform, Query, and ConfidenceScore are omitted when empty.
// NOTE(review): the struct holds a single upstream column, so a downstream
// column with several upstreams is presumably expressed as several mappings
// — confirm against the code that builds these values.
type ColumnLineageMapping struct {
// DownstreamColumn is the field path in the downstream dataset.
DownstreamColumn string `json:"downstream_column"`
// UpstreamDataset is the URN of the upstream dataset.
UpstreamDataset string `json:"upstream_dataset"`
// UpstreamColumn is the field path in the upstream dataset.
UpstreamColumn string `json:"upstream_column"`
// Transform describes the transformation operation (optional).
// Examples: "IDENTITY", "TRANSFORM", "AGGREGATE"
Transform string `json:"transform,omitempty"`
// Query is the URN of the query that created this lineage (optional).
Query string `json:"query,omitempty"`
// ConfidenceScore indicates the confidence of the lineage mapping (optional).
// Values typically range from 0.0 to 1.0.
// Note: This field is not available in DataHub v1.3.x and will be zero.
// With omitempty a zero score is dropped from the JSON output, so consumers
// cannot tell "score is 0.0" apart from "score not provided".
ConfidenceScore float64 `json:"confidence_score,omitempty"`
}
ColumnLineageMapping represents a single column lineage relationship. It describes how a downstream column is derived from one or more upstream columns.
type Dashboard ¶
// Dashboard is an Entity extended with dashboard-specific fields.
// Entity is embedded without a JSON tag, so encoding/json writes its fields
// inline, at the same level as dashboard_url and charts.
type Dashboard struct {
// Entity supplies the common metadata (URN, name, owners, tags, ...).
Entity
// DashboardURL is the URL to the dashboard. Omitted from JSON when empty.
DashboardURL string `json:"dashboard_url,omitempty"`
// Charts lists the charts in this dashboard. Omitted from JSON when empty.
// NOTE(review): presumably chart URNs — confirm against the populating code.
Charts []string `json:"charts,omitempty"`
}
Dashboard represents a DataHub dashboard entity.
type DataProduct ¶
// DataProduct describes a DataHub data product and the assets grouped under
// it. Only urn and name are always emitted in JSON; every other field is
// omitted when empty.
type DataProduct struct {
// URN is the data product's URN.
URN string `json:"urn"`
// Name is the data product's name.
Name string `json:"name"`
// Description is an optional description of the data product.
Description string `json:"description,omitempty"`
// Domain is the domain attached to the data product; nil when none is set.
Domain *Domain `json:"domain,omitempty"`
// Owners lists the owners of the data product.
Owners []Owner `json:"owners,omitempty"`
// Assets lists the members of the data product by URN.
Assets []string `json:"assets,omitempty"` // URNs of datasets
// Properties holds string key/value properties of the data product.
Properties map[string]string `json:"properties,omitempty"`
}
DataProduct represents a DataHub data product. Data products group datasets for specific business use cases.
type Dataset ¶
// Dataset is an Entity extended with dataset-specific fields.
// Entity is embedded without a JSON tag, so encoding/json writes its fields
// inline, at the same level as schema and sub_types.
type Dataset struct {
// Entity supplies the common metadata (URN, name, owners, tags, ...).
Entity
// Schema contains the dataset schema. Nil (and omitted from JSON) when no
// schema is attached.
Schema *SchemaMetadata `json:"schema,omitempty"`
// SubTypes are the dataset sub-types (table, view, etc.).
// This field shadows the promoted Entity.SubTypes, which carries the same
// "sub_types" JSON key: d.SubTypes refers to this field, the embedded one is
// reachable only as d.Entity.SubTypes, and encoding/json uses this outer
// field for the key. Populate this field, not the embedded one.
SubTypes []string `json:"sub_types,omitempty"`
}
Dataset represents a DataHub dataset entity.
type Deprecation ¶
// Deprecation carries the deprecation status of an entity together with an
// optional note, actor, and decommission time.
type Deprecation struct {
// Deprecated reports whether the entity is deprecated. It is always present
// in the JSON encoding (no omitempty), including when false.
// The comment intentionally does not start with "Deprecated:": Go tooling
// treats a doc paragraph with that prefix as marking the field itself as
// deprecated and warns on every use.
Deprecated bool `json:"deprecated"`
// Note is the deprecation note.
Note string `json:"note,omitempty"`
// Actor is who deprecated the entity.
Actor string `json:"actor,omitempty"`
// DecommissionTime is when the entity will be decommissioned.
// A zero value is omitted from JSON.
// NOTE(review): unit is not stated here (presumably an epoch timestamp as
// reported by DataHub) — confirm.
DecommissionTime int64 `json:"decommission_time,omitempty"`
}
Deprecation contains deprecation information.
type Domain ¶
// Domain describes a DataHub data domain. Only urn and name are always
// emitted in JSON; the remaining fields are omitted when empty.
type Domain struct {
// URN is the unique identifier.
URN string `json:"urn"`
// Name is the domain name.
Name string `json:"name"`
// Description is the domain description.
Description string `json:"description,omitempty"`
// ParentDomain is the parent domain URN. It is a plain string reference, not
// a nested Domain value.
ParentDomain string `json:"parent_domain,omitempty"`
// Owners are the domain owners.
Owners []Owner `json:"owners,omitempty"`
// EntityCount is the number of entities in this domain.
// With omitempty a count of zero is omitted from JSON, so "zero entities"
// and "count not populated" look the same to consumers.
EntityCount int `json:"entity_count,omitempty"`
}
Domain represents a DataHub data domain.
type Entity ¶
// Entity holds the metadata shared by all entity kinds. Dashboard, Dataset,
// and Pipeline embed it. Only urn, type, and name are always emitted in
// JSON; every other field is omitted when empty.
type Entity struct {
// URN is the unique identifier for this entity.
URN string `json:"urn"`
// Type is the entity type (dataset, dashboard, dataFlow, etc.).
Type string `json:"type"`
// Name is the display name of the entity.
Name string `json:"name"`
// Description is the business description.
Description string `json:"description,omitempty"`
// Owners lists the owners of this entity.
Owners []Owner `json:"owners,omitempty"`
// Tags lists the tags applied to this entity.
Tags []Tag `json:"tags,omitempty"`
// GlossaryTerms lists the glossary terms associated with this entity.
GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`
// Domain is the data domain this entity belongs to; nil when none is set.
Domain *Domain `json:"domain,omitempty"`
// Platform is the data platform (for datasets).
Platform string `json:"platform,omitempty"`
// SubTypes contains sub-type classifications (e.g., "table", "view" for datasets).
// Dataset declares its own SubTypes field with the same JSON key, which
// takes precedence over this one on Dataset values.
SubTypes []string `json:"sub_types,omitempty"`
// Deprecation contains deprecation info if the entity is deprecated.
Deprecation *Deprecation `json:"deprecation,omitempty"`
// Properties contains additional entity-specific properties.
// Values are untyped (any), unlike the string-valued Properties maps on
// Tag, GlossaryTerm, and DataProduct.
Properties map[string]any `json:"properties,omitempty"`
// Created is the creation timestamp.
// NOTE(review): unit is not stated here (presumably epoch-based, as reported
// by DataHub) — confirm.
Created int64 `json:"created,omitempty"`
// LastModified is the last modification timestamp (same unit caveat as
// Created).
LastModified int64 `json:"last_modified,omitempty"`
}
Entity represents a DataHub entity with common metadata.
type ForeignKey ¶
// ForeignKey describes a foreign key constraint: a set of source fields that
// reference fields of another dataset. Everything except the constraint name
// is always emitted in JSON.
type ForeignKey struct {
// Name is the constraint name. Omitted from JSON when empty.
Name string `json:"name,omitempty"`
// SourceFields are the source field paths.
// NOTE(review): presumably positionally paired with ForeignFields — confirm.
SourceFields []string `json:"source_fields"`
// ForeignDataset is the referenced dataset URN.
ForeignDataset string `json:"foreign_dataset"`
// ForeignFields are the referenced field paths.
ForeignFields []string `json:"foreign_fields"`
}
ForeignKey represents a foreign key relationship.
type GlossaryTerm ¶
// GlossaryTerm describes a business glossary term. Only urn and name are
// always emitted in JSON; the remaining fields are omitted when empty.
type GlossaryTerm struct {
// URN is the unique identifier.
URN string `json:"urn"`
// Name is the term name.
Name string `json:"name"`
// Description is the term definition.
Description string `json:"description,omitempty"`
// ParentNode is the parent glossary node URN (a string reference, not a
// nested value).
ParentNode string `json:"parent_node,omitempty"`
// Owners are the term owners.
Owners []Owner `json:"owners,omitempty"`
// RelatedTerms are related glossary terms, each with its relation type.
RelatedTerms []GlossaryTermRelation `json:"related_terms,omitempty"`
// Properties contains custom properties as string key/value pairs.
Properties map[string]string `json:"properties,omitempty"`
}
GlossaryTerm represents a business glossary term.
type GlossaryTermRelation ¶
// GlossaryTermRelation identifies a related glossary term and how it is
// related. All three fields are always emitted in JSON.
type GlossaryTermRelation struct {
// URN is the related term URN.
URN string `json:"urn"`
// Name is the related term name.
Name string `json:"name"`
// RelationType is the type of relationship. It is a free-form string; this
// package declares no constants for its values.
RelationType string `json:"relation_type"`
}
GlossaryTermRelation represents a relationship between glossary terms.
type LineageEdge ¶
// LineageEdge is one directed relationship in a lineage graph, identified by
// the URNs of its two endpoints. Source and target are always emitted in
// JSON; the remaining fields are omitted when empty.
type LineageEdge struct {
// Source is the source entity URN.
Source string `json:"source"`
// Target is the target entity URN.
Target string `json:"target"`
// Type is the relationship type.
Type string `json:"type,omitempty"`
// Created is when the relationship was created.
// NOTE(review): unit is not stated here (presumably an epoch timestamp) —
// confirm.
Created int64 `json:"created,omitempty"`
// UpdatedBy is who created/updated the relationship.
UpdatedBy string `json:"updated_by,omitempty"`
// Properties contains additional edge properties with untyped values.
Properties map[string]any `json:"properties,omitempty"`
}
LineageEdge represents a lineage relationship between two entities.
type LineageNode ¶
// LineageNode is one entity in a lineage graph, carrying a summary of the
// entity plus its distance from the node the traversal started at.
type LineageNode struct {
// URN is the unique identifier.
URN string `json:"urn"`
// Type is the entity type.
Type string `json:"type"`
// Name is the display name.
Name string `json:"name"`
// Platform is the data platform.
Platform string `json:"platform,omitempty"`
// Description is the entity description.
Description string `json:"description,omitempty"`
// Level is the distance from the start node.
// The tag has no omitempty, so a level of 0 is written explicitly.
Level int `json:"level"`
}
LineageNode represents an entity in the lineage graph.
type LineageResult ¶
// LineageResult is the outcome of a lineage query: the start URN, the nodes
// and edges found, and the direction and depth of the traversal. No tag uses
// omitempty, so all five keys are always present in the JSON encoding.
type LineageResult struct {
// Start is the URN of the entity we queried lineage for.
Start string `json:"start"`
// Nodes are the entities in the lineage graph.
// A nil slice is encoded by encoding/json as null rather than [].
Nodes []LineageNode `json:"nodes"`
// Edges are the relationships between nodes.
// A nil slice is encoded by encoding/json as null rather than [].
Edges []LineageEdge `json:"edges"`
// Direction is the lineage direction (UPSTREAM or DOWNSTREAM).
Direction string `json:"direction"`
// Depth is the depth of the lineage traversal.
Depth int `json:"depth"`
}
LineageResult represents the lineage graph for an entity.
type MatchedField ¶
// MatchedField names one field that matched a search query and the value
// that matched. Both keys are always emitted in JSON.
type MatchedField struct {
// Name is the field name.
Name string `json:"name"`
// Value is the matched value.
Value string `json:"value"`
}
MatchedField indicates a field that matched the search query.
type Owner ¶
// Owner identifies one owner of an entity and the kind of ownership held.
// URN and type are always emitted in JSON; name and email are omitted when
// empty.
type Owner struct {
// URN is the owner's URN (corpuser or corpGroup).
URN string `json:"urn"`
// Type is the ownership type. OwnershipType is a string type, so it is
// encoded as a JSON string; see the OwnershipType* constants for the values
// declared by this package.
Type OwnershipType `json:"type"`
// Name is the owner's display name.
Name string `json:"name,omitempty"`
// Email is the owner's email address.
Email string `json:"email,omitempty"`
}
Owner represents an owner of a DataHub entity.
type OwnershipType ¶
// OwnershipType represents the type of ownership.
// It is a string type, so values are encoded in JSON as plain strings.
type OwnershipType string

// Ownership type values declared by this package. The string on the right is
// the value carried in Owner.Type.
const (
	OwnershipTypeTechnicalOwner OwnershipType = "TECHNICAL_OWNER"
	OwnershipTypeBusinessOwner  OwnershipType = "BUSINESS_OWNER"
	OwnershipTypeDataSteward    OwnershipType = "DATA_STEWARD"
	OwnershipTypeNone           OwnershipType = "NONE"
)
Ownership type constants.
type ParsedURN ¶
// ParsedURN holds the components of a DataHub URN alongside the original
// string. Raw, entity_type, and name are always emitted in JSON; platform
// and env are omitted when empty.
type ParsedURN struct {
// Raw is the original URN string.
Raw string `json:"raw"`
// EntityType is the type of entity (dataset, dashboard, dataFlow, etc.).
EntityType string `json:"entity_type"`
// Platform is the data platform (snowflake, bigquery, postgres, etc.).
Platform string `json:"platform,omitempty"`
// Name is the qualified name of the entity.
Name string `json:"name"`
// Env is the environment (PROD, DEV, etc.).
Env string `json:"env,omitempty"`
}
ParsedURN represents a parsed DataHub URN.
type Pipeline ¶
// Pipeline is an Entity extended with pipeline-specific fields.
// Entity is embedded without a JSON tag, so encoding/json writes its fields
// inline, at the same level as data_flow, inputs, and outputs.
type Pipeline struct {
// Entity supplies the common metadata (URN, name, owners, tags, ...).
Entity
// DataFlow is the parent data flow URN. Omitted from JSON when empty.
DataFlow string `json:"data_flow,omitempty"`
// Inputs are the input dataset URNs. Omitted from JSON when empty.
Inputs []string `json:"inputs,omitempty"`
// Outputs are the output dataset URNs. Omitted from JSON when empty.
Outputs []string `json:"outputs,omitempty"`
}
Pipeline represents a DataHub data pipeline entity.
type Query ¶
// Query describes one SQL query. Statement is the only key that is always
// emitted in JSON; every other field, including the URN, is omitted when
// empty.
type Query struct {
// URN is the query URN.
URN string `json:"urn,omitempty"`
// Name is the query name.
Name string `json:"name,omitempty"`
// Statement is the SQL query text.
Statement string `json:"statement"`
// Description is the query description.
Description string `json:"description,omitempty"`
// Source indicates how the query was created (e.g., "MANUAL", "SYSTEM").
Source string `json:"source,omitempty"`
// CreatedBy is who created the query.
CreatedBy string `json:"created_by,omitempty"`
// Created is when the query was created.
// NOTE(review): unit is not stated here (presumably an epoch timestamp) —
// confirm.
Created int64 `json:"created,omitempty"`
// LastRun is when the query was last executed (same unit caveat as Created).
LastRun int64 `json:"last_run,omitempty"`
// RunCount is how many times the query has been run.
// With omitempty a count of zero is omitted from JSON.
RunCount int `json:"run_count,omitempty"`
}
Query represents a SQL query associated with a dataset.
type QueryList ¶
// QueryList wraps a slice of queries together with a total count. Both keys
// are always emitted in JSON.
type QueryList struct {
// Queries is the list of queries.
// A nil slice is encoded by encoding/json as null rather than [].
Queries []Query `json:"queries"`
// Total is the total number of queries.
// NOTE(review): may differ from len(Queries) if the list is a partial page
// — confirm against the code that fills it.
Total int `json:"total"`
}
QueryList represents a list of queries associated with a dataset.
type SchemaField ¶
// SchemaField describes one field of a dataset schema. field_path, type, and
// nullable are always emitted in JSON; the remaining fields are omitted when
// empty.
type SchemaField struct {
// FieldPath is the full path to this field (e.g., "user.address.city").
FieldPath string `json:"field_path"`
// Type is the field's data type.
Type string `json:"type"`
// NativeType is the platform-specific type.
NativeType string `json:"native_type,omitempty"`
// Description is the field description.
Description string `json:"description,omitempty"`
// Nullable indicates if the field can be null.
// The tag has no omitempty, so false is written explicitly.
Nullable bool `json:"nullable"`
// IsPartitionKey indicates if this is a partition key.
// With omitempty the key appears in JSON only when the value is true.
IsPartitionKey bool `json:"is_partition_key,omitempty"`
// Tags lists the tags applied to this field.
Tags []Tag `json:"tags,omitempty"`
// GlossaryTerms lists glossary terms for this field.
GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`
// JSONPath is the JSON path for nested fields.
JSONPath string `json:"json_path,omitempty"`
}
SchemaField represents a field in a dataset schema.
type SchemaMetadata ¶
// SchemaMetadata describes a dataset's schema: its fields plus optional key
// and versioning information. Only the fields key is always emitted in JSON;
// everything else is omitted when empty.
type SchemaMetadata struct {
// Name is the schema name.
Name string `json:"name,omitempty"`
// PlatformSchema is the platform-specific schema representation, carried as
// a single string.
PlatformSchema string `json:"platform_schema,omitempty"`
// Version is the schema version. With omitempty, version 0 is omitted from
// JSON.
Version int64 `json:"version,omitempty"`
// Fields is the list of schema fields.
// A nil slice is encoded by encoding/json as null rather than [].
Fields []SchemaField `json:"fields"`
// PrimaryKeys lists the primary key field paths.
PrimaryKeys []string `json:"primary_keys,omitempty"`
// ForeignKeys lists foreign key relationships.
ForeignKeys []ForeignKey `json:"foreign_keys,omitempty"`
// Hash is the schema hash for change detection.
Hash string `json:"hash,omitempty"`
}
SchemaMetadata represents the schema of a dataset.
type SearchEntity ¶
// SearchEntity is one hit in a SearchResult. It declares its own subset of
// entity metadata rather than embedding Entity, and adds the fields that
// matched the query. Only urn, type, and name are always emitted in JSON.
type SearchEntity struct {
// URN is the unique identifier.
URN string `json:"urn"`
// Type is the entity type.
Type string `json:"type"`
// Name is the display name.
Name string `json:"name"`
// Description is the entity description.
Description string `json:"description,omitempty"`
// Platform is the data platform.
Platform string `json:"platform,omitempty"`
// Owners are the entity owners.
Owners []Owner `json:"owners,omitempty"`
// Tags are the entity tags.
Tags []Tag `json:"tags,omitempty"`
// Domain is the entity domain; nil when none is set.
Domain *Domain `json:"domain,omitempty"`
// MatchedFields shows which fields matched the query.
MatchedFields []MatchedField `json:"matched_fields,omitempty"`
}
SearchEntity represents a single search result entity.
type SearchResult ¶
// SearchResult is the response to a search: the matching entities plus the
// total, offset, and limit that describe the returned window. No tag uses
// omitempty, so all four keys are always present in the JSON encoding.
type SearchResult struct {
// Entities is the list of matching entities.
// A nil slice is encoded by encoding/json as null rather than [].
Entities []SearchEntity `json:"entities"`
// Total is the total number of matches.
// NOTE(review): presumably counts all matches, not only those returned in
// Entities — confirm.
Total int `json:"total"`
// Offset is the result offset.
Offset int `json:"offset"`
// Limit is the result limit.
Limit int `json:"limit"`
}
SearchResult represents search results from DataHub.
type Tag ¶
// Tag describes a DataHub tag. URN and name are always emitted in JSON;
// description and properties are omitted when empty.
type Tag struct {
// URN is the unique identifier.
URN string `json:"urn"`
// Name is the tag name.
Name string `json:"name"`
// Description is the tag description.
Description string `json:"description,omitempty"`
// Properties contains custom properties as string key/value pairs.
Properties map[string]string `json:"properties,omitempty"`
}
Tag represents a DataHub tag.