datahub

package
v0.13.3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 6, 2026 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Overview

Package datahub provides a DataHub implementation of the semantic provider.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Adapter

// Adapter implements semantic.Provider using DataHub.
//
// Its fields are unexported; construct one with New or NewWithClient.
type Adapter struct {
	// contains filtered or unexported fields
}

Adapter implements semantic.Provider using DataHub.

func New

func New(cfg Config) (*Adapter, error)

New creates a new DataHub adapter with a real client.

func NewWithClient

func NewWithClient(cfg Config, client Client) (*Adapter, error)

NewWithClient creates a new DataHub adapter with a provided client (for testing).

func (*Adapter) BuildURN

func (a *Adapter) BuildURN(_ context.Context, table semantic.TableIdentifier) (string, error)

BuildURN creates a URN from a table identifier.

func (*Adapter) Close

func (a *Adapter) Close() error

Close releases resources.

func (*Adapter) GetColumnContext

func (a *Adapter) GetColumnContext(ctx context.Context, column semantic.ColumnIdentifier) (*semantic.ColumnContext, error)

GetColumnContext retrieves column context from DataHub.

func (*Adapter) GetColumnsContext

func (a *Adapter) GetColumnsContext(ctx context.Context, table semantic.TableIdentifier) (map[string]*semantic.ColumnContext, error)

GetColumnsContext retrieves the context for all columns of a table from DataHub. When lineage is enabled, undocumented columns inherit metadata from upstream datasets.

func (*Adapter) GetGlossaryTerm

func (a *Adapter) GetGlossaryTerm(ctx context.Context, urn string) (*semantic.GlossaryTerm, error)

GetGlossaryTerm retrieves a glossary term from DataHub.

func (*Adapter) GetLineage

func (a *Adapter) GetLineage(ctx context.Context, table semantic.TableIdentifier, direction semantic.LineageDirection, maxDepth int) (*semantic.LineageInfo, error)

GetLineage retrieves lineage from DataHub.

func (*Adapter) GetTableContext

func (a *Adapter) GetTableContext(ctx context.Context, table semantic.TableIdentifier) (*semantic.TableContext, error)

GetTableContext retrieves table context from DataHub.

func (*Adapter) LineageConfig added in v0.8.1

func (a *Adapter) LineageConfig() LineageConfig

LineageConfig returns the lineage configuration. This allows verifying that configuration was wired correctly.

func (*Adapter) Name

func (a *Adapter) Name() string

Name returns the provider name.

func (*Adapter) ResolveURN

func (a *Adapter) ResolveURN(_ context.Context, urn string) (*semantic.TableIdentifier, error)

ResolveURN converts a DataHub URN to a table identifier.

func (*Adapter) SearchTables

func (a *Adapter) SearchTables(ctx context.Context, filter semantic.SearchFilter) ([]semantic.TableSearchResult, error)

SearchTables searches for tables in DataHub.

type AliasConfig added in v0.8.0

// AliasConfig defines an explicit source-target relationship.
//
// Entries are listed under LineageConfig.Aliases, which documents them as
// explicit source-target mappings that bypass lineage lookup.
type AliasConfig struct {
	// Source is the fully-qualified source table name.
	Source string `yaml:"source"`

	// Targets are glob patterns matching target table names.
	Targets []string `yaml:"targets"`

	// ColumnMapping provides explicit column name mappings.
	// Key: target column, Value: source column.
	// Optional: the key is omitted from YAML output when the map is empty.
	ColumnMapping map[string]string `yaml:"column_mapping,omitempty"`
}

AliasConfig defines an explicit source-target relationship.

type Client

// Client defines the interface for DataHub operations.
// This allows for mocking in tests: NewWithClient accepts any Client
// implementation in place of the real client that New creates.
type Client interface {
	// Search takes a query string plus optional dhclient.SearchOption values
	// and returns a *types.SearchResult.
	Search(ctx context.Context, query string, opts ...dhclient.SearchOption) (*types.SearchResult, error)
	// GetEntity returns the *types.Entity for the given URN.
	GetEntity(ctx context.Context, urn string) (*types.Entity, error)
	// GetSchema returns the *types.SchemaMetadata for the given URN.
	GetSchema(ctx context.Context, urn string) (*types.SchemaMetadata, error)
	// GetSchemas is the multi-URN form of GetSchema. The result map is keyed
	// by string — presumably the requested URN; confirm against the client.
	GetSchemas(ctx context.Context, urns []string) (map[string]*types.SchemaMetadata, error)
	// GetLineage returns a *types.LineageResult for the given URN, tuned by
	// optional dhclient.LineageOption values.
	GetLineage(ctx context.Context, urn string, opts ...dhclient.LineageOption) (*types.LineageResult, error)
	// GetColumnLineage returns the *types.ColumnLineage for the given URN.
	GetColumnLineage(ctx context.Context, urn string) (*types.ColumnLineage, error)
	// GetGlossaryTerm returns the *types.GlossaryTerm for the given URN.
	GetGlossaryTerm(ctx context.Context, urn string) (*types.GlossaryTerm, error)
	// Ping takes only a context and returns an error.
	// NOTE(review): presumably a connectivity/health check — confirm.
	Ping(ctx context.Context) error
	// Close returns an error.
	// NOTE(review): presumably releases client resources, mirroring
	// Adapter.Close — confirm.
	Close() error
}

Client defines the interface for DataHub operations. This allows for mocking in tests.

type ColumnTransformConfig added in v0.8.0

// ColumnTransformConfig defines a path normalization rule.
//
// Rules are listed under LineageConfig.ColumnTransforms. Both strip fields
// are optional and are omitted from YAML output when empty.
type ColumnTransformConfig struct {
	// TargetPattern is a glob pattern matching target dataset names.
	TargetPattern string `yaml:"target_pattern"`

	// StripPrefix removes this prefix from target column names.
	StripPrefix string `yaml:"strip_prefix,omitempty"`

	// StripSuffix removes this suffix from target column names.
	StripSuffix string `yaml:"strip_suffix,omitempty"`
}

ColumnTransformConfig defines a path normalization rule.

type Config

// Config holds DataHub adapter configuration.
// It is the argument to both New and NewWithClient.
type Config struct {
	// URL is the DataHub address.
	// NOTE(review): presumably the base endpoint the client connects to — confirm.
	URL      string
	// Token is presumably the credential used to authenticate against
	// DataHub — confirm against the client construction in New.
	Token    string
	Platform string // Default platform for URN building (e.g., "trino", "postgres")
	// Timeout is a duration setting separate from Lineage.Timeout.
	// NOTE(review): presumably the client request timeout — confirm.
	Timeout  time.Duration
	Debug    bool // Enable debug logging

	// CatalogMapping maps query engine catalog names to metadata catalog names.
	// For example: {"rdbms": "warehouse"} means the Trino "rdbms" catalog
	// corresponds to the "warehouse" catalog in DataHub URNs.
	CatalogMapping map[string]string

	// Lineage configuration for inheritance-aware column resolution.
	Lineage LineageConfig
}

Config holds DataHub adapter configuration.

type LineageConfig added in v0.8.0

// LineageConfig controls lineage-aware semantic enrichment.
//
// It is carried on Config.Lineage and can be read back from a constructed
// adapter via Adapter.LineageConfig. DefaultLineageConfig returns a value
// populated with defaults.
type LineageConfig struct {
	// Enabled activates lineage traversal for missing documentation.
	Enabled bool `yaml:"enabled"`

	// MaxHops limits upstream traversal depth. Range: 1-5. Default: 2.
	MaxHops int `yaml:"max_hops"`

	// Inherit specifies which metadata types to inherit.
	// Valid: "glossary_terms", "descriptions", "tags"
	Inherit []string `yaml:"inherit"`

	// ConflictResolution determines behavior when multiple upstreams
	// define metadata for the same column.
	// Values: "nearest" (closest upstream wins), "all" (merge), "skip" (no inheritance on conflict)
	ConflictResolution string `yaml:"conflict_resolution"`

	// PreferColumnLineage uses DataHub's column-level lineage edges when available.
	PreferColumnLineage bool `yaml:"prefer_column_lineage"`

	// ColumnTransforms defines path normalization rules.
	// Each rule is a ColumnTransformConfig.
	ColumnTransforms []ColumnTransformConfig `yaml:"column_transforms"`

	// Aliases defines explicit source-target mappings that bypass lineage lookup.
	// Each mapping is an AliasConfig.
	Aliases []AliasConfig `yaml:"aliases"`

	// CacheTTL is the time-to-live for lineage graphs.
	// NOTE(review): presumably how long a cached graph is reused before being
	// refetched — confirm against the cache implementation.
	CacheTTL time.Duration `yaml:"cache_ttl"`

	// Timeout for the entire inheritance operation.
	Timeout time.Duration `yaml:"timeout"`
}

LineageConfig controls lineage-aware semantic enrichment.

func DefaultLineageConfig added in v0.8.0

func DefaultLineageConfig() LineageConfig

DefaultLineageConfig returns sensible defaults.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL