Documentation
¶
Overview ¶
Package datahub provides a DataHub implementation of the semantic provider.
Index ¶
- type Adapter
- func (a *Adapter) BuildURN(_ context.Context, table semantic.TableIdentifier) (string, error)
- func (a *Adapter) Close() error
- func (a *Adapter) GetColumnContext(ctx context.Context, column semantic.ColumnIdentifier) (*semantic.ColumnContext, error)
- func (a *Adapter) GetColumnsContext(ctx context.Context, table semantic.TableIdentifier) (map[string]*semantic.ColumnContext, error)
- func (a *Adapter) GetGlossaryTerm(ctx context.Context, urn string) (*semantic.GlossaryTerm, error)
- func (a *Adapter) GetLineage(ctx context.Context, table semantic.TableIdentifier, ...) (*semantic.LineageInfo, error)
- func (a *Adapter) GetTableContext(ctx context.Context, table semantic.TableIdentifier) (*semantic.TableContext, error)
- func (a *Adapter) LineageConfig() LineageConfig
- func (*Adapter) Name() string
- func (*Adapter) ResolveURN(_ context.Context, urn string) (*semantic.TableIdentifier, error)
- func (a *Adapter) SearchTables(ctx context.Context, filter semantic.SearchFilter) ([]semantic.TableSearchResult, error)
- type AliasConfig
- type Client
- type ColumnTransformConfig
- type Config
- type LineageConfig
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Adapter ¶
// Adapter implements semantic.Provider using DataHub.
// Its fields are unexported and therefore not shown in this documentation.
type Adapter struct {
// contains filtered or unexported fields
}
Adapter implements semantic.Provider using DataHub.
func NewWithClient ¶
NewWithClient creates a new DataHub adapter with a provided client (for testing).
func (*Adapter) GetColumnContext ¶
func (a *Adapter) GetColumnContext(ctx context.Context, column semantic.ColumnIdentifier) (*semantic.ColumnContext, error)
GetColumnContext retrieves column context from DataHub.
func (*Adapter) GetColumnsContext ¶
func (a *Adapter) GetColumnsContext(ctx context.Context, table semantic.TableIdentifier) (map[string]*semantic.ColumnContext, error)
GetColumnsContext retrieves the context for every column of the given table from DataHub. When lineage is enabled, undocumented columns inherit metadata from upstream datasets.
func (*Adapter) GetGlossaryTerm ¶
GetGlossaryTerm retrieves a glossary term from DataHub.
func (*Adapter) GetLineage ¶
func (a *Adapter) GetLineage(ctx context.Context, table semantic.TableIdentifier, direction semantic.LineageDirection, maxDepth int) (*semantic.LineageInfo, error)
GetLineage retrieves lineage from DataHub.
func (*Adapter) GetTableContext ¶
func (a *Adapter) GetTableContext(ctx context.Context, table semantic.TableIdentifier) (*semantic.TableContext, error)
GetTableContext retrieves table context from DataHub.
func (*Adapter) LineageConfig ¶ added in v0.8.1
func (a *Adapter) LineageConfig() LineageConfig
LineageConfig returns the lineage configuration. This allows callers to verify that the configuration was wired correctly.
func (*Adapter) ResolveURN ¶
ResolveURN converts a DataHub URN to a table identifier.
func (*Adapter) SearchTables ¶
func (a *Adapter) SearchTables(ctx context.Context, filter semantic.SearchFilter) ([]semantic.TableSearchResult, error)
SearchTables searches for tables in DataHub.
type AliasConfig ¶ added in v0.8.0
// AliasConfig defines an explicit source-target relationship between tables.
// Every field carries a yaml struct tag naming its key in YAML configuration.
type AliasConfig struct {
// Source is the fully-qualified source table name.
Source string `yaml:"source"`
// Targets are glob patterns matching target table names.
Targets []string `yaml:"targets"`
// ColumnMapping provides explicit column name mappings.
// Key: target column, Value: source column
// The tag uses omitempty, so the key is optional in YAML.
ColumnMapping map[string]string `yaml:"column_mapping,omitempty"`
}
AliasConfig defines an explicit source-target relationship.
type Client ¶
// Client defines the interface for DataHub operations.
// Keeping the operations behind an interface allows the client to be mocked in tests.
// Every method except Close takes a context.Context as its first parameter.
type Client interface {
// Search runs query, applying any supplied search options, and returns the search result.
Search(ctx context.Context, query string, opts ...dhclient.SearchOption) (*types.SearchResult, error)
// GetEntity returns the entity identified by urn.
GetEntity(ctx context.Context, urn string) (*types.Entity, error)
// GetSchema returns the schema metadata for the entity identified by urn.
GetSchema(ctx context.Context, urn string) (*types.SchemaMetadata, error)
// GetSchemas returns schema metadata for several URNs in one call.
// NOTE(review): the result map is presumably keyed by URN — confirm against the implementation.
GetSchemas(ctx context.Context, urns []string) (map[string]*types.SchemaMetadata, error)
// GetLineage returns the lineage result for urn, applying any supplied lineage options.
GetLineage(ctx context.Context, urn string, opts ...dhclient.LineageOption) (*types.LineageResult, error)
// GetColumnLineage returns the column-level lineage for urn.
GetColumnLineage(ctx context.Context, urn string) (*types.ColumnLineage, error)
// GetGlossaryTerm returns the glossary term identified by urn.
GetGlossaryTerm(ctx context.Context, urn string) (*types.GlossaryTerm, error)
// Ping reports an error or nil.
// NOTE(review): presumably a connectivity/health check against DataHub — confirm.
Ping(ctx context.Context) error
// Close reports an error or nil.
// NOTE(review): presumably releases resources held by the client — confirm.
Close() error
}
Client defines the interface for DataHub operations. This allows for mocking in tests.
type ColumnTransformConfig ¶ added in v0.8.0
// ColumnTransformConfig defines a path normalization rule.
// A rule selects target datasets by glob pattern and names a prefix and/or
// suffix to strip from their column names.
type ColumnTransformConfig struct {
// TargetPattern is a glob pattern matching target dataset names.
TargetPattern string `yaml:"target_pattern"`
// StripPrefix removes this prefix from target column names.
// The tag uses omitempty, so the key is optional in YAML.
StripPrefix string `yaml:"strip_prefix,omitempty"`
// StripSuffix removes this suffix from target column names.
// The tag uses omitempty, so the key is optional in YAML.
StripSuffix string `yaml:"strip_suffix,omitempty"`
}
ColumnTransformConfig defines a path normalization rule.
type Config ¶
// Config holds DataHub adapter configuration.
// Unlike the lineage-related config types, these fields carry no yaml tags.
type Config struct {
// URL is presumably the address of the DataHub endpoint — TODO confirm.
URL string
// Token is presumably the credential used to authenticate with DataHub — TODO confirm.
Token string
Platform string // Default platform for URN building (e.g., "trino", "postgres")
// Timeout is a time limit; NOTE(review): presumably applied to DataHub requests — confirm.
Timeout time.Duration
Debug bool // Enable debug logging
// CatalogMapping maps query engine catalog names to metadata catalog names.
// For example: {"rdbms": "warehouse"} means the Trino "rdbms" catalog
// corresponds to the "warehouse" catalog in DataHub URNs.
CatalogMapping map[string]string
// Lineage configuration for inheritance-aware column resolution.
Lineage LineageConfig
}
Config holds DataHub adapter configuration.
type LineageConfig ¶ added in v0.8.0
// LineageConfig controls lineage-aware semantic enrichment.
// Every field carries a yaml struct tag naming its key in YAML configuration.
// DefaultLineageConfig returns a value populated with defaults.
type LineageConfig struct {
// Enabled activates lineage traversal for missing documentation.
Enabled bool `yaml:"enabled"`
// MaxHops limits upstream traversal depth. Range: 1-5. Default: 2.
MaxHops int `yaml:"max_hops"`
// Inherit specifies which metadata types to inherit.
// Valid: "glossary_terms", "descriptions", "tags"
Inherit []string `yaml:"inherit"`
// ConflictResolution determines behavior when multiple upstreams
// define metadata for the same column.
// Values: "nearest" (closest upstream wins), "all" (merge), "skip" (no inheritance on conflict)
ConflictResolution string `yaml:"conflict_resolution"`
// PreferColumnLineage uses DataHub's column-level lineage edges when available.
PreferColumnLineage bool `yaml:"prefer_column_lineage"`
// ColumnTransforms defines path normalization rules.
ColumnTransforms []ColumnTransformConfig `yaml:"column_transforms"`
// Aliases defines explicit source-target mappings that bypass lineage lookup.
Aliases []AliasConfig `yaml:"aliases"`
// CacheTTL for lineage graphs.
// NOTE(review): presumably how long a cached lineage graph stays valid — confirm.
CacheTTL time.Duration `yaml:"cache_ttl"`
// Timeout for the entire inheritance operation.
// NOTE(review): how a zero value is treated is not visible here — confirm.
Timeout time.Duration `yaml:"timeout"`
}
LineageConfig controls lineage-aware semantic enrichment.
func DefaultLineageConfig ¶ added in v0.8.0
func DefaultLineageConfig() LineageConfig
DefaultLineageConfig returns sensible defaults.