semantic

package
v0.19.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 15, 2026 License: Apache-2.0 Imports: 9 Imported by: 0

Documentation

Overview

Package semantic provides semantic layer abstractions, including abstractions for semantic metadata providers.

Index

Constants

View Source
// MaxStringLength is the maximum length for sanitized strings.
// It matches the documented default of SanitizeConfig.MaxLength (2000).
const MaxStringLength = 2000

MaxStringLength is the maximum length for sanitized strings.

Variables

View Source
// DefaultInjectionLogger is the default logger for injection attempts.
// Its log function is initialized to the standard library's log.Printf.
var DefaultInjectionLogger = &InjectionLogger{
	logFunc: log.Printf,
}

DefaultInjectionLogger is the default logger for injection attempts.

Functions

This section is empty.

Types

type CacheConfig

// CacheConfig configures the cache used by CachedProvider.
type CacheConfig struct {
	// TTL is the cache's time-to-live setting.
	// NOTE(review): presumably how long a cached entry stays valid before it
	// is refetched — confirm against the CachedProvider implementation.
	TTL time.Duration
}

CacheConfig configures the cache.

type CachedProvider

// CachedProvider wraps a Provider with caching.
// Construct one with NewCachedProvider; all fields are unexported.
type CachedProvider struct {
	// contains filtered or unexported fields
}

CachedProvider wraps a Provider with caching.

func NewCachedProvider

func NewCachedProvider(provider Provider, cfg CacheConfig) *CachedProvider

NewCachedProvider creates a caching wrapper around a provider.

func (*CachedProvider) Close

func (c *CachedProvider) Close() error

Close closes the underlying provider.

func (*CachedProvider) GetColumnContext

func (c *CachedProvider) GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)

GetColumnContext retrieves column context with caching.

func (*CachedProvider) GetColumnsContext

func (c *CachedProvider) GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)

GetColumnsContext retrieves columns context with caching.

func (*CachedProvider) GetGlossaryTerm

func (c *CachedProvider) GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)

GetGlossaryTerm retrieves a glossary term with caching.

func (*CachedProvider) GetLineage

func (c *CachedProvider) GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)

GetLineage retrieves lineage with caching.

func (*CachedProvider) GetTableContext

func (c *CachedProvider) GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)

GetTableContext retrieves table context with caching.

func (*CachedProvider) Invalidate

func (c *CachedProvider) Invalidate()

Invalidate clears the cache.

func (*CachedProvider) Name

func (c *CachedProvider) Name() string

Name returns the underlying provider name.

func (*CachedProvider) SearchTables

func (c *CachedProvider) SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)

SearchTables searches without caching (queries vary too much).

type ColumnContext

// ColumnContext provides semantic context for a column.
//
// Name is always serialized; every other field carries omitempty and is
// dropped from the JSON output when it holds its zero value.
type ColumnContext struct {
	// Basic info
	Name        string `json:"name"`
	Description string `json:"description,omitempty"`

	// Classification
	Tags          []string       `json:"tags,omitempty"`
	GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`

	// Sensitivity
	// Because of omitempty, a false flag is omitted from the JSON output, so a
	// consumer of the encoded form cannot tell "false" apart from "not set".
	IsPII       bool `json:"is_pii,omitempty"`
	IsSensitive bool `json:"is_sensitive,omitempty"`

	// Business metadata
	BusinessName string `json:"business_name,omitempty"`

	// InheritedFrom is set when metadata was inherited from upstream lineage.
	InheritedFrom *InheritedMetadata `json:"inherited_from,omitempty"`
}

ColumnContext provides semantic context for a column.

type ColumnIdentifier

// ColumnIdentifier uniquely identifies a column.
type ColumnIdentifier struct {
	// TableIdentifier is embedded without a tag, so its fields are promoted
	// and encoding/json emits them at the same level as "column".
	TableIdentifier
	// Column is the column name, serialized as "column".
	Column string `json:"column"`
}

ColumnIdentifier uniquely identifies a column.

func (ColumnIdentifier) String

func (c ColumnIdentifier) String() string

String returns a dot-separated representation including the column.

type Deprecation

// Deprecation indicates whether an entity is deprecated.
//
// Deprecated is always serialized; Note, Actor and DecommDate are omitted from
// the JSON output when empty or nil.
type Deprecation struct {
	Deprecated bool       `json:"deprecated"`
	Note       string     `json:"note,omitempty"`
	Actor      string     `json:"actor,omitempty"`
	// DecommDate is serialized under the key "decommission_date".
	DecommDate *time.Time `json:"decommission_date,omitempty"`
}

Deprecation indicates if an entity is deprecated.

type Domain

// Domain represents a data domain.
//
// URN and Name are always serialized; Description is omitted when empty.
type Domain struct {
	URN         string `json:"urn"`
	Name        string `json:"name"`
	Description string `json:"description,omitempty"`
}

Domain represents a data domain.

type GlossaryTerm

// GlossaryTerm represents a business glossary term.
//
// URN and Name are always serialized; Description is omitted when empty.
type GlossaryTerm struct {
	URN         string `json:"urn"`
	Name        string `json:"name"`
	Description string `json:"description,omitempty"`
}

GlossaryTerm represents a business glossary term.

type InheritedMetadata added in v0.8.0

// InheritedMetadata tracks the provenance of inherited column metadata.
// None of its fields use omitempty, so all four keys are always present in
// the JSON output.
type InheritedMetadata struct {
	// SourceURN is the DataHub URN of the upstream dataset.
	SourceURN string `json:"source_urn"`

	// SourceColumn is the column name in the upstream dataset.
	SourceColumn string `json:"source_column"`

	// Hops is the distance from the target dataset (1 = direct upstream).
	Hops int `json:"hops"`

	// MatchMethod indicates how the column was matched.
	// Values: "column_lineage", "name_exact", "name_transformed", "alias"
	MatchMethod string `json:"match_method"`
}

InheritedMetadata tracks the provenance of inherited column metadata.

type InjectionLogger added in v0.2.0

// InjectionLogger logs detected prompt injection attempts.
// Its log function can be replaced with SetLogFunc, and logging can be
// switched on and off with Enable and Disable.
type InjectionLogger struct {
	// contains filtered or unexported fields
}

InjectionLogger logs detected prompt injection attempts.

func (*InjectionLogger) DetectAndLog added in v0.2.0

func (l *InjectionLogger) DetectAndLog(sanitizer *Sanitizer, source, field, input string) bool

DetectAndLog detects injection patterns in the input and logs them if any are found. It returns true if an injection was detected.

func (*InjectionLogger) Disable added in v0.2.0

func (l *InjectionLogger) Disable()

Disable disables injection logging.

func (*InjectionLogger) Enable added in v0.2.0

func (l *InjectionLogger) Enable()

Enable enables injection logging.

func (*InjectionLogger) LogInjectionAttempt added in v0.2.0

func (l *InjectionLogger) LogInjectionAttempt(source, field string, patterns []string)

LogInjectionAttempt logs a detected injection attempt.

func (*InjectionLogger) SetLogFunc added in v0.2.0

func (l *InjectionLogger) SetLogFunc(f func(format string, args ...any))

SetLogFunc sets the logging function for injection attempts.

type LineageDirection

// LineageDirection indicates the direction of lineage traversal.
// The defined values are LineageUpstream and LineageDownstream.
type LineageDirection string

LineageDirection indicates the direction of lineage traversal.

// Lineage direction constants.
const (
	// LineageUpstream is the "upstream" traversal direction.
	LineageUpstream   LineageDirection = "upstream"
	// LineageDownstream is the "downstream" traversal direction.
	LineageDownstream LineageDirection = "downstream"
)

Lineage direction constants.

type LineageEdge

// LineageEdge represents an edge in the lineage graph.
//
// URN is always serialized; Type and TransformLogic are omitted when empty.
type LineageEdge struct {
	URN            string `json:"urn"`
	Type           string `json:"type,omitempty"`
	TransformLogic string `json:"transform_logic,omitempty"`
}

LineageEdge represents an edge in the lineage graph.

type LineageEntity

// LineageEntity represents an entity in a lineage graph.
//
// URN, Type, Name and Depth are always serialized; Platform, Parents,
// Children and Context are omitted from the JSON output when empty or nil.
type LineageEntity struct {
	URN      string        `json:"urn"`
	Type     string        `json:"type"`
	Name     string        `json:"name"`
	Platform string        `json:"platform,omitempty"`
	// NOTE(review): Depth is presumably the distance from the table the
	// lineage was requested for — confirm against the provider implementation.
	Depth    int           `json:"depth"`
	Parents  []LineageEdge `json:"parents,omitempty"`
	Children []LineageEdge `json:"children,omitempty"`
	// Context optionally carries the table-level semantic context.
	Context  *TableContext `json:"context,omitempty"`
}

LineageEntity represents an entity in a lineage graph.

type LineageInfo

// LineageInfo contains lineage information for an entity.
//
// No field uses omitempty, so all three keys are always emitted; a nil
// Entities slice is encoded by encoding/json as null rather than [].
type LineageInfo struct {
	Direction LineageDirection `json:"direction"`
	Entities  []LineageEntity  `json:"entities"`
	MaxDepth  int              `json:"max_depth"`
}

LineageInfo contains lineage information for an entity.

type NoopProvider

// NoopProvider is a no-op implementation for testing.
// It is an empty struct and carries no state.
type NoopProvider struct{}

NoopProvider is a no-op implementation for testing.

func NewNoopProvider

func NewNoopProvider() *NoopProvider

NewNoopProvider creates a new no-op provider.

func (*NoopProvider) Close

func (*NoopProvider) Close() error

Close does nothing.

func (*NoopProvider) GetColumnContext

func (*NoopProvider) GetColumnContext(_ context.Context, _ ColumnIdentifier) (*ColumnContext, error)

GetColumnContext returns empty context.

func (*NoopProvider) GetColumnsContext

func (*NoopProvider) GetColumnsContext(_ context.Context, _ TableIdentifier) (map[string]*ColumnContext, error)

GetColumnsContext returns empty map.

func (*NoopProvider) GetGlossaryTerm

func (*NoopProvider) GetGlossaryTerm(_ context.Context, _ string) (*GlossaryTerm, error)

GetGlossaryTerm returns an empty term.

func (*NoopProvider) GetLineage

func (*NoopProvider) GetLineage(_ context.Context, _ TableIdentifier, dir LineageDirection, maxDepth int) (*LineageInfo, error)

GetLineage returns empty lineage.

func (*NoopProvider) GetTableContext

func (*NoopProvider) GetTableContext(_ context.Context, _ TableIdentifier) (*TableContext, error)

GetTableContext returns empty context.

func (*NoopProvider) Name

func (*NoopProvider) Name() string

Name returns the provider name.

func (*NoopProvider) SearchTables

func (*NoopProvider) SearchTables(_ context.Context, _ SearchFilter) ([]TableSearchResult, error)

SearchTables returns empty results.

type Owner

// Owner represents a data owner.
//
// URN and Type are always serialized; Name and Email are omitted when empty.
type Owner struct {
	URN   string    `json:"urn"`
	// Type is one of the OwnerType constants (OwnerTypeUser, OwnerTypeGroup).
	Type  OwnerType `json:"type"`
	Name  string    `json:"name,omitempty"`
	Email string    `json:"email,omitempty"`
}

Owner represents a data owner.

type OwnerType

// OwnerType indicates the type of owner.
// The defined values are OwnerTypeUser and OwnerTypeGroup.
type OwnerType string

OwnerType indicates the type of owner.

// Owner type constants.
const (
	// OwnerTypeUser is the "user" owner type.
	OwnerTypeUser  OwnerType = "user"
	// OwnerTypeGroup is the "group" owner type.
	OwnerTypeGroup OwnerType = "group"
)

Owner type constants.

type Provider

// Provider retrieves semantic metadata from catalog systems.
// DataHub implements this interface; future alternatives (Atlas, Unity
// Catalog) can implement it too.
//
// Every lookup method takes a context.Context as its first parameter and
// returns an error alongside its result.
type Provider interface {
	// Name returns the provider name.
	Name() string

	// GetTableContext retrieves semantic context for a table.
	GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)

	// GetColumnContext retrieves semantic context for a single column.
	GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)

	// GetColumnsContext retrieves semantic context for all columns of a table.
	GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)

	// GetLineage retrieves lineage information for a table.
	GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)

	// GetGlossaryTerm retrieves a glossary term by URN.
	GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)

	// SearchTables searches for tables matching the filter.
	SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)

	// Close releases resources.
	Close() error
}

Provider retrieves semantic metadata from catalog systems. DataHub implements this interface; future alternatives (Atlas, Unity Catalog) can implement it too.

type SanitizeConfig added in v0.2.0

// SanitizeConfig configures sanitization behavior.
// DefaultSanitizeConfig returns a safe default configuration.
type SanitizeConfig struct {
	// MaxLength is the maximum length for strings (default: 2000).
	MaxLength int

	// StripInjectionPatterns removes detected injection patterns instead of flagging.
	StripInjectionPatterns bool

	// LogInjectionAttempts enables logging of detected injection attempts.
	LogInjectionAttempts bool
}

SanitizeConfig configures sanitization behavior.

func DefaultSanitizeConfig added in v0.2.0

func DefaultSanitizeConfig() SanitizeConfig

DefaultSanitizeConfig returns a safe default configuration.

type Sanitizer added in v0.2.0

// Sanitizer sanitizes metadata strings to prevent prompt injection and other
// attacks. Construct one with NewSanitizer; all fields are unexported.
type Sanitizer struct {
	// contains filtered or unexported fields
}

Sanitizer sanitizes metadata strings to prevent prompt injection and other attacks.

func NewSanitizer added in v0.2.0

func NewSanitizer(cfg SanitizeConfig) *Sanitizer

NewSanitizer creates a new sanitizer with the given configuration.

func (*Sanitizer) DetectInjection added in v0.2.0

func (*Sanitizer) DetectInjection(input string) (detected bool, patterns []string)

DetectInjection checks if the input contains potential prompt injection patterns. Returns true if injection is detected along with matched patterns.

func (*Sanitizer) SanitizeColumnContext added in v0.2.0

func (s *Sanitizer) SanitizeColumnContext(cc *ColumnContext) *ColumnContext

SanitizeColumnContext sanitizes all string fields in a ColumnContext.

func (*Sanitizer) SanitizeDescription added in v0.2.0

func (s *Sanitizer) SanitizeDescription(desc string) string

SanitizeDescription sanitizes a description field.

func (*Sanitizer) SanitizeString added in v0.2.0

func (s *Sanitizer) SanitizeString(input string) string

SanitizeString sanitizes a string by removing control characters, truncating to max length, and optionally stripping injection patterns.

func (*Sanitizer) SanitizeTableContext added in v0.2.0

func (s *Sanitizer) SanitizeTableContext(tc *TableContext) *TableContext

SanitizeTableContext sanitizes all string fields in a TableContext.

func (*Sanitizer) SanitizeTag added in v0.2.0

func (*Sanitizer) SanitizeTag(tag string) string

SanitizeTag validates and sanitizes a tag name. Returns empty string if the tag is invalid.

func (*Sanitizer) SanitizeTags added in v0.2.0

func (s *Sanitizer) SanitizeTags(tags []string) []string

SanitizeTags sanitizes a slice of tags, removing invalid ones.

type SearchFilter

// SearchFilter defines criteria for searching tables.
//
// Query is always serialized; every other field is omitted from the JSON
// output when it holds its zero value (including a Limit or Offset of 0).
type SearchFilter struct {
	Query    string   `json:"query"`
	Platform string   `json:"platform,omitempty"`
	Tags     []string `json:"tags,omitempty"`
	Domain   string   `json:"domain,omitempty"`
	Owner    string   `json:"owner,omitempty"`
	// NOTE(review): Limit and Offset look like pagination controls — confirm
	// how each Provider implementation interprets a zero value.
	Limit    int      `json:"limit,omitempty"`
	Offset   int      `json:"offset,omitempty"`
}

SearchFilter defines criteria for searching tables.

type TableContext

// TableContext provides semantic context for a table.
//
// Every field carries omitempty, so an entirely empty TableContext encodes
// as an empty JSON object.
type TableContext struct {
	// Basic info
	URN         string `json:"urn,omitempty"`
	Description string `json:"description,omitempty"`

	// Ownership
	Owners []Owner `json:"owners,omitempty"`

	// Classification
	Tags          []string       `json:"tags,omitempty"`
	GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`
	Domain        *Domain        `json:"domain,omitempty"`

	// Status
	Deprecation *Deprecation `json:"deprecation,omitempty"`

	// Quality
	// A nil QualityScore is omitted; a non-nil pointer to 0 is still emitted.
	QualityScore *float64 `json:"quality_score,omitempty"`

	// Metadata
	CustomProperties map[string]string `json:"custom_properties,omitempty"`
	LastModified     *time.Time        `json:"last_modified,omitempty"`
}

TableContext provides semantic context for a table.

type TableIdentifier

// TableIdentifier uniquely identifies a table.
//
// Schema and Table are always serialized; Catalog is omitted when empty.
// String returns a dot-separated representation.
type TableIdentifier struct {
	Catalog string `json:"catalog,omitempty"`
	Schema  string `json:"schema"`
	Table   string `json:"table"`
}

TableIdentifier uniquely identifies a table.

func (TableIdentifier) String

func (t TableIdentifier) String() string

String returns a dot-separated representation.

type TableSearchResult

// TableSearchResult represents a search result.
//
// URN and Name are always serialized; the remaining fields are omitted from
// the JSON output when empty.
type TableSearchResult struct {
	URN          string   `json:"urn"`
	Name         string   `json:"name"`
	Platform     string   `json:"platform,omitempty"`
	Description  string   `json:"description,omitempty"`
	Tags         []string `json:"tags,omitempty"`
	Domain       string   `json:"domain,omitempty"`
	// NOTE(review): MatchedField presumably names the field the query matched
	// on — confirm against the provider implementation.
	MatchedField string   `json:"matched_field,omitempty"`
}

TableSearchResult represents a search result.

type URNResolver

// URNResolver can resolve URNs to table identifiers and build URNs from them.
// Both methods take a context.Context and may return an error.
type URNResolver interface {
	// ResolveURN converts a URN to a table identifier.
	ResolveURN(ctx context.Context, urn string) (*TableIdentifier, error)

	// BuildURN creates a URN from a table identifier.
	BuildURN(ctx context.Context, table TableIdentifier) (string, error)
}

URNResolver can resolve URNs to table identifiers.

Directories

Path Synopsis
datahub — Package datahub provides a DataHub implementation of the semantic provider.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL