Documentation
¶
Overview ¶
Package semantic provides semantic layer abstractions, including abstractions for semantic metadata providers.
Index ¶
- Constants
- Variables
- type CacheConfig
- type CachedProvider
- func (c *CachedProvider) Close() error
- func (c *CachedProvider) GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)
- func (c *CachedProvider) GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)
- func (c *CachedProvider) GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)
- func (c *CachedProvider) GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)
- func (c *CachedProvider) GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)
- func (c *CachedProvider) Invalidate()
- func (c *CachedProvider) Name() string
- func (c *CachedProvider) SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)
- type ColumnContext
- type ColumnIdentifier
- type Deprecation
- type Domain
- type GlossaryTerm
- type InheritedMetadata
- type InjectionLogger
- func (l *InjectionLogger) DetectAndLog(sanitizer *Sanitizer, source, field, input string) bool
- func (l *InjectionLogger) Disable()
- func (l *InjectionLogger) Enable()
- func (l *InjectionLogger) LogInjectionAttempt(source, field string, patterns []string)
- func (l *InjectionLogger) SetLogFunc(f func(format string, args ...any))
- type LineageDirection
- type LineageEdge
- type LineageEntity
- type LineageInfo
- type NoopProvider
- func (n *NoopProvider) Close() error
- func (n *NoopProvider) GetColumnContext(_ context.Context, _ ColumnIdentifier) (*ColumnContext, error)
- func (n *NoopProvider) GetColumnsContext(_ context.Context, _ TableIdentifier) (map[string]*ColumnContext, error)
- func (n *NoopProvider) GetGlossaryTerm(_ context.Context, _ string) (*GlossaryTerm, error)
- func (n *NoopProvider) GetLineage(_ context.Context, _ TableIdentifier, dir LineageDirection, maxDepth int) (*LineageInfo, error)
- func (n *NoopProvider) GetTableContext(_ context.Context, _ TableIdentifier) (*TableContext, error)
- func (n *NoopProvider) Name() string
- func (n *NoopProvider) SearchTables(_ context.Context, _ SearchFilter) ([]TableSearchResult, error)
- type Owner
- type OwnerType
- type Provider
- type SanitizeConfig
- type Sanitizer
- func (s *Sanitizer) DetectInjection(input string) (bool, []string)
- func (s *Sanitizer) SanitizeColumnContext(cc *ColumnContext) *ColumnContext
- func (s *Sanitizer) SanitizeDescription(desc string) string
- func (s *Sanitizer) SanitizeString(input string) string
- func (s *Sanitizer) SanitizeTableContext(tc *TableContext) *TableContext
- func (s *Sanitizer) SanitizeTag(tag string) string
- func (s *Sanitizer) SanitizeTags(tags []string) []string
- type SearchFilter
- type TableContext
- type TableIdentifier
- type TableSearchResult
- type URNResolver
Constants ¶
const MaxStringLength = 2000
MaxStringLength is the maximum length for sanitized strings.
Variables ¶
var DefaultInjectionLogger = &InjectionLogger{
	logFunc: log.Printf,
}
DefaultInjectionLogger is the default logger for injection attempts.
Functions ¶
This section is empty.
Types ¶
type CachedProvider ¶
type CachedProvider struct {
// contains filtered or unexported fields
}
CachedProvider wraps a Provider with caching.
func NewCachedProvider ¶
func NewCachedProvider(provider Provider, cfg CacheConfig) *CachedProvider
NewCachedProvider creates a caching wrapper around a provider.
func (*CachedProvider) Close ¶
func (c *CachedProvider) Close() error
Close closes the underlying provider.
func (*CachedProvider) GetColumnContext ¶
func (c *CachedProvider) GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)
GetColumnContext retrieves column context with caching.
func (*CachedProvider) GetColumnsContext ¶
func (c *CachedProvider) GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)
GetColumnsContext retrieves columns context with caching.
func (*CachedProvider) GetGlossaryTerm ¶
func (c *CachedProvider) GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)
GetGlossaryTerm retrieves a glossary term with caching.
func (*CachedProvider) GetLineage ¶
func (c *CachedProvider) GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)
GetLineage retrieves lineage with caching.
func (*CachedProvider) GetTableContext ¶
func (c *CachedProvider) GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)
GetTableContext retrieves table context with caching.
func (*CachedProvider) Invalidate ¶
func (c *CachedProvider) Invalidate()
Invalidate clears the cache.
func (*CachedProvider) Name ¶
func (c *CachedProvider) Name() string
Name returns the underlying provider name.
func (*CachedProvider) SearchTables ¶
func (c *CachedProvider) SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)
SearchTables searches without caching (queries vary too much).
type ColumnContext ¶
type ColumnContext struct {
// Basic info
Name string `json:"name"`
Description string `json:"description,omitempty"`
// Classification
Tags []string `json:"tags,omitempty"`
GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`
// Sensitivity
IsPII bool `json:"is_pii,omitempty"`
IsSensitive bool `json:"is_sensitive,omitempty"`
// Business metadata
BusinessName string `json:"business_name,omitempty"`
// InheritedFrom is set when metadata was inherited from upstream lineage.
InheritedFrom *InheritedMetadata `json:"inherited_from,omitempty"`
}
ColumnContext provides semantic context for a column.
type ColumnIdentifier ¶
type ColumnIdentifier struct {
TableIdentifier
Column string `json:"column"`
}
ColumnIdentifier uniquely identifies a column.
func (ColumnIdentifier) String ¶
func (c ColumnIdentifier) String() string
String returns a dot-separated representation including the column.
type Deprecation ¶
type Deprecation struct {
Deprecated bool `json:"deprecated"`
Note string `json:"note,omitempty"`
Actor string `json:"actor,omitempty"`
DecommDate *time.Time `json:"decommission_date,omitempty"`
}
Deprecation indicates if an entity is deprecated.
type Domain ¶
type Domain struct {
URN string `json:"urn"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
}
Domain represents a data domain.
type GlossaryTerm ¶
type GlossaryTerm struct {
URN string `json:"urn"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
}
GlossaryTerm represents a business glossary term.
type InheritedMetadata ¶ added in v0.8.0
type InheritedMetadata struct {
// SourceURN is the DataHub URN of the upstream dataset.
SourceURN string `json:"source_urn"`
// SourceColumn is the column name in the upstream dataset.
SourceColumn string `json:"source_column"`
// Hops is the distance from the target dataset (1 = direct upstream).
Hops int `json:"hops"`
// MatchMethod indicates how the column was matched.
// Values: "column_lineage", "name_exact", "name_transformed", "alias"
MatchMethod string `json:"match_method"`
}
InheritedMetadata tracks the provenance of inherited column metadata.
type InjectionLogger ¶ added in v0.2.0
type InjectionLogger struct {
// contains filtered or unexported fields
}
InjectionLogger logs detected prompt injection attempts.
func (*InjectionLogger) DetectAndLog ¶ added in v0.2.0
func (l *InjectionLogger) DetectAndLog(sanitizer *Sanitizer, source, field, input string) bool
DetectAndLog detects injection patterns in the input and logs if found. Returns true if injection was detected.
func (*InjectionLogger) Disable ¶ added in v0.2.0
func (l *InjectionLogger) Disable()
Disable disables injection logging.
func (*InjectionLogger) Enable ¶ added in v0.2.0
func (l *InjectionLogger) Enable()
Enable enables injection logging.
func (*InjectionLogger) LogInjectionAttempt ¶ added in v0.2.0
func (l *InjectionLogger) LogInjectionAttempt(source, field string, patterns []string)
LogInjectionAttempt logs a detected injection attempt.
func (*InjectionLogger) SetLogFunc ¶ added in v0.2.0
func (l *InjectionLogger) SetLogFunc(f func(format string, args ...any))
SetLogFunc sets the logging function for injection attempts.
type LineageDirection ¶
type LineageDirection string
LineageDirection indicates the direction of lineage traversal.
const (
	LineageUpstream   LineageDirection = "upstream"
	LineageDownstream LineageDirection = "downstream"
)
type LineageEdge ¶
type LineageEdge struct {
URN string `json:"urn"`
Type string `json:"type,omitempty"`
TransformLogic string `json:"transform_logic,omitempty"`
}
LineageEdge represents an edge in the lineage graph.
type LineageEntity ¶
type LineageEntity struct {
URN string `json:"urn"`
Type string `json:"type"`
Name string `json:"name"`
Platform string `json:"platform,omitempty"`
Depth int `json:"depth"`
Parents []LineageEdge `json:"parents,omitempty"`
Children []LineageEdge `json:"children,omitempty"`
Context *TableContext `json:"context,omitempty"`
}
LineageEntity represents an entity in a lineage graph.
type LineageInfo ¶
type LineageInfo struct {
Direction LineageDirection `json:"direction"`
Entities []LineageEntity `json:"entities"`
MaxDepth int `json:"max_depth"`
}
LineageInfo contains lineage information for an entity.
type NoopProvider ¶
type NoopProvider struct{}
NoopProvider is a no-op implementation for testing.
func NewNoopProvider ¶
func NewNoopProvider() *NoopProvider
NewNoopProvider creates a new no-op provider.
func (*NoopProvider) GetColumnContext ¶
func (n *NoopProvider) GetColumnContext(_ context.Context, _ ColumnIdentifier) (*ColumnContext, error)
GetColumnContext returns empty context.
func (*NoopProvider) GetColumnsContext ¶
func (n *NoopProvider) GetColumnsContext(_ context.Context, _ TableIdentifier) (map[string]*ColumnContext, error)
GetColumnsContext returns empty map.
func (*NoopProvider) GetGlossaryTerm ¶
func (n *NoopProvider) GetGlossaryTerm(_ context.Context, _ string) (*GlossaryTerm, error)
GetGlossaryTerm returns nil.
func (*NoopProvider) GetLineage ¶
func (n *NoopProvider) GetLineage(_ context.Context, _ TableIdentifier, dir LineageDirection, maxDepth int) (*LineageInfo, error)
GetLineage returns empty lineage.
func (*NoopProvider) GetTableContext ¶
func (n *NoopProvider) GetTableContext(_ context.Context, _ TableIdentifier) (*TableContext, error)
GetTableContext returns empty context.
func (*NoopProvider) SearchTables ¶
func (n *NoopProvider) SearchTables(_ context.Context, _ SearchFilter) ([]TableSearchResult, error)
SearchTables returns empty results.
type Owner ¶
type Owner struct {
URN string `json:"urn"`
Type OwnerType `json:"type"`
Name string `json:"name,omitempty"`
Email string `json:"email,omitempty"`
}
Owner represents a data owner.
type Provider ¶
type Provider interface {
// Name returns the provider name.
Name() string
// GetTableContext retrieves semantic context for a table.
GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)
// GetColumnContext retrieves semantic context for a single column.
GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)
// GetColumnsContext retrieves semantic context for all columns of a table.
GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)
// GetLineage retrieves lineage information for a table.
GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)
// GetGlossaryTerm retrieves a glossary term by URN.
GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)
// SearchTables searches for tables matching the filter.
SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)
// Close releases resources.
Close() error
}
Provider retrieves semantic metadata from catalog systems. DataHub implements this. Future alternatives (Atlas, Unity Catalog) can too.
type SanitizeConfig ¶ added in v0.2.0
type SanitizeConfig struct {
// MaxLength is the maximum length for strings (default: 2000).
MaxLength int
// StripInjectionPatterns removes detected injection patterns instead of flagging.
StripInjectionPatterns bool
// LogInjectionAttempts enables logging of detected injection attempts.
LogInjectionAttempts bool
}
SanitizeConfig configures sanitization behavior.
func DefaultSanitizeConfig ¶ added in v0.2.0
func DefaultSanitizeConfig() SanitizeConfig
DefaultSanitizeConfig returns a safe default configuration.
type Sanitizer ¶ added in v0.2.0
type Sanitizer struct {
// contains filtered or unexported fields
}
Sanitizer sanitizes metadata strings to prevent prompt injection and other attacks.
func NewSanitizer ¶ added in v0.2.0
func NewSanitizer(cfg SanitizeConfig) *Sanitizer
NewSanitizer creates a new sanitizer with the given configuration.
func (*Sanitizer) DetectInjection ¶ added in v0.2.0
func (s *Sanitizer) DetectInjection(input string) (bool, []string)
DetectInjection checks if the input contains potential prompt injection patterns. Returns true if injection is detected along with matched patterns.
func (*Sanitizer) SanitizeColumnContext ¶ added in v0.2.0
func (s *Sanitizer) SanitizeColumnContext(cc *ColumnContext) *ColumnContext
SanitizeColumnContext sanitizes all string fields in a ColumnContext.
func (*Sanitizer) SanitizeDescription ¶ added in v0.2.0
func (s *Sanitizer) SanitizeDescription(desc string) string
SanitizeDescription sanitizes a description field.
func (*Sanitizer) SanitizeString ¶ added in v0.2.0
func (s *Sanitizer) SanitizeString(input string) string
SanitizeString sanitizes a string by removing control characters, truncating to max length, and optionally stripping injection patterns.
func (*Sanitizer) SanitizeTableContext ¶ added in v0.2.0
func (s *Sanitizer) SanitizeTableContext(tc *TableContext) *TableContext
SanitizeTableContext sanitizes all string fields in a TableContext.
func (*Sanitizer) SanitizeTag ¶ added in v0.2.0
func (s *Sanitizer) SanitizeTag(tag string) string
SanitizeTag validates and sanitizes a tag name. Returns empty string if the tag is invalid.
func (*Sanitizer) SanitizeTags ¶ added in v0.2.0
func (s *Sanitizer) SanitizeTags(tags []string) []string
SanitizeTags sanitizes a slice of tags, removing invalid ones.
type SearchFilter ¶
type SearchFilter struct {
Query string `json:"query"`
Platform string `json:"platform,omitempty"`
Tags []string `json:"tags,omitempty"`
Domain string `json:"domain,omitempty"`
Owner string `json:"owner,omitempty"`
Limit int `json:"limit,omitempty"`
Offset int `json:"offset,omitempty"`
}
SearchFilter defines criteria for searching tables.
type TableContext ¶
type TableContext struct {
// Basic info
URN string `json:"urn,omitempty"`
Description string `json:"description,omitempty"`
// Ownership
Owners []Owner `json:"owners,omitempty"`
// Classification
Tags []string `json:"tags,omitempty"`
GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`
Domain *Domain `json:"domain,omitempty"`
// Status
Deprecation *Deprecation `json:"deprecation,omitempty"`
// Quality
QualityScore *float64 `json:"quality_score,omitempty"`
// Metadata
CustomProperties map[string]string `json:"custom_properties,omitempty"`
LastModified *time.Time `json:"last_modified,omitempty"`
}
TableContext provides semantic context for a table.
type TableIdentifier ¶
type TableIdentifier struct {
Catalog string `json:"catalog,omitempty"`
Schema string `json:"schema"`
Table string `json:"table"`
}
TableIdentifier uniquely identifies a table.
func (TableIdentifier) String ¶
func (t TableIdentifier) String() string
String returns a dot-separated representation.
type TableSearchResult ¶
type TableSearchResult struct {
URN string `json:"urn"`
Name string `json:"name"`
Platform string `json:"platform,omitempty"`
Description string `json:"description,omitempty"`
Tags []string `json:"tags,omitempty"`
Domain string `json:"domain,omitempty"`
MatchedField string `json:"matched_field,omitempty"`
}
TableSearchResult represents a search result.
type URNResolver ¶
type URNResolver interface {
// ResolveURN converts a URN to a table identifier.
ResolveURN(ctx context.Context, urn string) (*TableIdentifier, error)
// BuildURN creates a URN from a table identifier.
BuildURN(ctx context.Context, table TableIdentifier) (string, error)
}
URNResolver converts between URNs and table identifiers: it can resolve a URN to a table identifier and build a URN from a table identifier.