Documentation
¶
Overview ¶
Package semantic provides semantic layer abstractions, including abstractions for semantic metadata providers.
Index ¶
- Constants
- Variables
- type CacheConfig
- type CachedProvider
- func (c *CachedProvider) Close() error
- func (c *CachedProvider) GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)
- func (c *CachedProvider) GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)
- func (c *CachedProvider) GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)
- func (c *CachedProvider) GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)
- func (c *CachedProvider) GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)
- func (c *CachedProvider) Invalidate()
- func (c *CachedProvider) Name() string
- func (c *CachedProvider) SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)
- type ColumnContext
- type ColumnIdentifier
- type Deprecation
- type Domain
- type GlossaryTerm
- type InheritedMetadata
- type InjectionLogger
- func (l *InjectionLogger) DetectAndLog(sanitizer *Sanitizer, source, field, input string) bool
- func (l *InjectionLogger) Disable()
- func (l *InjectionLogger) Enable()
- func (l *InjectionLogger) LogInjectionAttempt(source, field string, patterns []string)
- func (l *InjectionLogger) SetLogFunc(f func(format string, args ...any))
- type LineageDirection
- type LineageEdge
- type LineageEntity
- type LineageInfo
- type NoopProvider
- func (*NoopProvider) Close() error
- func (*NoopProvider) GetColumnContext(_ context.Context, _ ColumnIdentifier) (*ColumnContext, error)
- func (*NoopProvider) GetColumnsContext(_ context.Context, _ TableIdentifier) (map[string]*ColumnContext, error)
- func (*NoopProvider) GetGlossaryTerm(_ context.Context, _ string) (*GlossaryTerm, error)
- func (*NoopProvider) GetLineage(_ context.Context, _ TableIdentifier, dir LineageDirection, maxDepth int) (*LineageInfo, error)
- func (*NoopProvider) GetTableContext(_ context.Context, _ TableIdentifier) (*TableContext, error)
- func (*NoopProvider) Name() string
- func (*NoopProvider) SearchTables(_ context.Context, _ SearchFilter) ([]TableSearchResult, error)
- type Owner
- type OwnerType
- type Provider
- type SanitizeConfig
- type Sanitizer
- func (*Sanitizer) DetectInjection(input string) (detected bool, patterns []string)
- func (s *Sanitizer) SanitizeColumnContext(cc *ColumnContext) *ColumnContext
- func (s *Sanitizer) SanitizeDescription(desc string) string
- func (s *Sanitizer) SanitizeString(input string) string
- func (s *Sanitizer) SanitizeTableContext(tc *TableContext) *TableContext
- func (*Sanitizer) SanitizeTag(tag string) string
- func (s *Sanitizer) SanitizeTags(tags []string) []string
- type SearchFilter
- type TableContext
- type TableIdentifier
- type TableSearchResult
- type URNResolver
Constants ¶
const MaxStringLength = 2000
MaxStringLength is the maximum length for sanitized strings.
Variables ¶
var DefaultInjectionLogger = &InjectionLogger{
	logFunc: log.Printf,
}
DefaultInjectionLogger is the default logger for injection attempts.
Functions ¶
This section is empty.
Types ¶
type CachedProvider ¶
type CachedProvider struct {
// contains filtered or unexported fields
}
CachedProvider wraps a Provider with caching.
func NewCachedProvider ¶
func NewCachedProvider(provider Provider, cfg CacheConfig) *CachedProvider
NewCachedProvider creates a caching wrapper around a provider.
func (*CachedProvider) Close ¶
func (c *CachedProvider) Close() error
Close closes the underlying provider.
func (*CachedProvider) GetColumnContext ¶
func (c *CachedProvider) GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)
GetColumnContext retrieves column context with caching.
func (*CachedProvider) GetColumnsContext ¶
func (c *CachedProvider) GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)
GetColumnsContext retrieves columns context with caching.
func (*CachedProvider) GetGlossaryTerm ¶
func (c *CachedProvider) GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)
GetGlossaryTerm retrieves a glossary term with caching.
func (*CachedProvider) GetLineage ¶
func (c *CachedProvider) GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)
GetLineage retrieves lineage with caching.
func (*CachedProvider) GetTableContext ¶
func (c *CachedProvider) GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)
GetTableContext retrieves table context with caching.
func (*CachedProvider) Invalidate ¶
func (c *CachedProvider) Invalidate()
Invalidate clears the cache.
func (*CachedProvider) Name ¶
func (c *CachedProvider) Name() string
Name returns the underlying provider name.
func (*CachedProvider) SearchTables ¶
func (c *CachedProvider) SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)
SearchTables searches without caching (queries vary too much).
type ColumnContext ¶
type ColumnContext struct {
// Basic info
Name string `json:"name"`
Description string `json:"description,omitempty"`
// Classification
Tags []string `json:"tags,omitempty"`
GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`
// Sensitivity
IsPII bool `json:"is_pii,omitempty"`
IsSensitive bool `json:"is_sensitive,omitempty"`
// Business metadata
BusinessName string `json:"business_name,omitempty"`
// InheritedFrom is set when metadata was inherited from upstream lineage.
InheritedFrom *InheritedMetadata `json:"inherited_from,omitempty"`
}
ColumnContext provides semantic context for a column.
func (*ColumnContext) HasContent ¶ added in v0.24.0
func (c *ColumnContext) HasContent() bool
HasContent reports whether the column has any meaningful metadata worth including in enrichment responses. Columns with no description, tags, glossary terms, sensitivity flags, business name, or inherited metadata are considered empty and can be omitted to save tokens.
type ColumnIdentifier ¶
type ColumnIdentifier struct {
TableIdentifier
Column string `json:"column"`
}
ColumnIdentifier uniquely identifies a column.
func (ColumnIdentifier) String ¶
func (c ColumnIdentifier) String() string
String returns a dot-separated representation including the column.
type Deprecation ¶
type Deprecation struct {
Deprecated bool `json:"deprecated"`
Note string `json:"note,omitempty"`
Actor string `json:"actor,omitempty"`
DecommDate *time.Time `json:"decommission_date,omitempty"`
}
Deprecation indicates if an entity is deprecated.
type Domain ¶
type Domain struct {
URN string `json:"urn"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
}
Domain represents a data domain.
type GlossaryTerm ¶
type GlossaryTerm struct {
URN string `json:"urn"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
}
GlossaryTerm represents a business glossary term.
type InheritedMetadata ¶ added in v0.8.0
type InheritedMetadata struct {
// SourceURN is the DataHub URN of the upstream dataset.
SourceURN string `json:"source_urn"`
// SourceColumn is the column name in the upstream dataset.
SourceColumn string `json:"source_column"`
// Hops is the distance from the target dataset (1 = direct upstream).
Hops int `json:"hops"`
// MatchMethod indicates how the column was matched.
// Values: "column_lineage", "name_exact", "name_transformed", "alias"
MatchMethod string `json:"match_method"`
}
InheritedMetadata tracks the provenance of inherited column metadata.
type InjectionLogger ¶ added in v0.2.0
type InjectionLogger struct {
// contains filtered or unexported fields
}
InjectionLogger logs detected prompt injection attempts.
func (*InjectionLogger) DetectAndLog ¶ added in v0.2.0
func (l *InjectionLogger) DetectAndLog(sanitizer *Sanitizer, source, field, input string) bool
DetectAndLog detects injection patterns in the input and logs them if any are found. It returns true if an injection was detected.
func (*InjectionLogger) Disable ¶ added in v0.2.0
func (l *InjectionLogger) Disable()
Disable disables injection logging.
func (*InjectionLogger) Enable ¶ added in v0.2.0
func (l *InjectionLogger) Enable()
Enable enables injection logging.
func (*InjectionLogger) LogInjectionAttempt ¶ added in v0.2.0
func (l *InjectionLogger) LogInjectionAttempt(source, field string, patterns []string)
LogInjectionAttempt logs a detected injection attempt.
func (*InjectionLogger) SetLogFunc ¶ added in v0.2.0
func (l *InjectionLogger) SetLogFunc(f func(format string, args ...any))
SetLogFunc sets the logging function for injection attempts.
type LineageDirection ¶
type LineageDirection string
LineageDirection indicates the direction of lineage traversal.
const (
	LineageUpstream   LineageDirection = "upstream"
	LineageDownstream LineageDirection = "downstream"
)
Lineage direction constants.
type LineageEdge ¶
type LineageEdge struct {
URN string `json:"urn"`
Type string `json:"type,omitempty"`
TransformLogic string `json:"transform_logic,omitempty"`
}
LineageEdge represents an edge in the lineage graph.
type LineageEntity ¶
type LineageEntity struct {
URN string `json:"urn"`
Type string `json:"type"`
Name string `json:"name"`
Platform string `json:"platform,omitempty"`
Depth int `json:"depth"`
Parents []LineageEdge `json:"parents,omitempty"`
Children []LineageEdge `json:"children,omitempty"`
Context *TableContext `json:"context,omitempty"`
}
LineageEntity represents an entity in a lineage graph.
type LineageInfo ¶
type LineageInfo struct {
Direction LineageDirection `json:"direction"`
Entities []LineageEntity `json:"entities"`
MaxDepth int `json:"max_depth"`
}
LineageInfo contains lineage information for an entity.
type NoopProvider ¶
type NoopProvider struct{}
NoopProvider is a no-op implementation for testing.
func NewNoopProvider ¶
func NewNoopProvider() *NoopProvider
NewNoopProvider creates a new no-op provider.
func (*NoopProvider) GetColumnContext ¶
func (*NoopProvider) GetColumnContext(_ context.Context, _ ColumnIdentifier) (*ColumnContext, error)
GetColumnContext returns empty context.
func (*NoopProvider) GetColumnsContext ¶
func (*NoopProvider) GetColumnsContext(_ context.Context, _ TableIdentifier) (map[string]*ColumnContext, error)
GetColumnsContext returns empty map.
func (*NoopProvider) GetGlossaryTerm ¶
func (*NoopProvider) GetGlossaryTerm(_ context.Context, _ string) (*GlossaryTerm, error)
GetGlossaryTerm returns an empty term.
func (*NoopProvider) GetLineage ¶
func (*NoopProvider) GetLineage(_ context.Context, _ TableIdentifier, dir LineageDirection, maxDepth int) (*LineageInfo, error)
GetLineage returns empty lineage.
func (*NoopProvider) GetTableContext ¶
func (*NoopProvider) GetTableContext(_ context.Context, _ TableIdentifier) (*TableContext, error)
GetTableContext returns empty context.
func (*NoopProvider) SearchTables ¶
func (*NoopProvider) SearchTables(_ context.Context, _ SearchFilter) ([]TableSearchResult, error)
SearchTables returns empty results.
type Owner ¶
type Owner struct {
URN string `json:"urn"`
Type OwnerType `json:"type"`
Name string `json:"name,omitempty"`
Email string `json:"email,omitempty"`
}
Owner represents a data owner.
type Provider ¶
type Provider interface {
// Name returns the provider name.
Name() string
// GetTableContext retrieves semantic context for a table.
GetTableContext(ctx context.Context, table TableIdentifier) (*TableContext, error)
// GetColumnContext retrieves semantic context for a single column.
GetColumnContext(ctx context.Context, column ColumnIdentifier) (*ColumnContext, error)
// GetColumnsContext retrieves semantic context for all columns of a table.
GetColumnsContext(ctx context.Context, table TableIdentifier) (map[string]*ColumnContext, error)
// GetLineage retrieves lineage information for a table.
GetLineage(ctx context.Context, table TableIdentifier, direction LineageDirection, maxDepth int) (*LineageInfo, error)
// GetGlossaryTerm retrieves a glossary term by URN.
GetGlossaryTerm(ctx context.Context, urn string) (*GlossaryTerm, error)
// SearchTables searches for tables matching the filter.
SearchTables(ctx context.Context, filter SearchFilter) ([]TableSearchResult, error)
// Close releases resources.
Close() error
}
Provider retrieves semantic metadata from catalog systems. DataHub implements this interface; future alternatives (Atlas, Unity Catalog) can implement it too.
type SanitizeConfig ¶ added in v0.2.0
type SanitizeConfig struct {
// MaxLength is the maximum length for strings (default: 2000).
MaxLength int
// StripInjectionPatterns removes detected injection patterns instead of flagging.
StripInjectionPatterns bool
// LogInjectionAttempts enables logging of detected injection attempts.
LogInjectionAttempts bool
}
SanitizeConfig configures sanitization behavior.
func DefaultSanitizeConfig ¶ added in v0.2.0
func DefaultSanitizeConfig() SanitizeConfig
DefaultSanitizeConfig returns a safe default configuration.
type Sanitizer ¶ added in v0.2.0
type Sanitizer struct {
// contains filtered or unexported fields
}
Sanitizer sanitizes metadata strings to prevent prompt injection and other attacks.
func NewSanitizer ¶ added in v0.2.0
func NewSanitizer(cfg SanitizeConfig) *Sanitizer
NewSanitizer creates a new sanitizer with the given configuration.
func (*Sanitizer) DetectInjection ¶ added in v0.2.0
func (*Sanitizer) DetectInjection(input string) (detected bool, patterns []string)
DetectInjection checks if the input contains potential prompt injection patterns. It returns true, along with the matched patterns, if injection is detected.
func (*Sanitizer) SanitizeColumnContext ¶ added in v0.2.0
func (s *Sanitizer) SanitizeColumnContext(cc *ColumnContext) *ColumnContext
SanitizeColumnContext sanitizes all string fields in a ColumnContext.
func (*Sanitizer) SanitizeDescription ¶ added in v0.2.0
func (s *Sanitizer) SanitizeDescription(desc string) string
SanitizeDescription sanitizes a description field.
func (*Sanitizer) SanitizeString ¶ added in v0.2.0
func (s *Sanitizer) SanitizeString(input string) string
SanitizeString sanitizes a string by removing control characters, truncating to max length, and optionally stripping injection patterns.
func (*Sanitizer) SanitizeTableContext ¶ added in v0.2.0
func (s *Sanitizer) SanitizeTableContext(tc *TableContext) *TableContext
SanitizeTableContext sanitizes all string fields in a TableContext.
func (*Sanitizer) SanitizeTag ¶ added in v0.2.0
func (*Sanitizer) SanitizeTag(tag string) string
SanitizeTag validates and sanitizes a tag name. It returns an empty string if the tag is invalid.
func (*Sanitizer) SanitizeTags ¶ added in v0.2.0
func (s *Sanitizer) SanitizeTags(tags []string) []string
SanitizeTags sanitizes a slice of tags, removing invalid ones.
type SearchFilter ¶
type SearchFilter struct {
Query string `json:"query"`
Platform string `json:"platform,omitempty"`
Tags []string `json:"tags,omitempty"`
Domain string `json:"domain,omitempty"`
Owner string `json:"owner,omitempty"`
Limit int `json:"limit,omitempty"`
Offset int `json:"offset,omitempty"`
}
SearchFilter defines criteria for searching tables.
type TableContext ¶
type TableContext struct {
// Basic info
URN string `json:"urn,omitempty"`
Description string `json:"description,omitempty"`
// Ownership
Owners []Owner `json:"owners,omitempty"`
// Classification
Tags []string `json:"tags,omitempty"`
GlossaryTerms []GlossaryTerm `json:"glossary_terms,omitempty"`
Domain *Domain `json:"domain,omitempty"`
// Status
Deprecation *Deprecation `json:"deprecation,omitempty"`
// Quality
QualityScore *float64 `json:"quality_score,omitempty"`
// Metadata
CustomProperties map[string]string `json:"custom_properties,omitempty"`
LastModified *time.Time `json:"last_modified,omitempty"`
}
TableContext provides semantic context for a table.
type TableIdentifier ¶
type TableIdentifier struct {
Catalog string `json:"catalog,omitempty"`
Schema string `json:"schema"`
Table string `json:"table"`
}
TableIdentifier uniquely identifies a table.
func (TableIdentifier) String ¶
func (t TableIdentifier) String() string
String returns a dot-separated representation.
type TableSearchResult ¶
type TableSearchResult struct {
URN string `json:"urn"`
Name string `json:"name"`
Platform string `json:"platform,omitempty"`
Description string `json:"description,omitempty"`
Tags []string `json:"tags,omitempty"`
Domain string `json:"domain,omitempty"`
MatchedField string `json:"matched_field,omitempty"`
}
TableSearchResult represents a search result.
type URNResolver ¶
type URNResolver interface {
// ResolveURN converts a URN to a table identifier.
ResolveURN(ctx context.Context, urn string) (*TableIdentifier, error)
// BuildURN creates a URN from a table identifier.
BuildURN(ctx context.Context, table TableIdentifier) (string, error)
}
URNResolver can resolve URNs to table identifiers.