integration

package
v0.7.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 16, 2026 License: Apache-2.0 Imports: 2 Imported by: 0

Documentation

Overview

Package integration provides interfaces for extending mcp-datahub behavior. These interfaces are consumed by mcp-datahub but implemented by users in their custom MCP servers.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AccessFilter

// AccessFilter controls access to DataHub entities.
// Implement it to plug custom authorization logic into mcp-datahub.
type AccessFilter interface {
	// CanAccess checks if the current user can access the given URN.
	// It reports the decision as a bool; the error result is separate from
	// the decision itself.
	// NOTE(review): how the "current user" is identified is not visible
	// here — presumably it is carried in ctx; confirm against the caller.
	CanAccess(ctx context.Context, urn string) (bool, error)

	// FilterURNs filters a list of URNs to only those accessible by the current user.
	// NOTE(review): whether the input order is preserved in the returned
	// slice is not specified here — confirm before relying on it.
	FilterURNs(ctx context.Context, urns []string) ([]string, error)
}

AccessFilter controls access to DataHub entities. Implement this interface to add custom authorization logic.

type AuditLogger

// AuditLogger logs tool invocations for audit purposes.
// Implement it to plug custom audit logging into mcp-datahub.
type AuditLogger interface {
	// LogToolCall logs a tool invocation.
	// Presumably tool is the invoked tool's name, params its arguments, and
	// userID the identity of the caller — TODO confirm against the call site.
	LogToolCall(ctx context.Context, tool string, params map[string]any, userID string) error
}

AuditLogger logs tool invocations for audit purposes. Implement this interface to add custom audit logging.

type ExecutionContext added in v0.2.0

// ExecutionContext provides query execution context for lineage bridging,
// connecting DataHub lineage information to query engine execution details.
//
// Every field is tagged omitempty, so nil or empty fields are left out of
// the JSON encoding.
type ExecutionContext struct {
	// Tables maps DataHub URNs to their resolved table identifiers.
	Tables map[string]*TableIdentifier `json:"tables,omitempty"`

	// Connections lists the query engine connections involved.
	Connections []string `json:"connections,omitempty"`

	// Queries are relevant queries that involve these entities.
	Queries []ExecutionQuery `json:"queries,omitempty"`

	// Source indicates which provider supplied this context.
	// NOTE(review): presumably the provider's Name() value — confirm.
	Source string `json:"source,omitempty"`
}

ExecutionContext provides query execution context for lineage bridging. This connects DataHub lineage information to query engine execution details.

type ExecutionQuery added in v0.2.0

// ExecutionQuery represents a query that involves lineage entities.
//
// Every field is tagged omitempty, so empty strings, empty slices, and a nil
// ExecutedAt are left out of the JSON encoding.
type ExecutionQuery struct {
	// SQL is the query text.
	SQL string `json:"sql,omitempty"`

	// Sources are the source table URNs.
	Sources []string `json:"sources,omitempty"`

	// Targets are the target table URNs (for INSERT/CREATE).
	Targets []string `json:"targets,omitempty"`

	// ExecutedAt is when the query was last executed (if known).
	// It is a pointer so that "unknown" can be expressed as nil.
	ExecutedAt *time.Time `json:"executed_at,omitempty"`

	// QueryID is the query engine's query identifier.
	QueryID string `json:"query_id,omitempty"`
}

ExecutionQuery represents a query that involves lineage entities.

type MetadataEnricher

// MetadataEnricher adds additional metadata to tool responses.
// Implement it to enrich DataHub responses with custom data.
type MetadataEnricher interface {
	// EnrichEntity adds custom metadata to an entity response.
	// It receives the entity's URN and its response data, and returns the
	// map to use for the response.
	// NOTE(review): whether implementations may mutate data in place or must
	// return a copy is not specified here — confirm against the caller.
	EnrichEntity(ctx context.Context, urn string, data map[string]any) (map[string]any, error)
}

MetadataEnricher adds additional metadata to tool responses. Implement this interface to enrich DataHub responses with custom data.

type NoOpQueryProvider added in v0.2.0

// NoOpQueryProvider is a QueryProvider that does nothing.
// It has no fields; use it as a placeholder or for testing.
type NoOpQueryProvider struct{}

NoOpQueryProvider is a QueryProvider that does nothing. Use this as a placeholder or for testing.

func (*NoOpQueryProvider) Close added in v0.2.0

func (n *NoOpQueryProvider) Close() error

Close implements QueryProvider.

func (*NoOpQueryProvider) GetExecutionContext added in v0.2.0

func (n *NoOpQueryProvider) GetExecutionContext(_ context.Context, _ []string) (*ExecutionContext, error)

GetExecutionContext implements QueryProvider.

func (*NoOpQueryProvider) GetQueryExamples added in v0.2.0

func (n *NoOpQueryProvider) GetQueryExamples(_ context.Context, _ string) ([]QueryExample, error)

GetQueryExamples implements QueryProvider.

func (*NoOpQueryProvider) GetTableAvailability added in v0.2.0

func (n *NoOpQueryProvider) GetTableAvailability(_ context.Context, _ string) (*TableAvailability, error)

GetTableAvailability implements QueryProvider.

func (*NoOpQueryProvider) Name added in v0.2.0

func (n *NoOpQueryProvider) Name() string

Name implements QueryProvider.

func (*NoOpQueryProvider) ResolveTable added in v0.2.0

func (n *NoOpQueryProvider) ResolveTable(_ context.Context, _ string) (*TableIdentifier, error)

ResolveTable implements QueryProvider.

type QueryExample added in v0.2.0

// QueryExample represents a sample SQL query for a DataHub entity.
//
// In the JSON encoding "name" and "sql" are always present; the remaining
// fields are omitted when empty.
type QueryExample struct {
	// Name is a short identifier for the example.
	Name string `json:"name"`

	// Description explains what the query does.
	Description string `json:"description,omitempty"`

	// SQL is the executable SQL statement.
	SQL string `json:"sql"`

	// Category classifies the example type.
	// Common values: "sample", "aggregation", "join", "filter", "common"
	// The type is a plain string, so other values are not rejected here.
	Category string `json:"category,omitempty"`

	// Source indicates where this example came from.
	// Values: "generated", "history", "template", "documentation"
	Source string `json:"source,omitempty"`
}

QueryExample represents a sample SQL query for a DataHub entity.

type QueryProvider added in v0.2.0

// QueryProvider provides query execution context for DataHub entities.
// Implementations typically connect to Trino, Spark, or other SQL engines to
// provide table resolution, query examples, and availability information.
//
// Lookup methods report "not found" through a nil (or empty) result with a
// nil error; errors are reserved for connection/authentication failures or
// other exceptional conditions.
//
// Implementations must be safe for concurrent use.
type QueryProvider interface {
	// Name returns the provider name (e.g., "trino", "spark", "presto").
	// Used for logging, metrics, and source attribution.
	Name() string

	// ResolveTable maps a DataHub URN to a query engine table identifier.
	// Returns nil if the URN cannot be resolved to a queryable table.
	//
	// Example URN: urn:li:dataset:(urn:li:dataPlatform:trino,catalog.schema.table,PROD)
	// Returns: &TableIdentifier{Catalog: "catalog", Schema: "schema", Table: "table"}
	ResolveTable(ctx context.Context, urn string) (*TableIdentifier, error)

	// GetTableAvailability checks if a DataHub entity is available as a queryable table.
	// This is used to enrich search results with query availability status.
	// Returns nil if availability cannot be determined (not an error).
	GetTableAvailability(ctx context.Context, urn string) (*TableAvailability, error)

	// GetQueryExamples returns sample SQL queries for a DataHub entity.
	// Returns an empty slice if no examples are available (not an error).
	// Examples might include: SELECT samples, common aggregations, join patterns.
	GetQueryExamples(ctx context.Context, urn string) ([]QueryExample, error)

	// GetExecutionContext returns execution context for lineage bridging.
	// Given a set of DataHub URNs (e.g., from lineage), returns information
	// about how they relate to query execution (sources, targets, transformations).
	// Returns nil if no execution context is available (not an error).
	GetExecutionContext(ctx context.Context, urns []string) (*ExecutionContext, error)

	// Close releases any resources held by the provider.
	// Implementations should be idempotent.
	Close() error
}

QueryProvider provides query execution context for DataHub entities. Implementations typically connect to Trino, Spark, or other SQL engines to provide table resolution, query examples, and availability information.

This interface enables mcp-trino (or other query toolkits) to inject execution context into mcp-datahub tools WITHOUT creating import cycles.

Lookup methods should return a nil result (or, for GetQueryExamples, an empty slice) together with a nil error if information is not found. Errors should only be returned for connection/authentication failures or other exceptional conditions.

Implementations must be safe for concurrent use.

type QueryProviderFunc added in v0.2.0

// QueryProviderFunc allows implementing QueryProvider with individual
// functions instead of a dedicated type.
//
// Each field has the same signature as the QueryProvider method whose name
// it carries (minus the Fn suffix). Fields may be left nil.
type QueryProviderFunc struct {
	// NameFn has the signature of QueryProvider.Name.
	NameFn                 func() string
	// ResolveTableFn has the signature of QueryProvider.ResolveTable.
	ResolveTableFn         func(ctx context.Context, urn string) (*TableIdentifier, error)
	// GetTableAvailabilityFn has the signature of QueryProvider.GetTableAvailability.
	GetTableAvailabilityFn func(ctx context.Context, urn string) (*TableAvailability, error)
	// GetQueryExamplesFn has the signature of QueryProvider.GetQueryExamples.
	GetQueryExamplesFn     func(ctx context.Context, urn string) ([]QueryExample, error)
	// GetExecutionContextFn has the signature of QueryProvider.GetExecutionContext.
	GetExecutionContextFn  func(ctx context.Context, urns []string) (*ExecutionContext, error)
	// CloseFn has the signature of QueryProvider.Close.
	CloseFn                func() error
}

QueryProviderFunc allows implementing QueryProvider with individual functions. If a function field is nil, the corresponding method returns a nil/empty result (not an error).

Example:

// Build a QueryProvider from individual functions; fields that are not set
// are simply left nil.
provider := &integration.QueryProviderFunc{
    NameFn: func() string { return "custom" },
    // The return type is package-qualified because this snippet lives in the
    // importing package, just like integration.QueryProviderFunc above.
    ResolveTableFn: func(ctx context.Context, urn string) (*integration.TableIdentifier, error) {
        // Custom resolution logic; parseURN stands in for your own helper.
        return parseURN(urn), nil
    },
}

func (*QueryProviderFunc) Close added in v0.2.0

func (f *QueryProviderFunc) Close() error

Close implements QueryProvider.

func (*QueryProviderFunc) GetExecutionContext added in v0.2.0

func (f *QueryProviderFunc) GetExecutionContext(ctx context.Context, urns []string) (*ExecutionContext, error)

GetExecutionContext implements QueryProvider.

func (*QueryProviderFunc) GetQueryExamples added in v0.2.0

func (f *QueryProviderFunc) GetQueryExamples(ctx context.Context, urn string) ([]QueryExample, error)

GetQueryExamples implements QueryProvider.

func (*QueryProviderFunc) GetTableAvailability added in v0.2.0

func (f *QueryProviderFunc) GetTableAvailability(ctx context.Context, urn string) (*TableAvailability, error)

GetTableAvailability implements QueryProvider.

func (*QueryProviderFunc) Name added in v0.2.0

func (f *QueryProviderFunc) Name() string

Name implements QueryProvider.

func (*QueryProviderFunc) ResolveTable added in v0.2.0

func (f *QueryProviderFunc) ResolveTable(ctx context.Context, urn string) (*TableIdentifier, error)

ResolveTable implements QueryProvider.

type TableAvailability added in v0.2.0

// TableAvailability represents whether a DataHub entity is available as a
// queryable table in the connected query engine.
//
// In the JSON encoding "available" is always present; every other field is
// omitted when it is empty, nil, or (for LastChecked) the zero time.
type TableAvailability struct {
	// Available indicates if the table exists and is queryable.
	Available bool `json:"available"`

	// Table is the resolved table identifier (nil if not available).
	Table *TableIdentifier `json:"table,omitempty"`

	// Connection is the query engine connection where the table is available.
	Connection string `json:"connection,omitempty"`

	// Error explains why the table is not available (if Available is false).
	Error string `json:"error,omitempty"`

	// LastChecked is when availability was last verified.
	//
	// The tag uses omitzero rather than omitempty: omitempty never applies to
	// a struct value such as time.Time, so an unset timestamp used to be
	// emitted as "0001-01-01T00:00:00Z". omitzero (Go 1.24+) drops the field
	// when LastChecked.IsZero() is true; older toolchains ignore the option
	// and keep emitting the field exactly as before.
	LastChecked time.Time `json:"last_checked,omitzero"`

	// RowCount is an optional estimate of table rows (if known).
	RowCount *int64 `json:"row_count,omitempty"`

	// LastUpdated is when the table data was last modified (if known).
	LastUpdated *time.Time `json:"last_updated,omitempty"`
}

TableAvailability represents whether a DataHub entity is available as a queryable table in the connected query engine.

type TableIdentifier added in v0.2.0

// TableIdentifier uniquely identifies a table in a query engine.
//
// In the JSON encoding "catalog", "schema", and "table" are always present;
// "connection" is omitted when empty.
type TableIdentifier struct {
	// Connection is the named connection (empty for default).
	Connection string `json:"connection,omitempty"`

	// Catalog is the catalog/database name.
	Catalog string `json:"catalog"`

	// Schema is the schema name.
	Schema string `json:"schema"`

	// Table is the table name.
	Table string `json:"table"`
}

TableIdentifier uniquely identifies a table in a query engine. This structure is intentionally compatible with (but not imported from) mcp-trino's semantic.TableIdentifier to maintain island architecture.

func (TableIdentifier) String added in v0.2.0

func (t TableIdentifier) String() string

String returns the fully-qualified table name.

type URNResolver

// URNResolver resolves external identifiers to DataHub URNs.
// Implement it to map your internal IDs to DataHub URNs.
type URNResolver interface {
	// ResolveToDataHubURN converts an external identifier to a DataHub URN.
	// NOTE(review): the behaviour for an unknown externalID (empty string vs.
	// error) is not specified here — confirm against the caller.
	ResolveToDataHubURN(ctx context.Context, externalID string) (string, error)
}

URNResolver resolves external identifiers to DataHub URNs. Implement this interface to map your internal IDs to DataHub URNs.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL