Documentation
Overview ¶
Package parser provides SQL parsing with dialect-aware syntax validation.
Usage ¶
stmt, err := parser.ParseWithDialect("SELECT a, b FROM t", myDialect)
if err != nil {
// handle error
}
The parser requires a dialect to be specified. Use the dialect registry to get a dialect by name:
d, ok := dialect.Get("duckdb")
stmt, err := parser.ParseWithDialect(sql, d)
Grammar Overview ¶
The parser implements a recursive descent parser for a subset of SQL:
statement → [WITH cte_list] select_body
select_body → select_core [(UNION|INTERSECT|EXCEPT) [ALL] select_body]
select_core → SELECT [DISTINCT] select_list FROM from_clause
[WHERE expr] [GROUP BY expr_list] [HAVING expr]
[QUALIFY expr] [ORDER BY order_list] [LIMIT expr]
See each file for detailed grammar rules for that section.
Package parser provides SQL parsing and column-level lineage extraction. (The token type aliases documented below are re-exported in a separate file for convenience.)
Index ¶
- Constants
- Variables
- func AddLeadingComment(n *core.NodeInfo, c *token.Comment)
- func AddTrailingComment(n *core.NodeInfo, c *token.Comment)
- func GetSpan(n *core.NodeInfo) token.Span
- func ParseWithDialect(sql string, d *core.Dialect) (*core.SelectStmt, error)
- func ParseWithDialectAndComments(sql string, d *core.Dialect) (*core.SelectStmt, []*token.Comment, error)
- type ColumnResolver
- type ColumnSource
- type FetchClauseData
- type LexError
- type Lexer
- type ParseError
- type Parser
- func (p *Parser) AddError(msg string)
- func (p *Parser) Check(t token.TokenType) bool
- func (p *Parser) Comments() []*token.Comment
- func (p *Parser) Dialect() *core.Dialect
- func (p *Parser) Expect(t token.TokenType) error
- func (p *Parser) Match(t token.TokenType) bool
- func (p *Parser) NextToken()
- func (p *Parser) ParseExpression() (core.Expr, error)
- func (p *Parser) ParseExpressionList() ([]core.Expr, error)
- func (p *Parser) ParseIdentifier() (string, error)
- func (p *Parser) ParseOrderByList() ([]core.OrderByItem, error)
- func (p *Parser) Peek() token.Token
- func (p *Parser) Position() token.Position
- func (p *Parser) Token() token.Token
- type Position
- type ResolutionError
- type ResolveError
- type Resolver
- type Schema
- type Scope
- func (s *Scope) AllEntries() []*ScopeEntry
- func (s *Scope) Child() *Scope
- func (s *Scope) ExpandStar(tableName string) []*core.ColumnRef
- func (s *Scope) HasSchemaInfo() bool
- func (s *Scope) Lookup(name string) (*ScopeEntry, bool)
- func (s *Scope) LookupCTE(name string) (*ScopeEntry, bool)
- func (s *Scope) RegisterCTE(name string, columns []string)
- func (s *Scope) RegisterCTEWithSources(name string, columns []string, underlyingSources []string)
- func (s *Scope) RegisterDerived(alias string, columns []string)
- func (s *Scope) RegisterDerivedWithSources(alias string, columns []string, underlyingSources []string)
- func (s *Scope) RegisterTable(table *core.TableName)
- func (s *Scope) ResolveColumn(ref *core.ColumnRef) (*ScopeEntry, bool)
- func (s *Scope) ResolveColumnFull(ref *core.ColumnRef) (*ColumnSource, bool)
- type ScopeEntry
- type ScopeType
- type Token
- type TokenType
Constants ¶
const ( ErrUnexpectedToken = "unexpected token %s, expected %s" ErrUnterminatedString = "unterminated string literal" ErrInvalidNumber = "invalid number literal" ErrUnknownColumn = "unknown column %q" ErrUnknownTable = "unknown table or alias %q" ErrAmbiguousColumn = "ambiguous column reference %q" // Dialect-specific error messages ErrUnsupportedClause = "%s is not supported in %s dialect" ErrUnsupportedOperator = "operator %s is not supported in %s dialect" ErrNoClauseHandler = "no handler registered for clause %s" )
Common error message format strings used for lexing, parsing, and resolution errors.
const ( SoftKeywordName = "NAME" SoftKeywordValue = "VALUE" // For future PIVOT/UNPIVOT support )
Soft keywords are identifiers that have special meaning in specific contexts. They are not reserved words and can be used as identifiers elsewhere. Example: "NAME" is a soft keyword in "UNION BY NAME" but can still be used as a column name in "SELECT name FROM users".
const ( // Special tokens TOKEN_EOF = token.EOF TOKEN_ILLEGAL = token.ILLEGAL // Literals TOKEN_IDENT = token.IDENT TOKEN_NUMBER = token.NUMBER TOKEN_STRING = token.STRING // Operators TOKEN_PLUS = token.PLUS TOKEN_MINUS = token.MINUS TOKEN_STAR = token.STAR TOKEN_SLASH = token.SLASH TOKEN_MOD = token.MOD TOKEN_DPIPE = token.DPIPE TOKEN_EQ = token.EQ TOKEN_NE = token.NE TOKEN_LT = token.LT TOKEN_GT = token.GT TOKEN_LE = token.LE TOKEN_GE = token.GE TOKEN_DOT = token.DOT TOKEN_COMMA = token.COMMA TOKEN_LPAREN = token.LPAREN TOKEN_RPAREN = token.RPAREN TOKEN_LBRACKET = token.LBRACKET TOKEN_RBRACKET = token.RBRACKET TOKEN_LBRACE = token.LBRACE TOKEN_RBRACE = token.RBRACE TOKEN_COLON = token.COLON TOKEN_ARROW = token.ARROW // Keywords (alphabetical) TOKEN_ALL = token.ALL TOKEN_AND = token.AND TOKEN_AS = token.AS TOKEN_ASC = token.ASC TOKEN_BETWEEN = token.BETWEEN TOKEN_BY = token.BY TOKEN_CASE = token.CASE TOKEN_CAST = token.CAST TOKEN_CROSS = token.CROSS TOKEN_CURRENT = token.CURRENT TOKEN_DESC = token.DESC TOKEN_DISTINCT = token.DISTINCT TOKEN_ELSE = token.ELSE TOKEN_END = token.END TOKEN_EXISTS = token.EXISTS TOKEN_EXCEPT = token.EXCEPT TOKEN_FALSE = token.FALSE TOKEN_FETCH = token.FETCH TOKEN_FILTER = token.FILTER TOKEN_FIRST = token.FIRST TOKEN_FOLLOWING = token.FOLLOWING TOKEN_FROM = token.FROM TOKEN_FULL = token.FULL TOKEN_GROUP = token.GROUP TOKEN_GROUPS = token.GROUPS TOKEN_HAVING = token.HAVING TOKEN_IN = token.IN TOKEN_INNER = token.INNER TOKEN_INTERSECT = token.INTERSECT TOKEN_IS = token.IS TOKEN_JOIN = token.JOIN TOKEN_LAST = token.LAST TOKEN_LATERAL = token.LATERAL TOKEN_LEFT = token.LEFT TOKEN_LIKE = token.LIKE TOKEN_LIMIT = token.LIMIT TOKEN_NATURAL = token.NATURAL TOKEN_NEXT = token.NEXT TOKEN_NOT = token.NOT TOKEN_NULL = token.NULL TOKEN_NULLS = token.NULLS TOKEN_OFFSET = token.OFFSET TOKEN_ON = token.ON TOKEN_ONLY = token.ONLY TOKEN_OR = token.OR TOKEN_ORDER = token.ORDER TOKEN_OUTER = token.OUTER TOKEN_OVER = token.OVER TOKEN_PARTITION = token.PARTITION TOKEN_PERCENT = token.PERCENT TOKEN_PRECEDING = token.PRECEDING TOKEN_RANGE = token.RANGE TOKEN_RECURSIVE = token.RECURSIVE TOKEN_RIGHT = token.RIGHT TOKEN_ROW = token.ROW TOKEN_ROWS = token.ROWS TOKEN_SELECT = token.SELECT TOKEN_THEN = token.THEN TOKEN_TIES = token.TIES TOKEN_TRUE = token.TRUE TOKEN_UNBOUNDED = token.UNBOUNDED TOKEN_UNION = token.UNION TOKEN_USING = token.USING TOKEN_WHEN = token.WHEN TOKEN_WHERE = token.WHERE TOKEN_WINDOW = token.WINDOW TOKEN_WITH = token.WITH TOKEN_WITHIN = token.WITHIN // Template tokens TOKEN_MACRO = token.MACRO )
Variables ¶
var LookupIdent = token.LookupIdent
LookupIdent is re-exported from token package.
Functions ¶
func AddLeadingComment ¶
AddLeadingComment adds a leading comment to the node. Note: This is now a method on NodeInfo in core, but kept for compatibility.
func AddTrailingComment ¶
AddTrailingComment adds a trailing comment to the node. Note: This is now a method on NodeInfo in core, but kept for compatibility.
func GetSpan ¶
GetSpan returns the node's source span. Note: This is now a method on NodeInfo in core, but kept for compatibility.
func ParseWithDialect ¶
ParseWithDialect parses the SQL with a specific dialect and returns the AST.
func ParseWithDialectAndComments ¶
func ParseWithDialectAndComments(sql string, d *core.Dialect) (*core.SelectStmt, []*token.Comment, error)
ParseWithDialectAndComments parses SQL and returns both AST and comments.
Types ¶
type ColumnResolver ¶
type ColumnResolver struct {
// contains filtered or unexported fields
}
ColumnResolver resolves column references within an expression. Used during lineage extraction to find all source columns.
func NewColumnResolver ¶
func NewColumnResolver(scope *Scope, d *core.Dialect) (*ColumnResolver, error)
NewColumnResolver creates a column resolver for the given scope. Returns an error if dialect is nil.
func (*ColumnResolver) CollectColumns ¶
func (cr *ColumnResolver) CollectColumns(expr core.Expr) []*core.ColumnRef
CollectColumns collects all column references from an expression.
func (*ColumnResolver) ExpandStar ¶
func (cr *ColumnResolver) ExpandStar(tableName string) []*core.ColumnRef
ExpandStar expands a star expression to individual column references.
func (*ColumnResolver) ResolveColumnRef ¶
func (cr *ColumnResolver) ResolveColumnRef(ref *core.ColumnRef) (*ColumnSource, bool)
ResolveColumnRef resolves a column reference to its source.
type ColumnSource ¶
type ColumnSource struct {
Table string // Source table name
SourceTable string // Fully qualified source (e.g., schema.table)
Column string // Column name
FromCTE bool // True if from a CTE
FromDerived bool // True if from a derived table
}
ColumnSource represents a resolved source for a column reference.
type FetchClauseData ¶
type FetchClauseData interface {
GetFirst() bool
GetCount() core.Expr
GetPercent() bool
GetWithTies() bool
}
FetchClauseData is an interface for extracting data from dialect-defined FetchClause types.
type Lexer ¶
type Lexer struct {
// Comments collected during lexing (for formatter)
Comments []*token.Comment
// contains filtered or unexported fields
}
Lexer tokenizes SQL input.
func NewLexerWithDialect ¶
NewLexerWithDialect creates a new dialect-aware Lexer for the given input.
type ParseError ¶
ParseError represents a parsing error with position information.
func (*ParseError) Error ¶
func (e *ParseError) Error() string
type Parser ¶
type Parser struct {
// contains filtered or unexported fields
}
Parser parses SQL into an AST.
func (*Parser) Check ¶
Check returns true if the current token is of the given type (implements spi.ParserOps).
func (*Parser) Comments ¶
Comments returns the comments collected during lexing. Call this after parsing to get all comments for the formatter.
func (*Parser) Expect ¶
Expect consumes the current token if it matches, otherwise returns an error (implements spi.ParserOps).
func (*Parser) NextToken ¶
func (p *Parser) NextToken()
NextToken advances to the next token (implements spi.ParserOps).
func (*Parser) ParseExpression ¶
ParseExpression parses an expression (implements spi.ParserOps).
func (*Parser) ParseExpressionList ¶
ParseExpressionList parses a comma-separated list of expressions (implements spi.ParserOps).
func (*Parser) ParseIdentifier ¶
ParseIdentifier parses an identifier (implements spi.ParserOps).
func (*Parser) ParseOrderByList ¶
func (p *Parser) ParseOrderByList() ([]core.OrderByItem, error)
ParseOrderByList parses an ORDER BY list (implements spi.ParserOps).
type ResolutionError ¶
type ResolutionError struct {
Message string
}
ResolutionError represents a column/table resolution error.
func (*ResolutionError) Error ¶
func (e *ResolutionError) Error() string
type ResolveError ¶
type ResolveError struct {
Message string
}
ResolveError represents an error during resolution.
func (*ResolveError) Error ¶
func (e *ResolveError) Error() string
type Resolver ¶
type Resolver struct {
// contains filtered or unexported fields
}
Resolver walks the AST and resolves: - CTE definitions (names and columns) - Table references in FROM clauses - Column references to their source tables - Star expansion (SELECT * and t.*)
func NewResolver ¶
NewResolver creates a new resolver with the given dialect and schema. Returns an error if dialect is nil.
type Schema ¶
Schema maps table names to their columns. Used for SELECT * expansion when schema information is available.
type Scope ¶
type Scope struct {
// contains filtered or unexported fields
}
Scope tracks all available tables, CTEs, and their columns within a query context.
func (*Scope) AllEntries ¶
func (s *Scope) AllEntries() []*ScopeEntry
AllEntries returns all scope entries in the current scope (not including parent).
func (*Scope) ExpandStar ¶
ExpandStar expands a SELECT * to column references. If tableName is empty, expands * for all tables in scope. If tableName is provided, expands only for that table.
Returns nil if the table is not found or has no known columns.
func (*Scope) HasSchemaInfo ¶
HasSchemaInfo returns true if the scope has column information for any table.
func (*Scope) Lookup ¶
func (s *Scope) Lookup(name string) (*ScopeEntry, bool)
Lookup finds a scope entry by name (table name or alias). Searches current scope first, then parent scopes.
func (*Scope) LookupCTE ¶
func (s *Scope) LookupCTE(name string) (*ScopeEntry, bool)
LookupCTE looks up a CTE by name. CTEs are only looked up in parent scopes (they're defined before the main query).
func (*Scope) RegisterCTE ¶
RegisterCTE registers a CTE with its resolved columns.
func (*Scope) RegisterCTEWithSources ¶
RegisterCTEWithSources registers a CTE with its resolved columns and underlying sources.
func (*Scope) RegisterDerived ¶
RegisterDerived registers a derived table (subquery in FROM).
func (*Scope) RegisterDerivedWithSources ¶
func (s *Scope) RegisterDerivedWithSources(alias string, columns []string, underlyingSources []string)
RegisterDerivedWithSources registers a derived table with its underlying sources.
func (*Scope) RegisterTable ¶
RegisterTable registers a physical table from a FROM clause.
func (*Scope) ResolveColumn ¶
func (s *Scope) ResolveColumn(ref *core.ColumnRef) (*ScopeEntry, bool)
ResolveColumn attempts to resolve a column reference to its source table. Returns the scope entry and true if found, nil and false otherwise.
For unqualified columns, it searches all entries in scope. For qualified columns (table.column), it looks up by qualifier.
func (*Scope) ResolveColumnFull ¶
func (s *Scope) ResolveColumnFull(ref *core.ColumnRef) (*ColumnSource, bool)
ResolveColumnFull resolves a column reference and returns full source information.
type ScopeEntry ¶
type ScopeEntry struct {
Type ScopeType
Name string // Original table/CTE name
Alias string // Alias (if any)
Columns []string // Known columns (from schema or derived query)
SourceTable string // For physical tables: fully qualified name (schema.table)
UnderlyingSources []string // For CTEs/derived tables: underlying physical tables
}
ScopeEntry represents a table/CTE/derived table in scope.
func (*ScopeEntry) EffectiveName ¶
func (e *ScopeEntry) EffectiveName() string
EffectiveName returns the name used to reference this entry (alias if present, else name).