parser

package
v0.0.0-...-e6c4605 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 6, 2026 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Overview

Package parser provides SQL parsing with dialect-aware syntax validation.

Usage

stmt, err := parser.ParseWithDialect("SELECT a, b FROM t", myDialect)
if err != nil {
    // handle error
}

The parser requires a dialect to be specified. Use the dialect registry to get a dialect by name:

d, ok := dialect.Get("duckdb")
stmt, err := parser.ParseWithDialect(sql, d)

Grammar Overview

The parser implements a recursive descent parser for a subset of SQL:

statement     → [WITH cte_list] select_body
select_body   → select_core [(UNION|INTERSECT|EXCEPT) [ALL] select_body]
select_core   → SELECT [DISTINCT] select_list FROM from_clause
                [WHERE expr] [GROUP BY expr_list] [HAVING expr]
                [QUALIFY expr] [ORDER BY order_list] [LIMIT expr]

See each file for detailed grammar rules for that section.

Package parser provides SQL parsing and column-level lineage extraction. This file provides token type aliases for convenience.

Index

Constants

View Source
const (
	ErrUnexpectedToken    = "unexpected token %s, expected %s"
	ErrUnterminatedString = "unterminated string literal"
	ErrInvalidNumber      = "invalid number literal"
	ErrUnknownColumn      = "unknown column %q"
	ErrUnknownTable       = "unknown table or alias %q"
	ErrAmbiguousColumn    = "ambiguous column reference %q"

	// Dialect-specific error messages
	ErrUnsupportedClause   = "%s is not supported in %s dialect"
	ErrUnsupportedOperator = "operator %s is not supported in %s dialect"
	ErrNoClauseHandler     = "no handler registered for clause %s"
)

Common error messages

View Source
const (
	SoftKeywordName  = "NAME"
	SoftKeywordValue = "VALUE" // For future PIVOT/UNPIVOT support
)

Soft keywords are identifiers that have special meaning in specific contexts. They are not reserved words and can be used as identifiers elsewhere. Example: "NAME" is a soft keyword in "UNION BY NAME" but can still be used as a column name in "SELECT name FROM users".

View Source
const (
	// Special tokens
	TOKEN_EOF     = token.EOF
	TOKEN_ILLEGAL = token.ILLEGAL

	// Literals
	TOKEN_IDENT  = token.IDENT
	TOKEN_NUMBER = token.NUMBER
	TOKEN_STRING = token.STRING

	// Operators
	TOKEN_PLUS     = token.PLUS
	TOKEN_MINUS    = token.MINUS
	TOKEN_STAR     = token.STAR
	TOKEN_SLASH    = token.SLASH
	TOKEN_MOD      = token.MOD
	TOKEN_DPIPE    = token.DPIPE
	TOKEN_EQ       = token.EQ
	TOKEN_NE       = token.NE
	TOKEN_LT       = token.LT
	TOKEN_GT       = token.GT
	TOKEN_LE       = token.LE
	TOKEN_GE       = token.GE
	TOKEN_DOT      = token.DOT
	TOKEN_COMMA    = token.COMMA
	TOKEN_LPAREN   = token.LPAREN
	TOKEN_RPAREN   = token.RPAREN
	TOKEN_LBRACKET = token.LBRACKET
	TOKEN_RBRACKET = token.RBRACKET
	TOKEN_LBRACE   = token.LBRACE
	TOKEN_RBRACE   = token.RBRACE
	TOKEN_COLON    = token.COLON
	TOKEN_ARROW    = token.ARROW

	// Keywords (alphabetical)
	TOKEN_ALL       = token.ALL
	TOKEN_AND       = token.AND
	TOKEN_AS        = token.AS
	TOKEN_ASC       = token.ASC
	TOKEN_BETWEEN   = token.BETWEEN
	TOKEN_BY        = token.BY
	TOKEN_CASE      = token.CASE
	TOKEN_CAST      = token.CAST
	TOKEN_CROSS     = token.CROSS
	TOKEN_CURRENT   = token.CURRENT
	TOKEN_DESC      = token.DESC
	TOKEN_DISTINCT  = token.DISTINCT
	TOKEN_ELSE      = token.ELSE
	TOKEN_END       = token.END
	TOKEN_EXCEPT    = token.EXCEPT
	TOKEN_EXISTS    = token.EXISTS
	TOKEN_FALSE     = token.FALSE
	TOKEN_FETCH     = token.FETCH
	TOKEN_FILTER    = token.FILTER
	TOKEN_FIRST     = token.FIRST
	TOKEN_FOLLOWING = token.FOLLOWING
	TOKEN_FROM      = token.FROM
	TOKEN_FULL      = token.FULL
	TOKEN_GROUP     = token.GROUP
	TOKEN_GROUPS    = token.GROUPS
	TOKEN_HAVING    = token.HAVING
	TOKEN_IN        = token.IN
	TOKEN_INNER     = token.INNER
	TOKEN_INTERSECT = token.INTERSECT
	TOKEN_IS        = token.IS
	TOKEN_JOIN      = token.JOIN
	TOKEN_LAST      = token.LAST
	TOKEN_LATERAL   = token.LATERAL
	TOKEN_LEFT      = token.LEFT
	TOKEN_LIKE      = token.LIKE
	TOKEN_LIMIT     = token.LIMIT
	TOKEN_NATURAL   = token.NATURAL
	TOKEN_NEXT      = token.NEXT
	TOKEN_NOT       = token.NOT
	TOKEN_NULL      = token.NULL
	TOKEN_NULLS     = token.NULLS
	TOKEN_OFFSET    = token.OFFSET
	TOKEN_ON        = token.ON
	TOKEN_ONLY      = token.ONLY
	TOKEN_OR        = token.OR
	TOKEN_ORDER     = token.ORDER
	TOKEN_OUTER     = token.OUTER
	TOKEN_OVER      = token.OVER
	TOKEN_PARTITION = token.PARTITION
	TOKEN_PERCENT   = token.PERCENT
	TOKEN_PRECEDING = token.PRECEDING
	TOKEN_RANGE     = token.RANGE
	TOKEN_RECURSIVE = token.RECURSIVE
	TOKEN_RIGHT     = token.RIGHT
	TOKEN_ROW       = token.ROW
	TOKEN_ROWS      = token.ROWS
	TOKEN_SELECT    = token.SELECT
	TOKEN_THEN      = token.THEN
	TOKEN_TIES      = token.TIES
	TOKEN_TRUE      = token.TRUE
	TOKEN_UNBOUNDED = token.UNBOUNDED
	TOKEN_UNION     = token.UNION
	TOKEN_USING     = token.USING
	TOKEN_WHEN      = token.WHEN
	TOKEN_WHERE     = token.WHERE
	TOKEN_WINDOW    = token.WINDOW
	TOKEN_WITH      = token.WITH
	TOKEN_WITHIN    = token.WITHIN

	// Template tokens
	TOKEN_MACRO = token.MACRO
)

Variables

View Source
var LookupIdent = token.LookupIdent

LookupIdent is re-exported from token package.

Functions

func AddLeadingComment

func AddLeadingComment(n *core.NodeInfo, c *token.Comment)

AddLeadingComment adds a leading comment to the node. Note: This is now a method on NodeInfo in core, but kept for compatibility.

func AddTrailingComment

func AddTrailingComment(n *core.NodeInfo, c *token.Comment)

AddTrailingComment adds a trailing comment to the node. Note: This is now a method on NodeInfo in core, but kept for compatibility.

func GetSpan

func GetSpan(n *core.NodeInfo) token.Span

GetSpan returns the node's source span. Note: This is now a method on NodeInfo in core, but kept for compatibility.

func ParseWithDialect

func ParseWithDialect(sql string, d *core.Dialect) (*core.SelectStmt, error)

ParseWithDialect parses the SQL with a specific dialect and returns the AST.

func ParseWithDialectAndComments

func ParseWithDialectAndComments(sql string, d *core.Dialect) (*core.SelectStmt, []*token.Comment, error)

ParseWithDialectAndComments parses SQL and returns both AST and comments.

Types

type ColumnResolver

type ColumnResolver struct {
	// contains filtered or unexported fields
}

ColumnResolver resolves column references within an expression. Used during lineage extraction to find all source columns.

func NewColumnResolver

func NewColumnResolver(scope *Scope, d *core.Dialect) (*ColumnResolver, error)

NewColumnResolver creates a column resolver for the given scope. Returns an error if dialect is nil.

func (*ColumnResolver) CollectColumns

func (cr *ColumnResolver) CollectColumns(expr core.Expr) []*core.ColumnRef

CollectColumns collects all column references from an expression.

func (*ColumnResolver) ExpandStar

func (cr *ColumnResolver) ExpandStar(tableName string) []*core.ColumnRef

ExpandStar expands a star expression to individual column references.

func (*ColumnResolver) ResolveColumnRef

func (cr *ColumnResolver) ResolveColumnRef(ref *core.ColumnRef) (*ColumnSource, bool)

ResolveColumnRef resolves a column reference to its source.

type ColumnSource

type ColumnSource struct {
	Table       string // Source table name
	SourceTable string // Fully qualified source (e.g., schema.table)
	Column      string // Column name
	FromCTE     bool   // True if from a CTE
	FromDerived bool   // True if from a derived table
}

ColumnSource represents a resolved source for a column reference.

type FetchClauseData

type FetchClauseData interface {
	GetFirst() bool
	GetCount() core.Expr
	GetPercent() bool
	GetWithTies() bool
}

FetchClauseData is an interface for extracting data from dialect-defined FetchClause types.

type LexError

type LexError struct {
	Pos     Position
	Message string
}

LexError represents a lexical analysis error.

func (*LexError) Error

func (e *LexError) Error() string

type Lexer

type Lexer struct {

	// Comments collected during lexing (for formatter)
	Comments []*token.Comment
	// contains filtered or unexported fields
}

Lexer tokenizes SQL input.

func NewLexer

func NewLexer(input string) *Lexer

NewLexer creates a new Lexer for the given input.

func NewLexerWithDialect

func NewLexerWithDialect(input string, d *core.Dialect) *Lexer

NewLexerWithDialect creates a new dialect-aware Lexer for the given input.

func (*Lexer) NextToken

func (l *Lexer) NextToken() Token

NextToken returns the next token.

type ParseError

type ParseError struct {
	Pos     Position
	Message string
}

ParseError represents a parsing error with position information.

func (*ParseError) Error

func (e *ParseError) Error() string

type Parser

type Parser struct {
	// contains filtered or unexported fields
}

Parser parses SQL into an AST.

func NewParser

func NewParser(sql string, d *core.Dialect) *Parser

NewParser creates a new parser for the given SQL input with dialect support.

func (*Parser) AddError

func (p *Parser) AddError(msg string)

AddError adds a parse error (implements spi.ParserOps).

func (*Parser) Check

func (p *Parser) Check(t token.TokenType) bool

Check returns true if the current token is of the given type (implements spi.ParserOps).

func (*Parser) Comments

func (p *Parser) Comments() []*token.Comment

Comments returns the comments collected during lexing. Call this after parsing to get all comments for the formatter.

func (*Parser) Dialect

func (p *Parser) Dialect() *core.Dialect

Dialect returns the parser's dialect, if any.

func (*Parser) Expect

func (p *Parser) Expect(t token.TokenType) error

Expect consumes the current token if it matches, otherwise returns an error (implements spi.ParserOps).

func (*Parser) Match

func (p *Parser) Match(t token.TokenType) bool

Match consumes the current token if it matches (implements spi.ParserOps).

func (*Parser) NextToken

func (p *Parser) NextToken()

NextToken advances to the next token (implements spi.ParserOps).

func (*Parser) ParseExpression

func (p *Parser) ParseExpression() (core.Expr, error)

ParseExpression parses an expression (implements spi.ParserOps).

func (*Parser) ParseExpressionList

func (p *Parser) ParseExpressionList() ([]core.Expr, error)

ParseExpressionList parses a comma-separated list of expressions (implements spi.ParserOps).

func (*Parser) ParseIdentifier

func (p *Parser) ParseIdentifier() (string, error)

ParseIdentifier parses an identifier (implements spi.ParserOps).

func (*Parser) ParseOrderByList

func (p *Parser) ParseOrderByList() ([]core.OrderByItem, error)

ParseOrderByList parses an ORDER BY list (implements spi.ParserOps).

func (*Parser) Peek

func (p *Parser) Peek() token.Token

Peek returns the lookahead token (implements spi.ParserOps).

func (*Parser) Position

func (p *Parser) Position() token.Position

Position returns the current token's position (implements spi.ParserOps).

func (*Parser) Token

func (p *Parser) Token() token.Token

Token returns the current token (implements spi.ParserOps).

type Position

type Position = token.Position

Position is an alias for token.Position.

type ResolutionError

type ResolutionError struct {
	Message string
}

ResolutionError represents a column/table resolution error.

func (*ResolutionError) Error

func (e *ResolutionError) Error() string

type ResolveError

type ResolveError struct {
	Message string
}

ResolveError represents an error during resolution.

func (*ResolveError) Error

func (e *ResolveError) Error() string

type Resolver

type Resolver struct {
	// contains filtered or unexported fields
}

Resolver walks the AST and resolves:

  - CTE definitions (names and columns)
  - Table references in FROM clauses
  - Column references to their source tables
  - Star expansion (SELECT * and t.*)

func NewResolver

func NewResolver(d *core.Dialect, schema Schema) (*Resolver, error)

NewResolver creates a new resolver with the given dialect and schema. Returns an error if dialect is nil.

func (*Resolver) Resolve

func (r *Resolver) Resolve(stmt *core.SelectStmt) (*Scope, error)

Resolve builds scopes for a SELECT statement and returns the root scope.

type Schema

type Schema map[string][]string

Schema maps table names to their columns. Used for SELECT * expansion when schema information is available.

type Scope

type Scope struct {
	// contains filtered or unexported fields
}

Scope tracks all available tables, CTEs, and their columns within a query context.

func NewScope

func NewScope(d *core.Dialect, schema Schema) (*Scope, error)

NewScope creates a new root scope. Returns an error if dialect is nil.

func (*Scope) AllEntries

func (s *Scope) AllEntries() []*ScopeEntry

AllEntries returns all scope entries in the current scope (not including parent).

func (*Scope) Child

func (s *Scope) Child() *Scope

Child creates a child scope for nested queries (subqueries, derived tables).

func (*Scope) ExpandStar

func (s *Scope) ExpandStar(tableName string) []*core.ColumnRef

ExpandStar expands a SELECT * to column references. If tableName is empty, expands * for all tables in scope. If tableName is provided, expands only for that table.

Returns nil if the table is not found or has no known columns.

func (*Scope) HasSchemaInfo

func (s *Scope) HasSchemaInfo() bool

HasSchemaInfo returns true if the scope has column information for any table.

func (*Scope) Lookup

func (s *Scope) Lookup(name string) (*ScopeEntry, bool)

Lookup finds a scope entry by name (table name or alias). Searches current scope first, then parent scopes.

func (*Scope) LookupCTE

func (s *Scope) LookupCTE(name string) (*ScopeEntry, bool)

LookupCTE looks up a CTE by name. CTEs are only looked up in parent scopes (they're defined before the main query).

func (*Scope) RegisterCTE

func (s *Scope) RegisterCTE(name string, columns []string)

RegisterCTE registers a CTE with its resolved columns.

func (*Scope) RegisterCTEWithSources

func (s *Scope) RegisterCTEWithSources(name string, columns []string, underlyingSources []string)

RegisterCTEWithSources registers a CTE with its resolved columns and underlying sources.

func (*Scope) RegisterDerived

func (s *Scope) RegisterDerived(alias string, columns []string)

RegisterDerived registers a derived table (subquery in FROM).

func (*Scope) RegisterDerivedWithSources

func (s *Scope) RegisterDerivedWithSources(alias string, columns []string, underlyingSources []string)

RegisterDerivedWithSources registers a derived table with its underlying sources.

func (*Scope) RegisterTable

func (s *Scope) RegisterTable(table *core.TableName)

RegisterTable registers a physical table from a FROM clause.

func (*Scope) ResolveColumn

func (s *Scope) ResolveColumn(ref *core.ColumnRef) (*ScopeEntry, bool)

ResolveColumn attempts to resolve a column reference to its source table. Returns the scope entry and true if found, nil and false otherwise.

For unqualified columns, it searches all entries in scope. For qualified columns (table.column), it looks up by qualifier.

func (*Scope) ResolveColumnFull

func (s *Scope) ResolveColumnFull(ref *core.ColumnRef) (*ColumnSource, bool)

ResolveColumnFull resolves a column reference and returns full source information.

type ScopeEntry

type ScopeEntry struct {
	Type              ScopeType
	Name              string   // Original table/CTE name
	Alias             string   // Alias (if any)
	Columns           []string // Known columns (from schema or derived query)
	SourceTable       string   // For physical tables: fully qualified name (schema.table)
	UnderlyingSources []string // For CTEs/derived tables: underlying physical tables
}

ScopeEntry represents a table/CTE/derived table in scope.

func (*ScopeEntry) EffectiveName

func (e *ScopeEntry) EffectiveName() string

EffectiveName returns the name used to reference this entry (alias if present, else name).

type ScopeType

type ScopeType int

ScopeType indicates the type of scope entry.

const (
	// ScopeTable represents a physical table.
	ScopeTable ScopeType = iota
	// ScopeCTE represents a Common Table Expression.
	ScopeCTE
	// ScopeDerived represents a derived table (subquery in FROM).
	ScopeDerived
)

type Token

type Token = token.Token

Token is an alias for token.Token.

func Tokenize

func Tokenize(input string) []Token

Tokenize returns all tokens from the input.

type TokenType

type TokenType = token.TokenType

TokenType is an alias for token.TokenType.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL