Documentation
Overview ¶
Package parser provides SQL parsing with dialect-aware syntax validation.
Usage ¶
stmt, err := parser.ParseWithDialect("SELECT a, b FROM t", myDialect)
if err != nil {
// handle error
}
The parser requires a dialect to be specified. Use the dialect registry to get a dialect by name:
d, ok := dialect.Get("duckdb")
stmt, err := parser.ParseWithDialect(sql, d)
Grammar Overview ¶
The parser implements a recursive descent parser for a subset of SQL:
statement → [WITH cte_list] select_body
select_body → select_core [(UNION|INTERSECT|EXCEPT) [ALL] select_body]
select_core → SELECT [DISTINCT] select_list FROM from_clause
[WHERE expr] [GROUP BY expr_list] [HAVING expr]
[QUALIFY expr] [ORDER BY order_list] [LIMIT expr]
See each file for detailed grammar rules for that section.
Package parser provides SQL parsing and column-level lineage extraction. (The token type aliases documented below are re-exported in a separate file for convenience.)
Index ¶
- Constants
- Variables
- func AddLeadingComment(n *core.NodeInfo, c *token.Comment)
- func AddTrailingComment(n *core.NodeInfo, c *token.Comment)
- func GetSpan(n *core.NodeInfo) token.Span
- func ParseWithDialect(sql string, d *core.Dialect) (*core.SelectStmt, error)
- func ParseWithDialectAndComments(sql string, d *core.Dialect) (*core.SelectStmt, []*token.Comment, error)
- type ColumnResolver
- type ColumnSource
- type FetchClauseData
- type LexError
- type Lexer
- type ParseError
- type Parser
- func (p *Parser) AddError(msg string)
- func (p *Parser) Check(t token.TokenType) bool
- func (p *Parser) Comments() []*token.Comment
- func (p *Parser) Dialect() *core.Dialect
- func (p *Parser) Expect(t token.TokenType) error
- func (p *Parser) Match(t token.TokenType) bool
- func (p *Parser) NextToken()
- func (p *Parser) ParseExpression() (core.Expr, error)
- func (p *Parser) ParseExpressionList() ([]core.Expr, error)
- func (p *Parser) ParseIdentifier() (string, error)
- func (p *Parser) ParseOrderByList() ([]core.OrderByItem, error)
- func (p *Parser) Peek() token.Token
- func (p *Parser) Position() token.Position
- func (p *Parser) Token() token.Token
- type Position
- type ResolutionError
- type ResolveError
- type Resolver
- type Schema
- type Scope
- func (s *Scope) AllEntries() []*ScopeEntry
- func (s *Scope) Child() *Scope
- func (s *Scope) ExpandStar(tableName string) []*core.ColumnRef
- func (s *Scope) HasSchemaInfo() bool
- func (s *Scope) Lookup(name string) (*ScopeEntry, bool)
- func (s *Scope) LookupCTE(name string) (*ScopeEntry, bool)
- func (s *Scope) RegisterCTE(name string, columns []string)
- func (s *Scope) RegisterCTEWithSources(name string, columns []string, underlyingSources []string)
- func (s *Scope) RegisterDerived(alias string, columns []string)
- func (s *Scope) RegisterDerivedWithSources(alias string, columns []string, underlyingSources []string)
- func (s *Scope) RegisterTable(table *core.TableName)
- func (s *Scope) ResolveColumn(ref *core.ColumnRef) (*ScopeEntry, bool)
- func (s *Scope) ResolveColumnFull(ref *core.ColumnRef) (*ColumnSource, bool)
- type ScopeEntry
- type ScopeType
- type Token
- type TokenType
Constants ¶
const ( ErrUnexpectedToken = "unexpected token %s, expected %s" ErrUnterminatedString = "unterminated string literal" ErrInvalidNumber = "invalid number literal" ErrUnknownColumn = "unknown column %q" ErrUnknownTable = "unknown table or alias %q" ErrAmbiguousColumn = "ambiguous column reference %q" // Dialect-specific error messages ErrUnsupportedClause = "%s is not supported in %s dialect" ErrUnsupportedOperator = "operator %s is not supported in %s dialect" ErrNoClauseHandler = "no handler registered for clause %s" )
Common error message format strings used for lexing, parsing, and resolution errors.
const ( SoftKeywordName = "NAME" SoftKeywordValue = "VALUE" // For future PIVOT/UNPIVOT support )
Soft keywords are identifiers that have special meaning in specific contexts. They are not reserved words and can be used as identifiers elsewhere. Example: "NAME" is a soft keyword in "UNION BY NAME" but can still be used as a column name in "SELECT name FROM users".
const ( // Special tokens TOKEN_EOF = token.EOF TOKEN_ILLEGAL = token.ILLEGAL // Literals TOKEN_IDENT = token.IDENT TOKEN_NUMBER = token.NUMBER TOKEN_STRING = token.STRING // Operators TOKEN_PLUS = token.PLUS TOKEN_MINUS = token.MINUS TOKEN_STAR = token.STAR TOKEN_SLASH = token.SLASH TOKEN_MOD = token.MOD TOKEN_DPIPE = token.DPIPE TOKEN_EQ = token.EQ TOKEN_NE = token.NE TOKEN_LT = token.LT TOKEN_GT = token.GT TOKEN_LE = token.LE TOKEN_GE = token.GE TOKEN_DOT = token.DOT TOKEN_COMMA = token.COMMA TOKEN_LPAREN = token.LPAREN TOKEN_RPAREN = token.RPAREN TOKEN_LBRACKET = token.LBRACKET TOKEN_RBRACKET = token.RBRACKET TOKEN_LBRACE = token.LBRACE TOKEN_RBRACE = token.RBRACE TOKEN_COLON = token.COLON TOKEN_ARROW = token.ARROW // Keywords (alphabetical) TOKEN_ALL = token.ALL TOKEN_AND = token.AND TOKEN_AS = token.AS TOKEN_ASC = token.ASC TOKEN_BETWEEN = token.BETWEEN TOKEN_BY = token.BY TOKEN_CASE = token.CASE TOKEN_CAST = token.CAST TOKEN_CROSS = token.CROSS TOKEN_CURRENT = token.CURRENT TOKEN_DESC = token.DESC TOKEN_DISTINCT = token.DISTINCT TOKEN_ELSE = token.ELSE TOKEN_END = token.END TOKEN_EXISTS = token.EXISTS TOKEN_EXCEPT = token.EXCEPT TOKEN_FALSE = token.FALSE TOKEN_FETCH = token.FETCH TOKEN_FILTER = token.FILTER TOKEN_FIRST = token.FIRST TOKEN_FOLLOWING = token.FOLLOWING TOKEN_FROM = token.FROM TOKEN_FULL = token.FULL TOKEN_GROUP = token.GROUP TOKEN_GROUPS = token.GROUPS TOKEN_HAVING = token.HAVING TOKEN_IN = token.IN TOKEN_INNER = token.INNER TOKEN_INTERSECT = token.INTERSECT TOKEN_IS = token.IS TOKEN_JOIN = token.JOIN TOKEN_LAST = token.LAST TOKEN_LATERAL = token.LATERAL TOKEN_LEFT = token.LEFT TOKEN_LIKE = token.LIKE TOKEN_LIMIT = token.LIMIT TOKEN_NATURAL = token.NATURAL TOKEN_NEXT = token.NEXT TOKEN_NOT = token.NOT TOKEN_NULL = token.NULL TOKEN_NULLS = token.NULLS TOKEN_OFFSET = token.OFFSET TOKEN_ON = token.ON TOKEN_ONLY = token.ONLY TOKEN_OR = token.OR TOKEN_ORDER = token.ORDER TOKEN_OUTER = token.OUTER TOKEN_OVER = token.OVER TOKEN_PARTITION = token.PARTITION TOKEN_PERCENT = token.PERCENT TOKEN_PRECEDING = token.PRECEDING TOKEN_RANGE = token.RANGE TOKEN_RECURSIVE = token.RECURSIVE TOKEN_RIGHT = token.RIGHT TOKEN_ROW = token.ROW TOKEN_ROWS = token.ROWS TOKEN_SELECT = token.SELECT TOKEN_THEN = token.THEN TOKEN_TIES = token.TIES TOKEN_TRUE = token.TRUE TOKEN_UNBOUNDED = token.UNBOUNDED TOKEN_UNION = token.UNION TOKEN_USING = token.USING TOKEN_WHEN = token.WHEN TOKEN_WHERE = token.WHERE TOKEN_WINDOW = token.WINDOW TOKEN_WITH = token.WITH TOKEN_WITHIN = token.WITHIN // Template tokens TOKEN_MACRO = token.MACRO )
Variables ¶
var LookupIdent = token.LookupIdent
LookupIdent is re-exported from token package.
Functions ¶
func AddLeadingComment ¶
AddLeadingComment adds a leading comment to the node. Note: This is now a method on NodeInfo in core, but kept for compatibility.
func AddTrailingComment ¶
AddTrailingComment adds a trailing comment to the node. Note: This is now a method on NodeInfo in core, but kept for compatibility.
func GetSpan ¶
GetSpan returns the node's source span. Note: This is now a method on NodeInfo in core, but kept for compatibility.
func ParseWithDialect ¶
ParseWithDialect parses the SQL with a specific dialect and returns the AST.
func ParseWithDialectAndComments ¶
func ParseWithDialectAndComments(sql string, d *core.Dialect) (*core.SelectStmt, []*token.Comment, error)
ParseWithDialectAndComments parses SQL and returns both AST and comments.
Types ¶
type ColumnResolver ¶
type ColumnResolver struct {
// contains filtered or unexported fields
}
ColumnResolver resolves column references within an expression. Used during lineage extraction to find all source columns.
func NewColumnResolver ¶
func NewColumnResolver(scope *Scope, d *core.Dialect) (*ColumnResolver, error)
NewColumnResolver creates a column resolver for the given scope. Returns an error if dialect is nil.
func (*ColumnResolver) CollectColumns ¶
func (cr *ColumnResolver) CollectColumns(expr core.Expr) []*core.ColumnRef
CollectColumns collects all column references from an expression.
func (*ColumnResolver) ExpandStar ¶
func (cr *ColumnResolver) ExpandStar(tableName string) []*core.ColumnRef
ExpandStar expands a star expression to individual column references.
func (*ColumnResolver) ResolveColumnRef ¶
func (cr *ColumnResolver) ResolveColumnRef(ref *core.ColumnRef) (*ColumnSource, bool)
ResolveColumnRef resolves a column reference to its source.
type ColumnSource ¶
type ColumnSource struct {
Table string // Source table name
SourceTable string // Fully qualified source (e.g., schema.table)
Column string // Column name
FromCTE bool // True if from a CTE
FromDerived bool // True if from a derived table
}
ColumnSource represents a resolved source for a column reference.
type FetchClauseData ¶
type FetchClauseData interface {
GetFirst() bool
GetCount() core.Expr
GetPercent() bool
GetWithTies() bool
}
FetchClauseData is an interface for extracting data from dialect-defined FetchClause types.
type Lexer ¶
type Lexer struct {
// Comments collected during lexing (for formatter)
Comments []*token.Comment
// contains filtered or unexported fields
}
Lexer tokenizes SQL input.
func NewLexerWithDialect ¶
NewLexerWithDialect creates a new dialect-aware Lexer for the given input.
type ParseError ¶
ParseError represents a parsing error with position information.
func (*ParseError) Error ¶
func (e *ParseError) Error() string
type Parser ¶
type Parser struct {
// contains filtered or unexported fields
}
Parser parses SQL into an AST.
func (*Parser) Check ¶
Check returns true if the current token is of the given type (implements spi.ParserOps).
func (*Parser) Comments ¶
Comments returns the comments collected during lexing. Call this after parsing to get all comments for the formatter.
func (*Parser) Expect ¶
Expect consumes the current token if it matches, otherwise returns an error (implements spi.ParserOps).
func (*Parser) NextToken ¶
func (p *Parser) NextToken()
NextToken advances to the next token (implements spi.ParserOps).
func (*Parser) ParseExpression ¶
ParseExpression parses an expression (implements spi.ParserOps).
func (*Parser) ParseExpressionList ¶
ParseExpressionList parses a comma-separated list of expressions (implements spi.ParserOps).
func (*Parser) ParseIdentifier ¶
ParseIdentifier parses an identifier (implements spi.ParserOps).
func (*Parser) ParseOrderByList ¶
func (p *Parser) ParseOrderByList() ([]core.OrderByItem, error)
ParseOrderByList parses an ORDER BY list (implements spi.ParserOps).
type ResolutionError ¶
type ResolutionError struct {
Message string
}
ResolutionError represents a column/table resolution error.
func (*ResolutionError) Error ¶
func (e *ResolutionError) Error() string
type ResolveError ¶
type ResolveError struct {
Message string
}
ResolveError represents an error during resolution.
func (*ResolveError) Error ¶
func (e *ResolveError) Error() string
type Resolver ¶
type Resolver struct {
// contains filtered or unexported fields
}
Resolver walks the AST and resolves: - CTE definitions (names and columns) - Table references in FROM clauses - Column references to their source tables - Star expansion (SELECT * and t.*)
func NewResolver ¶
NewResolver creates a new resolver with the given dialect and schema. Returns an error if dialect is nil.
type Schema ¶
Schema maps table names to their columns. Used for SELECT * expansion when schema information is available.
type Scope ¶
type Scope struct {
// contains filtered or unexported fields
}
Scope tracks all available tables, CTEs, and their columns within a query context.
func (*Scope) AllEntries ¶
func (s *Scope) AllEntries() []*ScopeEntry
AllEntries returns all scope entries in the current scope (not including parent).
func (*Scope) ExpandStar ¶
ExpandStar expands a SELECT * to column references. If tableName is empty, expands * for all tables in scope. If tableName is provided, expands only for that table.
Returns nil if the table is not found or has no known columns.
func (*Scope) HasSchemaInfo ¶
HasSchemaInfo returns true if the scope has column information for any table.
func (*Scope) Lookup ¶
func (s *Scope) Lookup(name string) (*ScopeEntry, bool)
Lookup finds a scope entry by name (table name or alias). Searches current scope first, then parent scopes.
func (*Scope) LookupCTE ¶
func (s *Scope) LookupCTE(name string) (*ScopeEntry, bool)
LookupCTE looks up a CTE by name. CTEs are only looked up in parent scopes (they're defined before the main query).
func (*Scope) RegisterCTE ¶
RegisterCTE registers a CTE with its resolved columns.
func (*Scope) RegisterCTEWithSources ¶
RegisterCTEWithSources registers a CTE with its resolved columns and underlying sources.
func (*Scope) RegisterDerived ¶
RegisterDerived registers a derived table (subquery in FROM).
func (*Scope) RegisterDerivedWithSources ¶
func (s *Scope) RegisterDerivedWithSources(alias string, columns []string, underlyingSources []string)
RegisterDerivedWithSources registers a derived table with its underlying sources.
func (*Scope) RegisterTable ¶
RegisterTable registers a physical table from a FROM clause.
func (*Scope) ResolveColumn ¶
func (s *Scope) ResolveColumn(ref *core.ColumnRef) (*ScopeEntry, bool)
ResolveColumn attempts to resolve a column reference to its source table. Returns the scope entry and true if found, nil and false otherwise.
For unqualified columns, it searches all entries in scope. For qualified columns (table.column), it looks up by qualifier.
func (*Scope) ResolveColumnFull ¶
func (s *Scope) ResolveColumnFull(ref *core.ColumnRef) (*ColumnSource, bool)
ResolveColumnFull resolves a column reference and returns full source information.
type ScopeEntry ¶
type ScopeEntry struct {
Type ScopeType
Name string // Original table/CTE name
Alias string // Alias (if any)
Columns []string // Known columns (from schema or derived query)
SourceTable string // For physical tables: fully qualified name (schema.table)
UnderlyingSources []string // For CTEs/derived tables: underlying physical tables
}
ScopeEntry represents a table/CTE/derived table in scope.
func (*ScopeEntry) EffectiveName ¶
func (e *ScopeEntry) EffectiveName() string
EffectiveName returns the name used to reference this entry (alias if present, else name).