peg

package module
v0.0.0-...-5acc593 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 17, 2025 License: MIT Imports: 5 Imported by: 2

README

go-peg

NOTE: This library is deprecated. Please use cpp-peglib instead.

Yet another PEG (Parsing Expression Grammars) parser generator for Go.

If you need a PEG grammar checker, you may want to check peglint.

If you need a C++ version, please see cpp-peglib.

Extended features
  • Token operator: < >
  • Automatic whitespace skipping: %whitespace
  • Expression parsing for binary operators (precedence climbing method)
  • Parameterized rule or Macro
  • Word expression: %word
  • AST generation
  • Enhanced error reporting with suggestions
  • Error recovery during parsing
  • Detailed tracing for debugging
Usage
// Create a PEG parser
parser, _ := NewParser(`
    # Simple calculator
    EXPR         ←  ATOM (BINOP ATOM)*
    ATOM         ←  NUMBER / '(' EXPR ')'
    BINOP        ←  < [-+/*] >
    NUMBER       ←  < [0-9]+ >
    %whitespace  ←  [ \t]*
    ---
    # Expression parsing option
    %expr  = EXPR   # Rule to apply 'precedence climbing method' to
    %binop = L + -  # Precedence level 1
    %binop = L * /  # Precedence level 2
`)

// Setup semantic actions
g := parser.Grammar
g["EXPR"].Action = func(v *Values, d Any) (Any, error) {
    val := v.ToInt(0)
    if v.Len() > 1 {
        ope := v.ToStr(1)
        rhs := v.ToInt(2)
        switch ope {
        case "+": val += rhs
        case "-": val -= rhs
        case "*": val *= rhs
        case "/": val /= rhs
        }
    }
    return val, nil
}
g["BINOP"].Action = func(v *Values, d Any) (Any, error) {
    return v.Token(), nil
}
g["NUMBER"].Action = func(v *Values, d Any) (Any, error) {
    return strconv.Atoi(v.Token())
}

// Parse
input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "
val, _ := parser.ParseAndGetValue(input, nil)

fmt.Println(val) // Output: -3

Parameterized Rule or Macro

# Syntax
Start      ← _ Expr
Expr       ← Sum
Sum        ← List(Product, SumOpe)
Product    ← List(Value, ProOpe)
Value      ← Number / T('(') Expr T(')')

# Token
SumOpe     ← T('+' / '-')
ProOpe     ← T('*' / '/')
Number     ← T([0-9]+)
~_         ← [ \t\r\n]*

# Macro
List(I, D) ← I (D I)*
T(x)       ← < x > _

Word expression

parser, _ := NewParser(`
    ROOT         ←  'hello' 'world'
    %whitespace  ←  [ \t\r\n]*
    %word        ←  [a-z]+
`)

parser.Parse("hello world", nil) # OK
parser.Parse("helloworld", nil)  # NG

AST generation

// Create a PEG parser
parser, _ := NewParser(`
    EXPRESSION       <-  TERM (TERM_OPERATOR TERM)*
    TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
    FACTOR           <-  NUMBER / '(' EXPRESSION ')'
    TERM_OPERATOR    <-  < [-+] >
    FACTOR_OPERATOR  <-  < [/*] >
    NUMBER           <-  < [0-9]+ >
    %whitespace      <-  [ \t\r\n]*
`)

// Evaluator
var eval func(ast *Ast) int
eval = func(ast *Ast) int {
    if ast.Name == "NUMBER" {
        val, _ := strconv.Atoi(ast.Token)
        return val
    } else {
        nodes := ast.Nodes
        val := eval(nodes[0])
        for i := 1; i < len(nodes); i += 2 {
            num := eval(nodes[i+1])
            ope := nodes[i].Token[0]
            switch ope {
            case '+':
                val += num
                break
            case '-':
                val -= num
                break
            case '*':
                val *= num
                break
            case '/':
                val /= num
                break
            }
        }
        return val
    }
}

// Generate AST
parser.EnableAst()
input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "
ret, _ := parser.ParseAndGetValue(input, nil)
ast := ret.(*Ast)

// Optimize AST
opt := NewAstOptimizer(nil)
ast = opt.Optimize(ast, nil)

// Evaluate AST
val := eval(ast)

fmt.Println(val) // Output: -3

Error Reporting and Recovery

The library provides enhanced error reporting with detailed context and suggestions:

parser, _ := NewParser(grammar)
_, err := parser.ParseAndGetValue(input, nil)
if err != nil {
    fmt.Println(err) // Shows detailed error with line/column and pointer
    
    // For syntax errors, you can get suggestions
    if syntaxErr, ok := err.(*SyntaxError); ok {
        for _, suggestion := range syntaxErr.GetSuggestions() {
            fmt.Println("- " + suggestion)
        }
    }
}

You can also enable error recovery to continue parsing after errors:

parser, _ := NewParser(grammar)
parser.RecoveryEnabled = true
parser.MaxErrors = 10 // Maximum number of errors to report
val, errs := parser.ParseAndGetValueWithRecovery(input, nil)
for _, err := range errs {
    fmt.Println(err)
}

Tracing for Debugging

Enable detailed tracing to debug your grammar:

parser, _ := NewParser(grammar)
parser.EnableTracing(&TracingOptions{
    ShowRuleEntry:    true,
    ShowRuleExit:     true,
    ShowTokens:       false,
    ShowErrorContext: true,
    OutputFormat:     "text",
})

Using peglint

The peglint tool now supports error recovery and better error reporting:

# Basic usage
peglint grammar.peg -f source.txt

# With error recovery
peglint -recovery -max-errors 5 grammar.peg -f source.txt

# With tracing
peglint -trace grammar.peg -f source.txt

TODO

  • Memoization (Packrat parsing)

License

MIT license (© 2016 Yuji Hirose)

Documentation

Overview

Example
// Create a PEG parser
parser, _ := NewParser(`
        # Grammar for simple calculator...
        EXPRESSION       <-  _ TERM (TERM_OPERATOR TERM)*
        TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
        FACTOR           <-  NUMBER / '(' _ EXPRESSION ')' _
        TERM_OPERATOR    <-  < [-+] > _
        FACTOR_OPERATOR  <-  < [/*] > _
        NUMBER           <-  < [0-9]+ > _
		~_               <-  [ \t]*
    `)

// Setup actions
reduce := func(v *Values, d Any) (Any, error) {
	val := v.ToInt(0)
	for i := 1; i < len(v.Vs); i += 2 {
		num := v.ToInt(i + 1)
		switch v.ToStr(i) {
		case "+":
			val += num
		case "-":
			val -= num
		case "*":
			val *= num
		case "/":
			val /= num
		}
	}
	return val, nil
}

g := parser.Grammar
g["EXPRESSION"].Action = reduce
g["TERM"].Action = reduce
g["TERM_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil }
g["FACTOR_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil }
g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { return strconv.Atoi(v.Token()) }

// Parse
input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "
val, _ := parser.ParseAndGetValue(input, nil)

fmt.Println(val)
Output:

-3
Example (Combinators)
// Grammar
var EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER Rule

EXPRESSION.Ope = Seq(&TERM, Zom(Seq(&TERM_OPERATOR, &TERM)))
TERM.Ope = Seq(&FACTOR, Zom(Seq(&FACTOR_OPERATOR, &FACTOR)))
FACTOR.Ope = Cho(&NUMBER, Seq(Lit("("), &EXPRESSION, Lit(")")))
TERM_OPERATOR.Ope = Seq(Tok(Cls("-+")))
FACTOR_OPERATOR.Ope = Seq(Tok(Cls("/*")))
NUMBER.Ope = Seq(Tok(Oom(Cls("0-9"))))

EXPRESSION.WhitespaceOpe = Zom(Cls(" \t"))

// Actions
reduce := func(v *Values, d Any) (Any, error) {
	ret := v.ToInt(0)
	for i := 1; i < len(v.Vs); i += 2 {
		ope := v.ToStr(i)
		n := v.ToInt(i + 1)
		switch ope {
		case "+":
			ret += n
		case "-":
			ret -= n
		case "*":
			ret *= n
		case "/":
			ret /= n
		}
	}
	return ret, nil
}

EXPRESSION.Action = reduce
TERM.Action = reduce
TERM_OPERATOR.Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil }
FACTOR_OPERATOR.Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil }
NUMBER.Action = func(v *Values, d Any) (Any, error) { return strconv.Atoi(v.Token()) }

// Parse
l, v, _ := EXPRESSION.Parse(" (1 + 2 * (3 + 4)) / 5 - 6 ", nil)

fmt.Println(l)
fmt.Println(v)
Output:

27
-3
Example (ExpressionParsing)
// Create a PEG parser
parser, _ := NewParser(`
        # Grammar for simple calculator...
        EXPRESSION   <-  ATOM (BINOP ATOM)*
        ATOM         <-  NUMBER / '(' EXPRESSION ')'
        BINOP        <-  < [-+/*] >
        NUMBER       <-  < [0-9]+ >
		%whitespace  <-  [ \t]*
		---
        # Expression parsing
		%expr  = EXPRESSION
		%binop = L + -  # level 1
		%binop = L * /  # level 2
    `)

// Setup actions
g := parser.Grammar
g["EXPRESSION"].Action = func(v *Values, d Any) (Any, error) {
	val := v.ToInt(0)
	if v.Len() > 1 {
		rhs := v.ToInt(2)
		ope := v.ToStr(1)
		switch ope {
		case "+":
			val += rhs
		case "-":
			val -= rhs
		case "*":
			val *= rhs
		case "/":
			val /= rhs
		}
	}
	return val, nil
}
g["BINOP"].Action = func(v *Values, d Any) (Any, error) {
	return v.Token(), nil
}
g["NUMBER"].Action = func(v *Values, d Any) (Any, error) {
	return strconv.Atoi(v.Token())
}

// Parse
input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "
val, _ := parser.ParseAndGetValue(input, nil)

fmt.Println(val)
Output:

-3
Example (Whitespace)
// Create a PEG parser
parser, _ := NewParser(`
        # Grammar for simple calculator...
        EXPRESSION       <-  TERM (TERM_OPERATOR TERM)*
        TERM             <-  FACTOR (FACTOR_OPERATOR FACTOR)*
        FACTOR           <-  NUMBER / '(' EXPRESSION ')'
        TERM_OPERATOR    <-  < [-+] >
        FACTOR_OPERATOR  <-  < [/*] >
        NUMBER           <-  < [0-9]+ >
		%whitespace      <-  [ \t]*
    `)

// Setup actions
reduce := func(v *Values, d Any) (Any, error) {
	val := v.ToInt(0)
	for i := 1; i < len(v.Vs); i += 2 {
		num := v.ToInt(i + 1)
		switch v.ToStr(i) {
		case "+":
			val += num
		case "-":
			val -= num
		case "*":
			val *= num
		case "/":
			val /= num
		}
	}
	return val, nil
}

g := parser.Grammar
g["EXPRESSION"].Action = reduce
g["TERM"].Action = reduce
g["TERM_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil }
g["FACTOR_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil }
g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { return strconv.Atoi(v.Token()) }

// Parse
input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 "
val, _ := parser.ParseAndGetValue(input, nil)

fmt.Println(val)
Output:

-3

Index

Examples

Constants

View Source
const (
	WhitespceRuleName = "%whitespace"
	WordRuleName      = "%word"
	OptExpressionRule = "%expr"
	OptBinaryOperator = "%binop"
)

Variables

This section is empty.

Functions

func Apd

func Apd(ope operator) operator

func Cho

func Cho(opes ...operator) operator

func ChoCore

func ChoCore(opes []operator) operator

func Cls

func Cls(chars string) operator

func Dot

func Dot() operator

func EnableExpressionParsing

func EnableExpressionParsing(p *Parser, name string, bopinf BinOpeInfo) error

func Exp

func Exp(atom operator, binop operator, bopinf BinOpeInfo, action *Action) operator

func Ign

func Ign(ope operator) operator

func Lit

func Lit(lit string) operator

func Npd

func Npd(ope operator) operator

func Oom

func Oom(ope operator) operator

func Opt

func Opt(ope operator) operator

func Ref

func Ref(ident string, args []operator, pos int) operator

func Seq

func Seq(opes ...operator) operator

func SeqCore

func SeqCore(opes []operator) operator

func Tok

func Tok(ope operator) operator

func Usr

func Usr(fn func(s string, p int, v *Values, d Any) int) operator

func Wsp

func Wsp(ope operator) operator

func Zom

func Zom(ope operator) operator

Types

type Action

type Action func(v *Values, d Any) (Any, error)

Action

type Any

type Any interface {
}

Any

type Ast

type Ast struct {
	//Path  string
	Ln     int
	Col    int
	S      string
	Name   string
	Token  string
	Nodes  []*Ast
	Parent *Ast
	Data   interface{}
}

func (*Ast) String

func (ast *Ast) String() string

type AstOptimizer

type AstOptimizer struct {
	// contains filtered or unexported fields
}

func NewAstOptimizer

func NewAstOptimizer(exceptions []string) *AstOptimizer

func (*AstOptimizer) Optimize

func (o *AstOptimizer) Optimize(org *Ast, par *Ast) *Ast

type BinOpeInfo

type BinOpeInfo map[string]struct {
	// contains filtered or unexported fields
}

type Error

type Error struct {
	Details []ErrorDetail
	Type    ErrorType
}

Error

func (*Error) Error

func (e *Error) Error() string

func (*Error) GetSuggestions

func (e *Error) GetSuggestions() []string

GetSuggestions provides helpful suggestions based on error context

type ErrorDetail

type ErrorDetail struct {
	Ln   int
	Col  int
	Msg  string
	Line string
}

Error detail

func (ErrorDetail) String

func (d ErrorDetail) String() string

type ErrorType

type ErrorType int

Error types

const (
	SyntaxErrorType ErrorType = iota
	GrammarErrorType
	SemanticErrorType
)

type GrammarError

type GrammarError struct {
	BaseError Error
	RuleName  string
	ErrorType string // e.g., "left recursion", "undefined rule", etc.
}

func (*GrammarError) Error

func (e *GrammarError) Error() string

Implement the error interface for GrammarError

type Parser

type Parser struct {
	Grammar map[string]*Rule

	TracerEnter     func(name string, s string, v *Values, d Any, p int)
	TracerLeave     func(name string, s string, v *Values, d Any, p int, l int)
	RecoveryEnabled bool            // Enable error recovery
	MaxErrors       int             // Maximum number of errors to report before stopping
	TracingOptions  *TracingOptions // Options for tracing
	// contains filtered or unexported fields
}

Parser

func NewParser

func NewParser(s string) (p *Parser, err error)

func NewParserWithUserRules

func NewParserWithUserRules(s string, rules map[string]operator) (p *Parser, err error)

func (*Parser) EnableAst

func (p *Parser) EnableAst() (err error)

func (*Parser) EnableTracing

func (p *Parser) EnableTracing(options *TracingOptions)

EnableTracing sets up tracing with the specified options

func (*Parser) Parse

func (p *Parser) Parse(s string, d Any) (err error)

func (*Parser) ParseAndGetAst

func (p *Parser) ParseAndGetAst(s string, d Any) (*Ast, error)

func (*Parser) ParseAndGetValue

func (p *Parser) ParseAndGetValue(s string, d Any) (val Any, err error)

func (*Parser) ParseAndGetValueWithRecovery

func (p *Parser) ParseAndGetValueWithRecovery(s string, d Any) (val Any, errs []error)

ParseAndGetValueWithRecovery parses the input string with error recovery and returns the value

func (*Parser) ParseWithRecovery

func (p *Parser) ParseWithRecovery(s string, d Any) (errs []error)

ParseWithRecovery parses the input string with error recovery

type Rule

type Rule struct {
	Name          string
	SS            string
	Pos           int
	Ope           operator
	Action        Action
	Enter         func(d Any)
	Leave         func(d Any)
	Message       func() (message string)
	Ignore        bool
	WhitespaceOpe operator
	WordOpe       operator

	Parameters []string

	TracerEnter func(name string, s string, v *Values, d Any, p int)
	TracerLeave func(name string, s string, v *Values, d Any, p int, l int)
	// contains filtered or unexported fields
}

Rule

func (*Rule) Label

func (o *Rule) Label() string

func (*Rule) Parse

func (r *Rule) Parse(s string, d Any) (l int, val Any, err error)

type SyntaxError

type SyntaxError struct {
	BaseError Error
	Expected  []string
}

Create more specific error types

func (*SyntaxError) Error

func (e *SyntaxError) Error() string

Implement the error interface for SyntaxError

func (*SyntaxError) GetSuggestions

func (e *SyntaxError) GetSuggestions() []string

GetSuggestions provides helpful suggestions based on error context for SyntaxError

type Token

type Token struct {
	Pos int
	S   string
}

Token

type TracingOptions

type TracingOptions struct {
	ShowRuleEntry    bool   // Show when entering a rule
	ShowRuleExit     bool   // Show when exiting a rule
	ShowTokens       bool   // Show tokens as they are parsed
	ShowErrorContext bool   // Show context around errors
	OutputFormat     string // Format for output: "text" or "json"
}

TracingOptions defines the configuration for parser tracing

type Values

type Values struct {
	SS     string
	Vs     []Any
	Pos    int
	S      string
	Choice int
	Ts     []Token
}

Semantic values

func (*Values) Len

func (v *Values) Len() int

func (*Values) ToBool

func (v *Values) ToBool(i int) bool

func (*Values) ToFloat32

func (v *Values) ToFloat32(i int) float32

func (*Values) ToFloat64

func (v *Values) ToFloat64(i int) float64

func (*Values) ToInt

func (v *Values) ToInt(i int) int

func (*Values) ToOpe

func (v *Values) ToOpe(i int) operator

func (*Values) ToStr

func (v *Values) ToStr(i int) string

func (*Values) Token

func (v *Values) Token() string

Directories

Path Synopsis
cmd
peglint command

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL