parsercombinator

package module
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 30, 2025 License: Apache-2.0 Imports: 6 Imported by: 4

README

Parser Combinator Library for Go

A powerful and flexible parser combinator library for Go, specifically designed for building parsers that work with pre-tokenized input to construct Abstract Syntax Trees (ASTs).

Features

  • Token-based parsing: Works with pre-tokenized input rather than raw strings
  • Type-safe: Leverages Go's generics for type safety
  • Comprehensive error handling: Advanced error reporting with custom messages
  • Debugging support: Built-in tracing capabilities
  • Recovery mechanisms: Error recovery for robust parsing
  • Lookahead support: Positive and negative lookahead operations
  • Composable: Easy to combine simple parsers into complex ones

Installation

go get github.com/shibukawa/parsercombinator

Quick Start

package main

import (
    "fmt"
    pc "github.com/shibukawa/parsercombinator"
)

// Define a simple calculator parser
func main() {
    // Create basic parsers
    digit := pc.Trace("digit", func(pctx *pc.ParseContext[int], src []pc.Token[int]) (int, []pc.Token[int], error) {
        if src[0].Type == "raw" {
            i, err := strconv.Atoi(src[0].Raw)
            if err != nil {
                return 0, nil, pc.NewErrNotMatch("integer", src[0].Raw, src[0].Pos)
            }
            return 1, []pc.Token[int]{{Type: "digit", Pos: src[0].Pos, Val: i}}, nil
        }
        return 0, nil, pc.NewErrNotMatch("digit", src[0].Type, src[0].Pos)
    })

    operator := pc.Trace("operator", func(pctx *pc.ParseContext[int], src []pc.Token[int]) (int, []pc.Token[int], error) {
        if src[0].Type == "raw" && (src[0].Raw == "+" || src[0].Raw == "-") {
            return 1, []pc.Token[int]{{Type: "operator", Pos: src[0].Pos, Raw: src[0].Raw}}, nil
        }
        return 0, nil, pc.NewErrNotMatch("operator", src[0].Raw, src[0].Pos)
    })

    // Combine parsers
    expression := pc.Seq(digit, operator, digit)

    // Parse input
    context := pc.NewParseContext[int]()
    result, err := pc.EvaluateWithRawTokens(context, []string{"5", "+", "3"}, expression)
    if err != nil {
        fmt.Printf("Error: %v\n", err)
        return
    }
    
    fmt.Printf("Result: %v\n", result) // [5, 3] (operator values are 0)
}

Core Components

Parser Function

The core type is Parser[T]:

type Parser[T any] func(*ParseContext[T], []Token[T]) (consumed int, newTokens []Token[T], err error)
Token Structure
type Token[T any] struct {
    Type string  // Token type identifier
    Pos  *Pos    // Position information
    Raw  string  // Original raw text
    Val  T       // Parsed value
}
Parse Context
type ParseContext[T any] struct {
    Tokens         []Token[T]     // Input tokens
    Pos            int            // Current position
    RemainedTokens []Token[T]     // Remaining tokens after parsing
    Results        []Token[T]     // Parsed result tokens
    Traces         []*TraceInfo   // Debug traces
    Errors         []*ParseError  // Collected errors
    TraceEnable    bool           // Enable/disable tracing
}

Basic Combinators

Sequence (Seq)

Matches parsers in sequence:

parser := pc.Seq(digit, operator, digit) // Matches: digit operator digit
Choice (Or)

Tries parsers in order, returns first successful match:

parser := pc.Or(digit, string, identifier) // Matches any of the alternatives
Repetition
  • ZeroOrMore: Matches zero or more occurrences
  • OneOrMore: Matches one or more occurrences
  • Repeat: Matches with specific min/max counts
  • Optional: Matches zero or one occurrence
numbers := pc.ZeroOrMore("numbers", digit)
requiredNumbers := pc.OneOrMore("required-numbers", digit)
exactlyThree := pc.Repeat("exactly-three", 3, 3, digit)
maybeDigit := pc.Optional(digit)

Advanced Features

Lookahead Operations
// Positive lookahead - check without consuming
parser := pc.Seq(
    pc.Lookahead(keyword), // Check for keyword
    actualParser,          // Then parse normally
)

// Negative lookahead - ensure something doesn't follow
parser := pc.Seq(
    identifier,
    pc.NotFollowedBy(digit), // Identifier not followed by digit
)

// Peek - get result without consuming
parser := pc.Seq(
    pc.Peek(nextToken), // See what's coming
    conditionalParser,   // Parse based on peek result
)
Error Handling and User-Friendly Messages
// Label for better error messages
numberParser := pc.Label("number", digit)

// Expected - for specific error messages
parser := pc.Or(
    validExpression,
    pc.Expected[int]("closing parenthesis"),
)

// Fail - for explicit failures
parser := pc.Or(
    implementedFeature,
    pc.Fail[int]("feature not implemented in this version"),
)
Error Recovery
// Recover from errors and continue parsing
parser := pc.Recover(
    pc.Digit(),        // Precondition check
    parseStatement,    // Main parsing logic
    pc.Until(";"),     // Recovery: skip until semicolon
)
Transformation

Transform parsed results:

// Transform tokens
addParser := pc.Trans(
    pc.Seq(digit, operator, digit),
    func(pctx *pc.ParseContext[int], tokens []pc.Token[int]) ([]pc.Token[int], error) {
        result := tokens[0].Val + tokens[2].Val // Add the numbers
        return []pc.Token[int]{{
            Type: "result",
            Pos:  tokens[0].Pos,
            Val:  result,
        }}, nil
    },
)
Recursive Parsers with Alias
// Define recursive grammar
expressionBody, expression := pc.NewAlias[int]("expression")

parser := expressionBody(
    pc.Or(
        digit,
        pc.Seq(
            pc.Literal("("),
            expression, // Recursive reference
            pc.Literal(")"),
        ),
    ),
)
Debugging and Tracing
context := pc.NewParseContext[int]()
context.TraceEnable = true // Enable tracing

result, err := pc.EvaluateWithRawTokens(context, input, parser)

// Print trace information
context.DumpTrace() // Prints to stdout
traceText := context.DumpTraceAsText() // Get as string

Error Types

The library defines several error types:

  • ErrNotMatch: Parser doesn't match (recoverable)
  • ErrRepeatCount: Repetition count not met (recoverable)
  • ErrCritical: Critical error (not recoverable)
// Create custom errors
err := pc.NewErrNotMatch("expected", "actual", position)
err := pc.NewErrCritical("fatal error", position)

Complete Example: Mathematical Expressions

package main

import (
    "fmt"
    "strconv"
    pc "github.com/shibukawa/parsercombinator"
)

func Digit() pc.Parser[int] {
    return pc.Trace("digit", func(pctx *pc.ParseContext[int], src []pc.Token[int]) (int, []pc.Token[int], error) {
        if src[0].Type == "raw" {
            i, err := strconv.Atoi(src[0].Raw)
            if err != nil {
                return 0, nil, pc.NewErrNotMatch("integer", src[0].Raw, src[0].Pos)
            }
            return 1, []pc.Token[int]{{Type: "digit", Pos: src[0].Pos, Val: i}}, nil
        }
        return 0, nil, pc.NewErrNotMatch("digit", src[0].Type, src[0].Pos)
    })
}

func Operator() pc.Parser[int] {
    return pc.Trace("operator", func(pctx *pc.ParseContext[int], src []pc.Token[int]) (int, []pc.Token[int], error) {
        if src[0].Type == "raw" {
            switch src[0].Raw {
            case "+", "-", "*", "/":
                return 1, []pc.Token[int]{{Type: "operator", Pos: src[0].Pos, Raw: src[0].Raw}}, nil
            }
        }
        return 0, nil, pc.NewErrNotMatch("operator", src[0].Raw, src[0].Pos)
    })
}

func Expression() pc.Parser[int] {
    return pc.Trans(
        pc.Seq(Digit(), Operator(), Digit()),
        func(pctx *pc.ParseContext[int], tokens []pc.Token[int]) ([]pc.Token[int], error) {
            left, op, right := tokens[0].Val, tokens[1].Raw, tokens[2].Val
            var result int
            switch op {
            case "+": result = left + right
            case "-": result = left - right
            case "*": result = left * right
            case "/": result = left / right
            }
            return []pc.Token[int]{{
                Type: "result",
                Pos:  tokens[0].Pos,
                Val:  result,
            }}, nil
        },
    )
}

func main() {
    context := pc.NewParseContext[int]()
    context.TraceEnable = true
    
    input := []string{"10", "+", "5"}
    result, err := pc.EvaluateWithRawTokens(context, input, Expression())
    
    if err != nil {
        fmt.Printf("Error: %v\n", err)
        context.DumpTrace()
        return
    }
    
    fmt.Printf("Result: %d\n", result[0]) // Output: 15
}

Best Practices

  1. Use Labels: Always use Label() for user-facing parsers to provide clear error messages
  2. Enable Tracing: Use tracing during development to understand parser behavior
  3. Handle Errors Gracefully: Use Expected() and Fail() to provide meaningful error messages
  4. Compose Incrementally: Build complex parsers from simple, well-tested components
  5. Use Recovery: Implement error recovery for robust parsing of malformed input
  6. Type Safety: Leverage Go's type system to catch errors at compile time

Practical Compiler Construction Patterns

Real-world AST Construction from Token Streams

In actual compilers, AST construction from token streams often follows staged patterns like these:

// AST Node definitions
type ASTNode interface {
    Type() string
    Position() *pc.Pos
    String() string
}

type BinaryOpNode struct {
    pos   *pc.Pos
    left  ASTNode
    op    string
    right ASTNode
}

func (n *BinaryOpNode) Type() string { return "BinaryOp" }
func (n *BinaryOpNode) Position() *pc.Pos { return n.pos }
func (n *BinaryOpNode) String() string { 
    return fmt.Sprintf("(%s %s %s)", n.left.String(), n.op, n.right.String()) 
}

type LiteralNode struct {
    pos   *pc.Pos
    value interface{}
}

func (n *LiteralNode) Type() string { return "Literal" }
func (n *LiteralNode) Position() *pc.Pos { return n.pos }
func (n *LiteralNode) String() string { return fmt.Sprintf("%v", n.value) }

// Staged AST construction parsers
func NumberLiteral() pc.Parser[ASTNode] {
    return pc.Trans(
        pc.Label("number literal", Digit()),
        func(pctx *pc.ParseContext[ASTNode], tokens []pc.Token[ASTNode]) ([]pc.Token[ASTNode], error) {
            // Extract value from original token and create new AST node
            digitToken := tokens[0]
            astNode := &LiteralNode{
                pos:   digitToken.Pos,
                value: digitToken.Val, // Preserve original int value
            }
            
            return []pc.Token[ASTNode]{{
                Type: "ast_node",
                Pos:  digitToken.Pos,
                Val:  astNode,
            }}, nil
        },
    )
}

func BinaryExpression() pc.Parser[ASTNode] {
    return pc.Trans(
        pc.Seq(NumberLiteral(), Operator(), NumberLiteral()),
        func(pctx *pc.ParseContext[ASTNode], tokens []pc.Token[ASTNode]) ([]pc.Token[ASTNode], error) {
            // Reference existing AST nodes to build new node
            leftNode := tokens[0].Val.(ASTNode)    // Old node reference
            opToken := tokens[1]                   // Operator token
            rightNode := tokens[2].Val.(ASTNode)   // Old node reference
            
            // Create new AST node
            binaryNode := &BinaryOpNode{
                pos:   leftNode.Position(),
                left:  leftNode,
                op:    opToken.Raw,
                right: rightNode,
            }
            
            return []pc.Token[ASTNode]{{
                Type: "ast_node",
                Pos:  leftNode.Position(),
                Val:  binaryNode,
            }}, nil
        },
    )
}
Complex AST Construction Patterns

For more complex structures, staged construction makes management easier:

// Function call node
type FunctionCallNode struct {
    pos       *pc.Pos
    name      string
    arguments []ASTNode
}

func (n *FunctionCallNode) Type() string { return "FunctionCall" }
func (n *FunctionCallNode) Position() *pc.Pos { return n.pos }
func (n *FunctionCallNode) String() string {
    args := make([]string, len(n.arguments))
    for i, arg := range n.arguments {
        args[i] = arg.String()
    }
    return fmt.Sprintf("%s(%s)", n.name, strings.Join(args, ", "))
}

// Argument list construction
func ArgumentList() pc.Parser[ASTNode] {
    return pc.Trans(
        pc.Seq(
            pc.Literal("("),
            pc.Optional(pc.Seq(
                Expression(),
                pc.ZeroOrMore("additional_args", pc.Seq(pc.Literal(","), Expression())),
            )),
            pc.Literal(")"),
        ),
        func(pctx *pc.ParseContext[ASTNode], tokens []pc.Token[ASTNode]) ([]pc.Token[ASTNode], error) {
            var arguments []ASTNode
            
            // If optional arguments exist
            if len(tokens) > 2 && tokens[1].Type == "ast_node" {
                // First argument
                arguments = append(arguments, tokens[1].Val.(ASTNode))
                
                // Additional arguments (, expression repetitions)
                for i := 2; i < len(tokens)-1; i += 2 {
                    if tokens[i].Type == "ast_node" {
                        arguments = append(arguments, tokens[i].Val.(ASTNode))
                    }
                }
            }
            
            // Create meta-node representing argument list
            argListNode := &ArgumentListNode{
                pos:  tokens[0].Pos,
                args: arguments,
            }
            
            return []pc.Token[ASTNode]{{
                Type: "argument_list",
                Pos:  tokens[0].Pos,
                Val:  argListNode,
            }}, nil
        },
    )
}

// Function call construction
func FunctionCall() pc.Parser[ASTNode] {
    return pc.Trans(
        pc.Seq(Identifier(), ArgumentList()),
        func(pctx *pc.ParseContext[ASTNode], tokens []pc.Token[ASTNode]) ([]pc.Token[ASTNode], error) {
            nameToken := tokens[0]
            argListNode := tokens[1].Val.(*ArgumentListNode)
            
            funcCallNode := &FunctionCallNode{
                pos:       nameToken.Pos,
                name:      nameToken.Raw,
                arguments: argListNode.args,
            }
            
            return []pc.Token[ASTNode]{{
                Type: "ast_node", 
                Pos:  nameToken.Pos,
                Val:  funcCallNode,
            }}, nil
        },
    )
}
Post-Tree Processing Patterns

For processing after tree construction, these approaches are effective:

// Visitor pattern for AST processing
type ASTVisitor interface {
    VisitBinaryOp(node *BinaryOpNode) error
    VisitLiteral(node *LiteralNode) error
    VisitFunctionCall(node *FunctionCallNode) error
}

// Type checker example
type TypeChecker struct {
    errors []error
    symbolTable map[string]Type
}

func (tc *TypeChecker) VisitBinaryOp(node *BinaryOpNode) error {
    // Process left and right child nodes recursively
    if err := node.left.Accept(tc); err != nil {
        return err
    }
    if err := node.right.Accept(tc); err != nil {
        return err
    }
    
    // Type checking logic
    leftType := tc.getNodeType(node.left)
    rightType := tc.getNodeType(node.right)
    
    if !tc.isCompatible(leftType, rightType, node.op) {
        return fmt.Errorf("type error: %s and %s cannot be used with operator %s", 
                         leftType, rightType, node.op)
    }
    
    return nil
}

// Transform pattern for AST transformation
type ASTTransformer interface {
    Transform(node ASTNode) (ASTNode, error)
}

// Optimizer example
type Optimizer struct{}

func (o *Optimizer) Transform(node ASTNode) (ASTNode, error) {
    switch n := node.(type) {
    case *BinaryOpNode:
        // Constant folding optimization
        if isConstant(n.left) && isConstant(n.right) {
            result := evaluateConstant(n)
            return &LiteralNode{pos: n.pos, value: result}, nil
        }
        
        // Recursively optimize child nodes
        optimizedLeft, err := o.Transform(n.left)
        if err != nil {
            return nil, err
        }
        optimizedRight, err := o.Transform(n.right)
        if err != nil {
            return nil, err
        }
        
        return &BinaryOpNode{
            pos:   n.pos,
            left:  optimizedLeft,
            op:    n.op,
            right: optimizedRight,
        }, nil
        
    default:
        return node, nil
    }
}
Multi-Pass Processing Patterns

Real compilers typically use multiple passes for processing:

// Main compiler processing
func CompileProgram(input []string) (*Program, error) {
    // Pass 1: Syntax analysis (using parser combinators)
    context := pc.NewParseContext[ASTNode]()
    ast, err := pc.EvaluateWithRawTokens(context, input, Program())
    if err != nil {
        return nil, fmt.Errorf("syntax analysis error: %w", err)
    }
    
    programNode := ast[0].Val.(*ProgramNode)
    
    // Pass 2: Symbol table construction
    symbolBuilder := &SymbolTableBuilder{}
    if err := programNode.Accept(symbolBuilder); err != nil {
        return nil, fmt.Errorf("symbol analysis error: %w", err)
    }
    
    // Pass 3: Type checking
    typeChecker := &TypeChecker{symbolTable: symbolBuilder.table}
    if err := programNode.Accept(typeChecker); err != nil {
        return nil, fmt.Errorf("type checking error: %w", err)
    }
    
    // Pass 4: Optimization
    optimizer := &Optimizer{}
    optimizedAST, err := optimizer.Transform(programNode)
    if err != nil {
        return nil, fmt.Errorf("optimization error: %w", err)
    }
    
    // Pass 5: Code generation
    codeGenerator := &CodeGenerator{}
    code, err := codeGenerator.Generate(optimizedAST)
    if err != nil {
        return nil, fmt.Errorf("code generation error: %w", err)
    }
    
    return &Program{AST: optimizedAST, Code: code}, nil
}

This way, parser combinators are used for the syntax analysis stage, and subsequent processing combines traditional AST processing patterns (Visitor, Transform, Multi-pass) to build practical compilers.

Structured Data Validation Patterns

Tree Structure Serialization for Validation

The approach you suggested is highly effective. Here's how to serialize tree structures with pseudo-nodes for parser validation:

// Tree structure representation
type TreeNode struct {
    Type     string
    Value    interface{}
    Children []*TreeNode
    Pos      *pc.Pos
}

// Serialization pseudo-tokens
type SerializedToken struct {
    Type  string  // "open", "close", "leaf"
    Node  string  // Node name
    Value interface{}
    Pos   *pc.Pos
}

// Serialize tree (DFS order to pseudo-token stream)
func SerializeTree(node *TreeNode) []SerializedToken {
    var tokens []SerializedToken
    
    if len(node.Children) == 0 {
        // Leaf node
        tokens = append(tokens, SerializedToken{
            Type:  "leaf",
            Node:  node.Type,
            Value: node.Value,
            Pos:   node.Pos,
        })
    } else {
        // Internal node: start
        tokens = append(tokens, SerializedToken{
            Type:  "open",
            Node:  node.Type,
            Value: node.Value,
            Pos:   node.Pos,
        })
        
        // Recursively process child nodes
        for _, child := range node.Children {
            tokens = append(tokens, SerializeTree(child)...)
        }
        
        // Internal node: end
        tokens = append(tokens, SerializedToken{
            Type: "close",
            Node: node.Type,
            Pos:  node.Pos,
        })
    }
    
    return tokens
}

// Validator for serialized tokens
func ValidateHTMLStructure() pc.Parser[bool] {
    // HTML tag start
    htmlOpen := pc.Trace("html_open", func(pctx *pc.ParseContext[bool], src []pc.Token[bool]) (int, []pc.Token[bool], error) {
        token := src[0].Val.(SerializedToken)
        if token.Type == "open" && token.Node == "html" {
            return 1, []pc.Token[bool]{{Type: "validated", Val: true}}, nil
        }
        return 0, nil, pc.NewErrNotMatch("HTML start tag", token.Node, src[0].Pos)
    })
    
    // HTML tag end
    htmlClose := pc.Trace("html_close", func(pctx *pc.ParseContext[bool], src []pc.Token[bool]) (int, []pc.Token[bool], error) {
        token := src[0].Val.(SerializedToken)
        if token.Type == "close" && token.Node == "html" {
            return 1, []pc.Token[bool]{{Type: "validated", Val: true}}, nil
        }
        return 0, nil, pc.NewErrNotMatch("HTML end tag", token.Node, src[0].Pos)
    })
    
    // body element validation
    bodyElement := pc.Seq(
        pc.Literal("body_open"),
        pc.ZeroOrMore("body_content", pc.Or(textContent, divElement)),
        pc.Literal("body_close"),
    )
    
    // Complete HTML structure validation
    return pc.Seq(htmlOpen, headElement, bodyElement, htmlClose)
}

// Execute validation
func ValidateHTMLTree(tree *TreeNode) error {
    // Serialize tree
    tokens := SerializeTree(tree)
    
    // Validate with parser combinators
    context := pc.NewParseContext[bool]()
    _, err := pc.EvaluateWithTokens(context, tokens, ValidateHTMLStructure())
    
    return err
}
Schema-Based Structure Validation

More general schema validation patterns:

// Schema definition
type Schema struct {
    Type       string             // "object", "array", "string", etc.
    Properties map[string]*Schema // Object properties
    Items      *Schema            // Array element schema
    Required   []string           // Required fields
    MinItems   int               // Minimum array elements
    MaxItems   int               // Maximum array elements
}

// JSON-like data structure
type DataNode struct {
    Type  string                 // "object", "array", "string", "number", "boolean"
    Value interface{}            // Actual value
    Props map[string]*DataNode   // Object properties
    Items []*DataNode            // Array elements
    Pos   *pc.Pos
}

// Schema validation parser generator
func CreateSchemaValidator(schema *Schema) pc.Parser[bool] {
    return pc.Trace(fmt.Sprintf("validate_%s", schema.Type), 
        func(pctx *pc.ParseContext[bool], src []pc.Token[bool]) (int, []pc.Token[bool], error) {
            data := src[0].Val.(*DataNode)
            
            // Type check
            if data.Type != schema.Type {
                return 0, nil, pc.NewErrNotMatch(
                    fmt.Sprintf("type %s", schema.Type), 
                    data.Type, 
                    data.Pos,
                )
            }
            
            switch schema.Type {
            case "object":
                return validateObject(schema, data, pctx)
            case "array":
                return validateArray(schema, data, pctx)
            default:
                return validatePrimitive(schema, data)
            }
        })
}

func validateObject(schema *Schema, data *DataNode, pctx *pc.ParseContext[bool]) (int, []pc.Token[bool], error) {
    // Required field validation
    for _, required := range schema.Required {
        if _, exists := data.Props[required]; !exists {
            return 0, nil, pc.NewErrCritical(
                fmt.Sprintf("required field '%s' not found", required),
                data.Pos,
            )
        }
    }
    
    // Validate each property
    for propName, propData := range data.Props {
        propSchema, exists := schema.Properties[propName]
        if !exists {
            return 0, nil, pc.NewErrNotMatch(
                "valid property",
                propName,
                propData.Pos,
            )
        }
        
        // Recursively validate property
        validator := CreateSchemaValidator(propSchema)
        _, _, err := validator(pctx, []pc.Token[bool]{{Val: propData, Pos: propData.Pos}})
        if err != nil {
            return 0, nil, fmt.Errorf("property '%s': %w", propName, err)
        }
    }
    
    return 1, []pc.Token[bool]{{Type: "validated_object", Val: true}}, nil
}

// Configuration file validation example
func ValidateConfigFile() pc.Parser[bool] {
    // Configuration file schema definition
    configSchema := &Schema{
        Type: "object",
        Required: []string{"server", "database"},
        Properties: map[string]*Schema{
            "server": {
                Type: "object",
                Required: []string{"host", "port"},
                Properties: map[string]*Schema{
                    "host": {Type: "string"},
                    "port": {Type: "number"},
                    "ssl":  {Type: "boolean"},
                },
            },
            "database": {
                Type: "object",
                Required: []string{"url"},
                Properties: map[string]*Schema{
                    "url":         {Type: "string"},
                    "max_connections": {Type: "number"},
                },
            },
        },
    }
    
    return pc.Label("configuration file", CreateSchemaValidator(configSchema))
}
Flat Structure Partial Validation

Methods for partial validation of existing structured data:

// CSV data row validation
type CSVRow struct {
    Fields []string
    LineNo int
}

func ValidateCSVRow(expectedColumns []string, validators map[string]pc.Parser[bool]) pc.Parser[bool] {
    return pc.Trans(
        pc.Trace("csv_row", func(pctx *pc.ParseContext[bool], src []pc.Token[bool]) (int, []pc.Token[bool], error) {
            row := src[0].Val.(*CSVRow)
            
            // Column count check
            if len(row.Fields) != len(expectedColumns) {
                return 0, nil, pc.NewErrNotMatch(
                    fmt.Sprintf("%d fields", len(expectedColumns)),
                    fmt.Sprintf("%d fields", len(row.Fields)),
                    &pc.Pos{Line: row.LineNo},
                )
            }
            
            // Validate each field
            for i, field := range row.Fields {
                columnName := expectedColumns[i]
                if validator, exists := validators[columnName]; exists {
                    fieldToken := pc.Token[bool]{
                        Type: "field",
                        Raw:  field,
                        Pos:  &pc.Pos{Line: row.LineNo, Column: i + 1},
                        Val:  field,
                    }
                    
                    _, _, err := validator(pctx, []pc.Token[bool]{fieldToken})
                    if err != nil {
                        return 0, nil, fmt.Errorf("column '%s' (line %d): %w", columnName, row.LineNo, err)
                    }
                }
            }
            
            return 1, []pc.Token[bool]{{Type: "validated_row", Val: true}}, nil
        }),
        func(pctx *pc.ParseContext[bool], tokens []pc.Token[bool]) ([]pc.Token[bool], error) {
            return tokens, nil
        },
    )
}

// Usage example: User data CSV validation
func CreateUserCSVValidator() pc.Parser[bool] {
    columns := []string{"name", "email", "age", "active"}
    
    validators := map[string]pc.Parser[bool]{
        "name": pc.Label("username", validateNonEmptyString()),
        "email": pc.Label("email address", validateEmail()),
        "age": pc.Label("age", validatePositiveNumber()),
        "active": pc.Label("active flag", validateBoolean()),
    }
    
    return pc.OneOrMore("csv_rows", ValidateCSVRow(columns, validators))
}
Real-time Validation Patterns

Validation for streaming or real-time data:

// Event stream validation
type Event struct {
    Type      string
    Timestamp time.Time
    Data      interface{}
    Pos       *pc.Pos
}

// State machine-based sequence validation
func ValidateEventSequence() pc.Parser[bool] {
    // User login flow validation
    loginFlow := pc.Seq(
        pc.Label("login start", expectEvent("login_start")),
        pc.Optional(pc.Label("auth attempt", expectEvent("auth_attempt"))),
        pc.Or(
            pc.Label("login success", expectEvent("login_success")),
            pc.Seq(
                pc.Label("login failure", expectEvent("login_failure")),
                pc.Optional(pc.Label("retry", ValidateEventSequence())), // Recursively allow retry
            ),
        ),
    )
    
    return loginFlow
}

func expectEvent(eventType string) pc.Parser[bool] {
    return pc.Trace(eventType, func(pctx *pc.ParseContext[bool], src []pc.Token[bool]) (int, []pc.Token[bool], error) {
        event := src[0].Val.(*Event)
        if event.Type == eventType {
            return 1, []pc.Token[bool]{{Type: "validated_event", Val: true}}, nil
        }
        return 0, nil, pc.NewErrNotMatch(eventType, event.Type, event.Pos)
    })
}

These patterns enable validation of various structured data types:

  1. Tree Serialization: Validation of complex hierarchical structures
  2. Schema-Based: Type-safe validation of JSON/XML-like data
  3. Flat Structure: Validation of tabular data like CSV/TSV
  4. Real-time: Validation of event streams and state transitions

License

Apache 2.0 License - see LICENSE file for details.

Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	// ErrNotMatch means parser doesn't match structure
	// Repeat, Or ignore this error
	ErrNotMatch = fmt.Errorf("not match")

	// ErrRepeatCount means repeat count miss matching.
	// Repeat, Or don't ignore this error
	ErrRepeatCount = fmt.Errorf("repeat count")

	// ErrCritical means critical error
	//
	// It is not just structure error that doesn't have extra options.
	// Repeat, Or don't ignore this error
	ErrCritical = fmt.Errorf("critical error")
)

Functions

func Evaluate

func Evaluate[T any](pctx *ParseContext[T], src []Token[T], parser Parser[T]) (result []T, err error)

func EvaluateWithRawTokens

func EvaluateWithRawTokens[T any](pc *ParseContext[T], src []string, parser Parser[T]) (result []T, err error)

func NewErrCritical

func NewErrCritical(message string, pos *Pos) error

func NewErrNotMatch

func NewErrNotMatch(expected, actual string, pos *Pos) error

func NewErrRepeatCount

func NewErrRepeatCount(label string, expected, actual int, pos *Pos) error

Types

type Alias

type Alias[T any] struct {
	// contains filtered or unexported fields
}

type ParseContext

type ParseContext[T any] struct {
	Tokens         []Token[T]
	Pos            int
	RemainedTokens []Token[T]
	Results        []Token[T]
	Traces         []*TraceInfo
	Errors         []*ParseError
	Depth          int
	TraceEnable    bool
}

func NewParseContext

func NewParseContext[T any]() *ParseContext[T]

func (*ParseContext[T]) AppendError

func (pc *ParseContext[T]) AppendError(err error, pos *Pos) error

func (*ParseContext[T]) DumpTrace

func (pc *ParseContext[T]) DumpTrace()

func (*ParseContext[T]) DumpTraceAsText

func (pc *ParseContext[T]) DumpTraceAsText() string

func (*ParseContext[T]) DumpTraceTo

func (pc *ParseContext[T]) DumpTraceTo(w io.Writer)

func (ParseContext[T]) GetError

func (pc ParseContext[T]) GetError() error

type ParseError

type ParseError struct {
	Parent error
	Pos    *Pos
}

func (ParseError) Error

func (e ParseError) Error() string

func (ParseError) Unwrap

func (e ParseError) Unwrap() error

type Parser

type Parser[T any] func(*ParseContext[T], []Token[T]) (consumed int, newTokens []Token[T], err error)

func Before

func Before[T any](callback func(token Token[T]) bool) Parser[T]

func Expected

func Expected[T any](message string) Parser[T]

Expected creates a parser that fails with a specific expected message Useful for creating custom error messages or placeholders

func Fail

func Fail[T any](message string) Parser[T]

Fail always fails with the given message Useful for debugging or creating conditional failures

func FollowedBy

func FollowedBy[T any](parser Parser[T]) Parser[T]

FollowedBy is an alias for Lookahead for better readability

func Label

func Label[T any](label string, parser Parser[T]) Parser[T]

Label provides a user-friendly label for error messages When the parser fails, it replaces technical error details with the provided label Unlike Trace, this is purely for error message improvement, not debugging

func Lookahead

func Lookahead[T any](parser Parser[T]) Parser[T]

Lookahead checks if the parser matches without consuming tokens Returns empty tokens if match, error if not match

func NewAlias

func NewAlias[T any](name string) (instance func(Parser[T]) Parser[T], alias Parser[T])

func None

func None[T any](label ...string) Parser[T]

func NotFollowedBy

func NotFollowedBy[T any](parser Parser[T]) Parser[T]

NotFollowedBy succeeds if the parser does NOT match (negative lookahead) Returns empty tokens if parser fails, error if parser succeeds

func OneOrMore

func OneOrMore[T any](label string, parser Parser[T]) Parser[T]

func Optional

func Optional[T any](parser Parser[T]) Parser[T]

func Or

func Or[T any](parsers ...Parser[T]) Parser[T]

func Peek

func Peek[T any](parser Parser[T]) Parser[T]

Peek returns the result of the parser without consuming tokens Useful for inspection or conditional parsing

func Recover

func Recover[T any](search, body, skipUntil Parser[T]) Parser[T]

func Repeat

func Repeat[T any](label string, min uint, max int, parser Parser[T]) Parser[T]

func Seq

func Seq[T any](parsers ...Parser[T]) Parser[T]

func SeqWithLabel

func SeqWithLabel[T any](label string, parsers ...Parser[T]) Parser[T]

func Trace

func Trace[T any](name string, p Parser[T]) Parser[T]

func Trans

func Trans[T any](parser Parser[T], tf Transformer[T]) Parser[T]

func ZeroOrMore

func ZeroOrMore[T any](label string, parser Parser[T]) Parser[T]

type Pos

type Pos struct {
	Line   int
	Col    int
	Index  int
	Length int
}

func (Pos) Copy

func (p Pos) Copy() *Pos

func (*Pos) String

func (p *Pos) String() string

type Token

type Token[T any] struct {
	Type string
	Pos  *Pos
	Raw  string
	Val  T
}

func (Token[T]) GoString

func (t Token[T]) GoString() string

type Tokens

type Tokens[T any] []*Token[T]

type TraceInfo

type TraceInfo struct {
	TraceType TraceType
	Depth     int
	Name      string
	Pos       *Pos
	Result    string
}

type TraceType

type TraceType int
const (
	Enter TraceType = iota
	Match
	EnterMatch
	NotMatch
	EnterNotMatch
)

func (TraceType) String

func (tp TraceType) String() string

type Transformer

type Transformer[T any] func(pctx *ParseContext[T], src []Token[T]) (converted []Token[T], err error)

Directories

Path Synopsis
examples
basic command
ZeroOrMore Example - コンマ区切りの数値リストをパースする例
ZeroOrMore Example - コンマ区切りの数値リストをパースする例
compiler command
Simple Math Expression to JSON AST Compiler - シンプルな数式→JSON AST コンパイラ
Simple Math Expression to JSON AST Compiler - シンプルな数式→JSON AST コンパイラ
interpreter command
Simple Calculator Interpreter - 数式を直接評価する電卓
Simple Calculator Interpreter - 数式を直接評価する電卓

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL