Documentation ¶
Overview ¶
Package lex provides all the lexing functions that transform text into lexical tokens, using token types defined in the pi/token package. It also has the basic file source and position / region management functionality.
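As a brief, hedged orientation (the import path is assumed from the parent pi repository), source is set on a File as lines of runes, which then drive per-line lexing:

package main

import (
	"fmt"

	"github.com/goki/pi/lex" // assumed import path for this package
)

func main() {
	// source is kept as lines of runes so file positions map directly to indexes
	src := [][]rune{
		[]rune("// a comment"),
		[]rune("x := 1"),
	}
	fl := &lex.File{}
	fl.SetSrc(&src, "example.go") // signature as listed in the Index below
	fmt.Println(fl.NLines())      // prints: 2
}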
Index ¶
- Variables
- func DigitVal(ch rune) int
- func IsDigit(ch rune) bool
- func IsLetter(ch rune) bool
- func IsWhiteSpace(ch rune) bool
- func PrintError(w io.Writer, err error)
- type Actions
- type Error
- type ErrorList
- func (p *ErrorList) Add(pos Pos, fname, msg string)
- func (p ErrorList) AllString() string
- func (p ErrorList) Err() error
- func (p ErrorList) Error() string
- func (p ErrorList) Len() int
- func (p ErrorList) Less(i, j int) bool
- func (p *ErrorList) RemoveMultiples()
- func (p *ErrorList) Reset()
- func (p ErrorList) Sort()
- func (p ErrorList) Swap(i, j int)
- type File
- func (fl *File) AllocLines()
- func (fl *File) IsLexPosValid(pos Pos) bool
- func (fl *File) LexAt(cp Pos) *Lex
- func (fl *File) LexAtSafe(cp Pos) Lex
- func (fl *File) LexLine(ln int) Line
- func (fl *File) LexTagSrc() string
- func (fl *File) LexTagSrcLn(ln int) string
- func (fl *File) LinesDeleted(stln, edln int)
- func (fl *File) LinesInserted(stln, nsz int)
- func (fl *File) NLines() int
- func (fl *File) NTokens(ln int) int
- func (fl *File) NextTokenPos(pos Pos) (Pos, bool)
- func (fl *File) PrevDepth(ln int) int
- func (fl *File) PrevStack(ln int) Stack
- func (fl *File) PrevTokenPos(pos Pos) (Pos, bool)
- func (fl *File) RegSrc(reg Reg) string
- func (fl *File) SetLine(ln int, lexs, comments Line, stack Stack)
- func (fl *File) SetSrc(src *[][]rune, fname string)
- func (fl *File) Token(pos Pos) token.Tokens
- func (fl *File) TokenMapReg(reg Reg) TokenMap
- func (fl *File) TokenRegSrc(reg Reg) string
- func (fl *File) TokenSrc(pos Pos) []rune
- func (fl *File) TokenSrcPos(pos Pos) Reg
- func (fl *File) TokenSrcReg(reg Reg) Reg
- func (fl *File) ValidTokenPos(pos Pos) (Pos, bool)
- type LangLexer
- type Lex
- type Lexer
- type Line
- type MatchPos
- type Matches
- type PassTwo
- func (pt *PassTwo) EosDetect(ts *TwoState)
- func (pt *PassTwo) ErrString(ts *TwoState) string
- func (pt *PassTwo) Error(ts *TwoState, msg string)
- func (pt *PassTwo) HasErrs(ts *TwoState) bool
- func (pt *PassTwo) MismatchError(ts *TwoState, tok token.Tokens)
- func (pt *PassTwo) NestDepth(ts *TwoState)
- func (pt *PassTwo) NestDepthLine(line Line, initDepth int)
- func (pt *PassTwo) PopNest(ts *TwoState, tok token.Tokens)
- func (pt *PassTwo) PushNest(ts *TwoState, tok token.Tokens)
- type Pos
- type Reg
- type Rule
- func (lr *Rule) AsLexRule() *Rule
- func (lr *Rule) BaseIface() reflect.Type
- func (lr *Rule) DoAct(ls *State, act Actions)
- func (lr *Rule) Find(find string) []*Rule
- func (lr *Rule) IsMatch(ls *State) bool
- func (lr *Rule) IsMatchPos(ls *State) bool
- func (lr *Rule) Lex(ls *State) *Rule
- func (lr *Rule) LexStart(ls *State) *Rule
- func (lr *Rule) TargetLen(ls *State) int
- func (lr *Rule) Validate(ls *State) bool
- func (lr *Rule) WriteGrammar(writer io.Writer, depth int)
- type Stack
- type State
- func (ls *State) Add(tok token.Tokens, st, ed int)
- func (ls *State) AtEol() bool
- func (ls *State) CurRune() bool
- func (ls *State) CurState() string
- func (ls *State) Error(pos int, msg string)
- func (ls *State) Init()
- func (ls *State) LineString() string
- func (ls *State) Next(inc int) bool
- func (ls *State) NextRune() bool
- func (ls *State) NextSrcLine() string
- func (ls *State) PopState() string
- func (ls *State) PushState(st string)
- func (ls *State) ReadEscape(quote rune) bool
- func (ls *State) ReadName()
- func (ls *State) ReadNumber() token.Tokens
- func (ls *State) ReadQuoted()
- func (ls *State) Rune(off int) (rune, bool)
- func (ls *State) ScanMantissa(base int)
- func (ls *State) SetLine(src []rune)
- func (ls *State) String(off, sz int) (string, bool)
- type TokenMap
- type TwoState
Constants ¶
This section is empty.
Variables ¶
var KiT_Rule = kit.Types.AddType(&Rule{}, RuleProps)
var PosErr = Pos{-1, -1}
PosErr represents an error text position (-1 for both line and char), used as a return value for cases where a valid position cannot be returned
var PosZero = Pos{}
PosZero is the uninitialized zero text position (which is still a valid position)
var RegZero = Reg{}
RegZero is the zero region
var RuleProps = ki.Props{}
Functions ¶
func IsWhiteSpace ¶
func PrintError ¶
PrintError is a utility function that prints a list of errors to w, one error per line, if the err parameter is an ErrorList. Otherwise it prints the err string.
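A minimal sketch of accumulating and printing errors, using only the signatures listed above (positions and messages are illustrative; os.Stderr assumes the standard os import):

var errs lex.ErrorList
errs.Add(lex.Pos{3, 0}, "example.go", "unterminated string")
errs.Add(lex.Pos{7, 2}, "example.go", "invalid escape sequence")
if err := errs.Err(); err != nil { // nil when the list is empty
	lex.PrintError(os.Stderr, err) // one error per line for an ErrorList
}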
Types ¶
type Actions ¶
type Actions int
Actions are lexing actions to perform
const (
	// Next means advance input position to the next character(s) after the matched characters
	Next Actions = iota

	// Name means read in an entire name, which is letters, _ and digits after first letter
	// position will be advanced to just after
	Name

	// Number means read in an entire number -- the token type will automatically be
	// set to the actual type of number that was read in, and position advanced to just after
	Number

	// Quoted means read in an entire string enclosed in quote delimiter
	// that is present at current position, with proper skipping of escapes.
	// Position advanced to just after
	Quoted

	// QuotedRaw means read in an entire string enclosed in quote delimiter
	// that is present at start position, with proper skipping of escapes.
	// Position advanced to just after.
	// Raw version supports multi-line and includes CR etc at end of lines (e.g., back-tick
	// in various languages)
	QuotedRaw

	// EOL means read till the end of the line (e.g., for single-line comments)
	EOL

	// PushState means push the given state value onto the state stack
	PushState

	// PopState means pop given state value off the state stack
	PopState

	// SetGuestLex means install the Name (must be a prior action) as the guest
	// lexer -- it will take over lexing until PopGuestLex is called
	SetGuestLex

	// PopGuestLex removes the current guest lexer and returns to the original
	// language lexer
	PopGuestLex

	ActionsN
)
The lexical acts
func (*Actions) FromString ¶
func (Actions) MarshalJSON ¶
func (*Actions) UnmarshalJSON ¶
type Error ¶
In an ErrorList, an error is represented by an *Error. The position Pos, if valid, points to the beginning of the offending token, and the error condition is described by Msg.
type ErrorList ¶
type ErrorList []*Error
ErrorList is a list of *Errors. The zero value for an ErrorList is an empty ErrorList ready to use.
func (ErrorList) Err ¶
Err returns an error equivalent to this error list. If the list is empty, Err returns nil.
func (*ErrorList) RemoveMultiples ¶
func (p *ErrorList) RemoveMultiples()
RemoveMultiples sorts an ErrorList and removes all but the first error per line.
type File ¶
type File struct {
Filename string `desc:"the current file being lex'd"`
Lines *[][]rune `desc:"contents of the file as lines of runes"`
Lexs []Line `desc:"lex'd version of the lines -- allocated to size of Lines"`
Comments []Line `` /* 148-byte string literal not displayed */
LastStacks []Stack `desc:"stack present at the end of each line -- needed for contextualizing line-at-time lexing while editing"`
}
File contains the contents of the file being parsed -- all kept in memory, and represented as lines of runes, so that positions in the file are directly convertible to indexes in the Lines structure
func (*File) AllocLines ¶
func (fl *File) AllocLines()
AllocLines allocates the data per line: lex outputs and stack. We reset state so stale state is not hanging around.
func (*File) IsLexPosValid ¶
IsLexPosValid returns true if given lexical token position is valid
func (*File) LexAtSafe ¶
LexAtSafe returns the Lex item at given position, or last lex item if beyond end
func (*File) LexLine ¶
LexLine returns the lexing output for given line, combining comments and all other tokens and allocating new memory using clone
func (*File) LexTagSrcLn ¶
LexTagSrcLn returns the lex'd tagged source line for given line
func (*File) LinesDeleted ¶
LinesDeleted deletes lines -- called e.g., by giv.TextBuf to sync the markup with ongoing edits
func (*File) LinesInserted ¶
LinesInserted inserts new lines -- called e.g., by giv.TextBuf to sync the markup with ongoing edits
func (*File) NextTokenPos ¶
NextTokenPos returns the next token position, false if at end of tokens
func (*File) PrevTokenPos ¶
PrevTokenPos returns the previous token position, false if at beginning of tokens
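Together with ValidTokenPos and Token, these support walking every token in a file -- a sketch, assuming fl is a *File that has already been lex'd:

pos, ok := fl.ValidTokenPos(lex.PosZero) // move to a valid token position
for ok {
	fmt.Println(pos, fl.Token(pos), string(fl.TokenSrc(pos)))
	pos, ok = fl.NextTokenPos(pos) // false at end of tokens
}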
func (*File) TokenMapReg ¶
TokenMapReg creates a TokenMap of tokens in region, including their Cat and SubCat levels -- errs on the side of inclusiveness -- used for optimizing token matching
func (*File) TokenRegSrc ¶
TokenRegSrc returns the source code associated with the given token region
func (*File) TokenSrcPos ¶
TokenSrcPos returns the source region associated with the lex token at the given token position
func (*File) TokenSrcReg ¶
TokenSrcReg translates a region of tokens into a region of source
type LangLexer ¶
type LangLexer interface {
// Lexer returns the top-level lex.Rule for given language (case invariant lookup)
Lexer(lang string) *Rule
}
LangLexer looks up the lexer for a given language -- implemented in the parent pi package, so we need the interface here
var TheLangLexer LangLexer
TheLangLexer is the instance of LangLexer interface used to lookup lexers for languages -- is set in pi/langs.go
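A hedged usage sketch, assuming the pi package has registered a lexer under the given name:

goRule := lex.TheLangLexer.Lexer("go") // case-invariant lookup
if goRule != nil {
	// goRule is the top-level lex.Rule for the language
}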
type Lex ¶
type Lex struct {
Tok token.Tokens `desc:"token"`
Depth int `` /* 295-byte string literal not displayed */
St int `desc:"start rune index within original source line for this token"`
Ed int `desc:"end rune index within original source line for this token (exclusive -- ends one before this)"`
Time nptime.Time `` /* 129-byte string literal not displayed */
}
Lex represents a single lexical element, with a token, and start and end rune positions within a line of a file. Critically it also contains the nesting depth computed from all the parens, brackets, braces. Todo: also support XML < > </ > tag depth.
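A hedged sketch of a Lex element for a 5-rune name token (token.Name is assumed to exist in pi/token):

lx := lex.Lex{Tok: token.Name, Depth: 1, St: 4, Ed: 9} // spans runes [4,9) in the line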
func (*Lex) ContainsPos ¶
ContainsPos returns true if the Lex element contains given character position
func (*Lex) OverlapsReg ¶
OverlapsReg returns true if the regions of the two Lex elements overlap
type Lexer ¶
type Lexer interface {
ki.Ki
// Validate checks for any errors in the rules and issues warnings,
// returns true if valid (no err) and false if invalid (errs)
Validate(ls *State) bool
// Lex tries to apply rule to given input state, returns true if matched, false if not
Lex(ls *State) *Rule
// AsLexRule returns object as a lex.Rule
AsLexRule() *Rule
}
Lexer is the interface type for lexers -- likely not necessary, except that it is essential for defining the BaseIface for the gui in making new nodes
type Line ¶
type Line []Lex
Line is one line of Lex'd text
func MergeLines ¶
MergeLines merges the two lines of lex regions into a combined list properly ordered by sequence of tags within the line.
func (*Line) AddLex ¶
AddLex adds one element to the lex line with given params, returns pointer to that new lex
func (*Line) AddSort ¶
AddSort adds a new lex element in sorted order to list, sorted by start position, and if at the same start position, then sorted by end position
type MatchPos ¶
type MatchPos int
MatchPos are special positions for a match to occur
const (
	// AnyPos matches at any position
	AnyPos MatchPos = iota

	// StartOfLine matches at start of line
	StartOfLine

	// EndOfLine matches at end of line
	EndOfLine

	// MiddleOfLine matches not at the start or end
	MiddleOfLine

	MatchPosN
)
Matching rules
func (*MatchPos) FromString ¶
func (MatchPos) MarshalJSON ¶
func (*MatchPos) UnmarshalJSON ¶
type Matches ¶
type Matches int
Matches are what kind of lexing matches to make
const (
	// String means match a specific string as given in the rule
	// Note: this only looks for the string with no constraints on
	// what happens after this string -- use StrName to match entire names
	String Matches = iota

	// StrName means match a specific string that is a complete alpha-numeric
	// string (including underbar _) with some other char at the end
	// must use this for all keyword matches to ensure that it isn't just
	// the start of a longer name
	StrName

	// Match any letter, including underscore
	Letter

	// Match digit 0-9
	Digit

	// Match any white space (space, tab) -- input is already broken into lines
	WhiteSpace

	// CurState means match current state value set by a PushState action, using String value in rule
	// all CurState cases must generally be first in list of rules so they can preempt
	// other rules when the state is active
	CurState

	// AnyRune means match any rune -- use this as the last condition where other terminators
	// come first!
	AnyRune

	MatchesN
)
Matching rules
func (*Matches) FromString ¶
func (Matches) MarshalJSON ¶
func (*Matches) UnmarshalJSON ¶
type PassTwo ¶
type PassTwo struct {
DoEos bool `desc:"should we perform EOS detection on this type of file?"`
Eol bool `desc:"use end-of-line as a default EOS, if nesting depth is same as start of line (python) -- see also EolToks"`
Semi bool `desc:"replace all semicolons with EOS to keep it consistent (C, Go..)"`
Backslash bool `desc:"use backslash as a line continuer (python)"`
EolToks token.KeyTokenList `desc:"specific tokens to recognize at the end of a line that trigger an EOS (Go)"`
}
PassTwo performs second pass(es) through the lexicalized version of the source, computing nesting depth for every token once and for all -- this is essential for properly matching tokens and also for colorization in syntax highlighting. Optionally, a subsequent pass finds end-of-statement (EOS) tokens, which are essential for parsing to first break the source down into statement-sized chunks. A separate list of EOS token positions is maintained for very fast access.
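A hedged sketch of configuring and running PassTwo for a C/Go-style language, using only the documented fields and method signatures (fl is assumed to be a lex'd *File):

pt := &lex.PassTwo{DoEos: true, Semi: true} // semicolons become EOS tokens
ts := &lex.TwoState{Src: fl}
pt.NestDepth(ts) // calls ts.Init() at start; computes depth for every token
pt.EosDetect(ts) // then mark end-of-statement positions
if pt.HasErrs(ts) {
	fmt.Println(pt.ErrString(ts))
}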
func (*PassTwo) MismatchError ¶
MismatchError reports a mismatch for given type of parentheses / bracket
func (*PassTwo) NestDepthLine ¶
NestDepthLine performs nesting depth computation on only one line, starting at given initial depth -- updates the given line
type Pos ¶
Pos is a position within the source file -- it is recorded always in 0, 0 offset positions, but is converted into 1,1 offset for public consumption. Ch positions are always in runes, not bytes. Also used for lex token indexes.
type Reg ¶
type Reg struct {
St Pos `desc:"starting position of region"`
Ed Pos `desc:"ending position of region"`
}
Reg is a contiguous region within the source file
type Rule ¶
type Rule struct {
ki.Node
Desc string `desc:"description / comments about this rule"`
Token token.Tokens `desc:"the token value that this rule generates -- use None for non-terminals"`
Match Matches `desc:"the lexical match that we look for to engage this rule"`
Pos MatchPos `desc:"position where match can occur"`
String string `desc:"if Match is String, StrName, or CurState, this is the string we match"`
Off int `desc:"offset into the input to look for a match: 0 = current char, 1 = next one, etc"`
Acts []Actions `desc:"the action(s) to perform, in order, if there is a match -- these are performed prior to iterating over child nodes"`
PushState string `desc:"the state to push if our action is PushState -- note that State matching is on String, not this value"`
TokEff token.Tokens `view:"-" json:"-" desc:"effective token based on input -- e.g., for number is the type of number"`
MatchLen int `view:"-" json:"-" desc:"length of source that matched -- if Next is called, this is what will be skipped to"`
}
lex.Rule operates on the text input to produce the lexical tokens. It is assembled into a lexical grammar structure to perform lexing.
Lexing is done line-by-line -- you must push and pop states to coordinate across multiple lines, e.g., for multi-line comments
In general it is best to keep lexing as simple as possible and leave the more complex things for the parsing step.
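A hedged sketch of two individual rules (in a real grammar these are assembled as children of a top-level Rule via the ki tree; token.Keyword and token.Comment are assumed from pi/token):

// match the complete name "func" as a keyword, then advance past it
kw := &lex.Rule{
	Token:  token.Keyword,
	Match:  lex.StrName, // StrName, not String, so "function" does not match
	String: "func",
	Acts:   []lex.Actions{lex.Next},
}

// single-line comment: match "//", then consume to end of line
cmt := &lex.Rule{
	Token:  token.Comment,
	Match:  lex.String,
	String: "//",
	Acts:   []lex.Actions{lex.EOL},
}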
func (*Rule) Find ¶
Find looks for rules in the tree that contain given string in String or Name fields
func (*Rule) IsMatch ¶
IsMatch tests if the rule matches for current input state, returns true if so, false if not
func (*Rule) IsMatchPos ¶
IsMatchPos tests if the rule matches position
func (*Rule) Lex ¶
Lex tries to apply rule to given input state, returns lowest-level rule that matched, nil if none
func (*Rule) LexStart ¶
LexStart is called on the top-level lex node to start lexing process for one step
type State ¶
type State struct {
Filename string `desc:"the current file being lex'd"`
KeepWS bool `desc:"if true, record whitespace tokens -- else ignore"`
Src []rune `desc:"the current line of source being processed"`
Lex Line `desc:"the lex output for this line"`
Comments Line `desc:"the comments output for this line -- kept separately"`
Pos int `desc:"the current rune char position within the line"`
Ln int `desc:"the line within overall source that we're operating on (0 indexed)"`
Ch rune `desc:"the current rune read by NextRune"`
Stack Stack `desc:"state stack"`
LastName string `desc:"the last name that was read"`
GuestLex *Rule `desc:"a guest lexer that can be installed for managing a different language type, e.g., quoted text in markdown files"`
SaveStack Stack `desc:"copy of stack at point when guest lexer was installed -- restore when popped"`
Time nptime.Time `desc:"time stamp for lexing -- set at start of new lex process"`
Errs ErrorList `desc:"any error messages accumulated during lexing specifically"`
}
lex.State is the state maintained for lexing
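A hedged sketch of lexing one line with a State, assuming topRule is a validated top-level Rule (e.g., from TheLangLexer above):

ls := &lex.State{Filename: "example.go"}
ls.Init()                         // reset lexing state
ls.SetLine([]rune("x := 1 // y")) // lexing is done line-by-line
for !ls.AtEol() {
	if topRule.LexStart(ls) == nil {
		break // no rule matched -- avoid an infinite loop
	}
}
fmt.Println(ls.LineString()) // the lex output as tagged source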
func (*State) LineString ¶
LineString returns the current lex output as tagged source
func (*State) Next ¶
Next moves to next position using given increment in source line -- returns false if at end
func (*State) NextSrcLine ¶
NextSrcLine returns the next line of text
func (*State) ReadEscape ¶
ReadEscape parses an escape sequence where quote is the accepted escaped quote. In case of a syntax error, it stops at the offending character (without consuming it) and returns false. Otherwise it returns true.
func (*State) ReadNumber ¶
func (*State) ReadQuoted ¶
func (ls *State) ReadQuoted()
func (*State) Rune ¶
Rune gets the rune at given offset from current position, returns false if out of range
func (*State) ScanMantissa ¶
type TokenMap ¶
TokenMap is a token map, for optimizing token exclusion
type TwoState ¶
type TwoState struct {
Pos Pos `desc:"position in lex tokens we're on"`
Src *File `desc:"file that we're operating on"`
NestStack []token.Tokens `desc:"stack of nesting tokens"`
EosPos []Pos `desc:"positions *in token coordinates* of the EOS markers generated"`
Errs ErrorList `desc:"any error messages accumulated during lexing specifically"`
}
TwoState is the state maintained for the PassTwo process
func (*TwoState) Init ¶
func (ts *TwoState) Init()
Init initializes state for a new pass -- called at start of NestDepth
func (*TwoState) InsertEOS ¶
InsertEOS inserts an EOS just after the given token position (e.g., cp = last token in line)
func (*TwoState) NestStackStr ¶
NestStackStr returns the token stack as strings
func (*TwoState) ReplaceEOS ¶
ReplaceEOS replaces given token with an EOS