Documentation
¶
Overview ¶
Package textscan implements a parser that quickly scans and analyzes text content. It can be used to parse INI, Properties and other formats.
Index ¶
- Constants
- Variables
- func AddKind(k Kind, name string)
- func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error)
- func CommentsDetectEnd(line string) bool
- func HasKind(k Kind) bool
- func IsKindToken(k Kind, tok Token) bool
- func KindString(k Kind) string
- type BaseToken
- type CommentToken
- type CommentsMatcher
- type ErrScan
- type HandleFn
- type KeyValueMatcher
- type Kind
- type LiteToken
- type Matcher
- type Parser
- type StringToken
- type TextScanner
- func (s *TextScanner) AddKind(k Kind, name string)
- func (s *TextScanner) AddMatchers(ms ...Matcher)
- func (s *TextScanner) Each(fn func(t Token)) error
- func (s *TextScanner) Err() error
- func (s *TextScanner) Line() int
- func (s *TextScanner) PrevToken() Token
- func (s *TextScanner) Scan() bool
- func (s *TextScanner) ScanNext() (ok bool, text string)
- func (s *TextScanner) SetInput(in any)
- func (s *TextScanner) SetNext(text string)
- func (s *TextScanner) SetSplit(fn bufio.SplitFunc)
- func (s *TextScanner) Token() Token
- type Token
- type ValueToken
- func (t *ValueToken) Comment() string
- func (t *ValueToken) HasComment() bool
- func (t *ValueToken) HasMore() bool
- func (t *ValueToken) Key() string
- func (t *ValueToken) Mark() string
- func (t *ValueToken) MergeSame(_ Token) error
- func (t *ValueToken) ScanMore(ts *TextScanner) error
- func (t *ValueToken) String() string
- func (t *ValueToken) Value() string
- func (t *ValueToken) Values() []string
Examples ¶
Constants ¶
const ( MultiLineValMarkS = "'''" MultiLineValMarkD = `"""` MultiLineValMarkH = "<<<" // heredoc at start. <<<TXT ... TXT MultiLineValMarkQ = "\\" // at end. eg: properties contents MultiLineCmtEnd = "*/" )
Special-character constants: the markers for multi-line values and the end marker for multi-line comments.
Variables ¶
var ErrCommentsNotEnd = errors.New("not end of multi-line comments")
ErrCommentsNotEnd is the error for a multi-line comment that has not ended.
var ErrMLineValueNotEnd = errors.New("not end of multi line value")
ErrMLineValueNotEnd is the error for a multi-line value that has not ended.
Functions ¶
func CommentsDetect ¶
CommentsDetect checks whether the given text is a comment.
- inlineChars: #
default match:
- inline #, //
- multi line: /*
func CommentsDetectEnd ¶
CommentsDetectEnd detects the end of a multi-line comment.
Types ¶
type BaseToken ¶
type BaseToken struct {
// contains filtered or unexported fields
}
BaseToken struct
type CommentToken ¶
type CommentToken struct {
BaseToken
// contains filtered or unexported fields
}
CommentToken struct
func (*CommentToken) MergeSame ¶
func (t *CommentToken) MergeSame(tok Token) error
MergeSame comments token
func (*CommentToken) ScanMore ¶
func (t *CommentToken) ScanMore(ts *TextScanner) error
ScanMore scans the remaining lines of a multi-line comment.
type CommentsMatcher ¶
type CommentsMatcher struct {
// InlineChars for match inline comments. default is: #
InlineChars []byte
// MatchFn for comments line
// - mark useful on multi line comments
MatchFn func(text string) (ok, more bool, err error)
// DetectEnd for multi line comments
DetectEnd func(text string) bool
}
CommentsMatcher matches comment lines. It automatically merges with the previous comments token.
func (*CommentsMatcher) Match ¶
func (m *CommentsMatcher) Match(text string, prev Token) (Token, error)
Match comments token
func (*CommentsMatcher) MatchEnd ¶
func (m *CommentsMatcher) MatchEnd(text string) bool
MatchEnd for multi line comments
type ErrScan ¶
type ErrScan struct {
Msg string // error message
Line int // error line number, start 1
Text string // text contents on error
}
ErrScan is the error reported when scanning or parsing contents fails.
type KeyValueMatcher ¶
type KeyValueMatcher struct {
// Separator string for split key and value, default is "="
Separator string
// MergeComments collect previous comments token to value token.
// If set as True, on each s.Scan() please notice skip TokComments
MergeComments bool
// InlineComment parse and split inline comment
InlineComment bool
// DisableMultiLine value parse
DisableMultiLine bool
// KeyCheckFn set func check key string is valid
KeyCheckFn func(key string) error
}
KeyValueMatcher matches key-value tokens. It supports parsing `KEY=VALUE` lines of text.
type Matcher ¶
type Matcher interface {
// Match text line by kind, if success returns a new Token
Match(line string, prev Token) (tok Token, err error)
}
Matcher interface
type Parser ¶
type Parser struct {
// Func for handle tokens
Func HandleFn
// contains filtered or unexported fields
}
Parser struct
func (*Parser) AddMatchers ¶
AddMatchers register token matchers
type StringToken ¶
type StringToken struct {
BaseToken
}
StringToken struct
func NewEmptyToken ¶
func NewEmptyToken() *StringToken
NewEmptyToken creates an empty token instance. It can be used when you want to skip parsing some contents.
func (*StringToken) ScanMore ¶
func (t *StringToken) ScanMore(_ *TextScanner) error
ScanMore implements
type TextScanner ¶
type TextScanner struct {
// contains filtered or unexported fields
}
TextScanner struct.
func NewScanner ¶
func NewScanner(in any) *TextScanner
NewScanner instance
Example ¶
package main
import (
"fmt"
"github.com/gookit/goutil/strutil/textscan"
)
func main() {
ts := textscan.NewScanner(`source code`)
// add token matcher, can add your custom matcher
ts.AddMatchers(
&textscan.CommentsMatcher{
InlineChars: []byte{'#'},
},
&textscan.KeyValueMatcher{
MergeComments: true,
},
)
// scan and parsing
for ts.Scan() {
tok := ts.Token()
if !tok.IsValid() {
continue
}
// Custom handle the parsed token
if tok.Kind() == textscan.TokValue {
vt := tok.(*textscan.ValueToken)
fmt.Println(vt)
}
}
if ts.Err() != nil {
fmt.Println("ERROR:", ts.Err())
}
}
func (*TextScanner) AddKind ¶
func (s *TextScanner) AddKind(k Kind, name string)
AddKind register new kind
func (*TextScanner) AddMatchers ¶
func (s *TextScanner) AddMatchers(ms ...Matcher)
AddMatchers register token matchers
func (*TextScanner) Each ¶
func (s *TextScanner) Each(fn func(t Token)) error
Each calls the given func for every token.
func (*TextScanner) PrevToken ¶
func (s *TextScanner) PrevToken() Token
PrevToken returns the token from the previous scan.
func (*TextScanner) Scan ¶
func (s *TextScanner) Scan() bool
Scan scans the source input and parses it. Use Token() to get the currently parsed token.
Usage:
ts := textscan.NewScanner(`source ...`)
for ts.Scan() {
tok := ts.Token()
// do something...
}
fmt.Println(ts.Err())
func (*TextScanner) ScanNext ¶
func (s *TextScanner) ScanNext() (ok bool, text string)
ScanNext advances and fetches the next line of text.
func (*TextScanner) SetNext ¶
func (s *TextScanner) SetNext(text string)
SetNext text for scan and parse
func (*TextScanner) SetSplit ¶
func (s *TextScanner) SetSplit(fn bufio.SplitFunc)
SetSplit set split func on scan
type Token ¶
type Token interface {
LiteToken
String() string
// HasMore is multi line values
HasMore() bool
// ScanMore scan multi line values
ScanMore(ts *TextScanner) error
MergeSame(tok Token) error
}
Token parser
type ValueToken ¶
type ValueToken struct {
BaseToken
// contains filtered or unexported fields
}
ValueToken contains key and value contents
func (*ValueToken) Mark ¶ added in v0.5.15
func (t *ValueToken) Mark() string
Mark for multi line values
func (*ValueToken) MergeSame ¶
func (t *ValueToken) MergeSame(_ Token) error
MergeSame comments token
func (*ValueToken) ScanMore ¶
func (t *ValueToken) ScanMore(ts *TextScanner) error
ScanMore scan multi line values
func (*ValueToken) Values ¶ added in v0.5.15
func (t *ValueToken) Values() []string
Values for multi line values