tokenizer

package
v0.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 2, 2023 License: MIT Imports: 6 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	ErrorIncorrectCharacter                = errors.New("incorrect character")
	ErrorIncorrectValueForState            = errors.New("extracted value not match state")
	ErrorIncorrectValueTypeForConstructAST = errors.New("incorrect value type for construct ast")
	ErrorInternalASTProcotolChanged        = errors.New("detect unexpect ast stack change, not kv, array, object, NodeType at top of stack")
	ErrorUnexpectedEOF                     = errors.New("unexpected EOF")
	ErrorTokenRouteNotConfigure            = errors.New("token route not configure")
	ErrorExtendedVariableFormatIncorrect   = errors.New("variable should be of ${variableName} format")
)

Functions

func IsSymbolToken

func IsSymbolToken(t TokenType) bool

Symbol tokens of JSON are the format tokens needed to construct the AST / syntax checker. The double quotation mark, though also a symbol, is a value symbol, not a JSON protocol symbol that holds the format.

func ShouldUnreadByte

func ShouldUnreadByte(t TokenType) bool

These symbols should be unread back into the buffer. They are read first to determine the state change; peek is not used to collect them because there may be a long way to go before we see them.

Types

type ArrayState

type ArrayState struct {
	TokenReader
}

func (*ArrayState) GetMode

func (i *ArrayState) GetMode() StateMode

func (*ArrayState) ProcessData

func (i *ArrayState) ProcessData(dataSource *bufio.Reader) error

type BooleanState

type BooleanState struct {
	PrimitiveValueTokenStateBase
}

func (*BooleanState) GetMode

func (i *BooleanState) GetMode() StateMode

func (*BooleanState) ProcessData

func (i *BooleanState) ProcessData(dataSource *bufio.Reader) error

type InitState

type InitState struct {
	TokenReader
}

func (*InitState) GetMode

func (i *InitState) GetMode() StateMode

func (*InitState) ProcessData

func (i *InitState) ProcessData(dataSource *bufio.Reader) error

type JzonePrimitiveStateProcessor

type JzonePrimitiveStateProcessor interface {
	// contains filtered or unexported methods
}

For non-symbol tokens: they hold primitive values (not arrays/objects), and these values should later be held by an array/object. Implementations store the value somewhere based on the extracted state, and are able to switchState() without an indicator, because a primitive value alone gives no way to say where the state should go next.

type JzonePrimitiveTokenizer

type JzonePrimitiveTokenizer interface {
	JzoneTokenizer
	JzonePrimitiveStateProcessor
}

Ultimately, as a state in the state machine, it also implements its own way of ProcessData from the stream, but this is left to the concrete state, case by case.

type JzoneTokenizer

type JzoneTokenizer interface {
	ProcessData(dataSource *bufio.Reader) error
	GetMode() StateMode
}

common tokenizer implementation

type JzoneTokenizerStateMachine

type JzoneTokenizerStateMachine interface {
	ProcessData(dataSource io.Reader) error
	Reset()
	SwitchToLatestState() error
	SwitchStateByToken(tokenType TokenType) error
	GetCurrentMode() StateMode
	GetASTConstructor() *ast.ASTConstructor
	RecordSyntaxValue(t StateMode, value interface{}) error
	RecordSyntaxSymbol(b byte) error
}

func NewTokenizerStateMachine

func NewTokenizerStateMachine() JzoneTokenizerStateMachine

type NullState

type NullState struct {
	PrimitiveValueTokenStateBase
}

func (*NullState) GetMode

func (i *NullState) GetMode() StateMode

func (*NullState) ProcessData

func (i *NullState) ProcessData(dataSource *bufio.Reader) error

type NumberState

type NumberState struct {
	PrimitiveValueTokenStateBase
}

func (*NumberState) GetMode

func (i *NumberState) GetMode() StateMode

func (*NumberState) ProcessData

func (i *NumberState) ProcessData(dataSource *bufio.Reader) error

type ObjectState

type ObjectState struct {
	TokenReader
}

func (*ObjectState) GetMode

func (i *ObjectState) GetMode() StateMode

func (*ObjectState) ProcessData

func (i *ObjectState) ProcessData(dataSource *bufio.Reader) error

type PrimitiveValueTokenStateBase

type PrimitiveValueTokenStateBase struct {
	// contains filtered or unexported fields
}

Uses a state machine to route the state and handle the storage of the token value.

type StateMode

type StateMode uint
const (
	INIT_MODE StateMode = iota
	OBJECT_MODE
	ARRAY_MODE
	STRING_MODE
	NUMBER_MODE
	BOOLEAN_MODE
	NULL_MODE
	VARIABLE_MODE
	STRING_VARIABLE_MODE
)

type StringState

type StringState struct {
	PrimitiveValueTokenStateBase
}

func (*StringState) GetMode

func (i *StringState) GetMode() StateMode

func (*StringState) ProcessData

func (i *StringState) ProcessData(dataSource *bufio.Reader) error

type TokenPreProcessor

type TokenPreProcessor interface {
	PreprocessToken(dataSource *bufio.Reader) (TokenType, error)
}

type TokenReader

type TokenReader struct {
	// contains filtered or unexported fields
}

func (*TokenReader) PreprocessToken

func (i *TokenReader) PreprocessToken(dataSource *bufio.Reader) (TokenType, error)

type TokenType

type TokenType uint
const (
	// The token type is a string
	TOKEN_STRING TokenType = iota
	// The token type is a number
	TOKEN_NUMBER
	// The token type is a boolean
	TOKEN_BOOLEAN
	// The token type is a null
	TOKEN_NULL
	// The token type is a left brace
	TOKEN_LEFT_BRACE
	// The token type is a right brace
	TOKEN_RIGHT_BRACE
	// The token type is a left bracket
	TOKEN_LEFT_BRACKET
	// The token type is a right bracket
	TOKEN_RIGHT_BRACKET
	// The token type is a colon
	TOKEN_COLON
	// The token type is a comma
	TOKEN_COMMA
	// customize token type
	TOKEN_VARIABLE
	// variable in string, a string can have multiple variable
	TOKEN_STRING_WITH_VARIABLE
	TOKEN_SPACE
	// decimal token
	TOKEN_NUMBER_DECIMAL
	//
	TOKEN_DOUBLE_QUOTATION
	TOKEN_DUMMY = 98
	TOKEN_DROP  = 99
)

func GetTokenTypeByStartCharacter

func GetTokenTypeByStartCharacter(b byte) TokenType

Returns the token type for a start byte of a JSON key or value.

type TokenizerStateMachine

type TokenizerStateMachine struct {
	// contains filtered or unexported fields
}

func (*TokenizerStateMachine) GetAST

func (i *TokenizerStateMachine) GetAST() ast.JsonNode

func (*TokenizerStateMachine) GetASTConstructor

func (i *TokenizerStateMachine) GetASTConstructor() *ast.ASTConstructor

func (*TokenizerStateMachine) GetCurrentMode

func (i *TokenizerStateMachine) GetCurrentMode() StateMode

func (*TokenizerStateMachine) ProcessData

func (i *TokenizerStateMachine) ProcessData(dataSource io.Reader) error

func (*TokenizerStateMachine) RecordSyntaxSymbol

func (i *TokenizerStateMachine) RecordSyntaxSymbol(b byte) error

func (*TokenizerStateMachine) RecordSyntaxValue

func (i *TokenizerStateMachine) RecordSyntaxValue(valueType StateMode, nodeValue interface{}) error

func (*TokenizerStateMachine) Reset

func (i *TokenizerStateMachine) Reset()

func (*TokenizerStateMachine) SwitchStateByToken

func (i *TokenizerStateMachine) SwitchStateByToken(tokenType TokenType) error

func (*TokenizerStateMachine) SwitchToLatestState

func (i *TokenizerStateMachine) SwitchToLatestState() error

Uses the AST to switch the state of the machine when primitive values reach the end of their processing.

type VariableState

type VariableState struct {
	PrimitiveValueTokenStateBase
}

func (*VariableState) GetMode

func (i *VariableState) GetMode() StateMode

func (*VariableState) ProcessData

func (i *VariableState) ProcessData(dataSource *bufio.Reader) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL