Documentation ¶
Overview ¶
Package token provides tokenization support for Tony and related formats.
Tokenize tokenizes a byte slice into a sequence of tokens.
Balance discovers tree structure from indentation and normalizes the token sequence so that it is context-free.
Index ¶
- Constants
- Variables
- func ExpectedErr(what string, p *Pos) error
- func KPathQuoteField(v string) bool
- func LeadingZeroErr(pos *Pos) error
- func NeedsQuote(v string) bool
- func PrintTokens(toks []Token, msg string)
- func Quote(v string, autoSingle bool) string
- func QuotedToString(d []byte) string
- func UnexpectedErr(what string, p *Pos) error
- func Unquote(v string) (string, error)
- type ErrImbalancedStructure
- type NodeOffsetCallback
- type Pos
- type PosDoc
- type Token
- type TokenOpt
- type TokenSink
- type TokenSource
- type TokenType
- type TokenizeErr
- type Tokenizer
Constants ¶
const (
	MLitChomp = '-'
	MLitKeep  = '+'
)
const (
	TIndent = iota
	TInteger
	TFloat
	TColon
	TArrayElt
	TDocSep
	TComment
	TNull
	TTrue
	TFalse
	TTag
	TString
	TMString
	TLiteral
	TMLit
	TMergeKey
	TLCurl
	TRCurl
	TLSquare
	TRSquare
	TComma
)
Variables ¶
var (
	ErrBadUTF8           = errors.New("bad utf8")
	ErrUnterminated      = errors.New("unterminated")
	ErrNumberLeadingZero = errors.New("leading zero")
	ErrNoIndent          = errors.New("indentation needed")
	ErrDocBalance        = errors.New("imbalanced document")
	ErrLiteral           = errors.New("bad literal")
	ErrBadEscape         = errors.New("bad escape")
	ErrBadUnicode        = errors.New("bad unicode")
	ErrUnicodeControl    = errors.New("unicode control")
	ErrMalformedMLit     = errors.New("malformed multiline literal")
	ErrColonSpace        = errors.New("colon should be followed by space")
	ErrEmptyDoc          = errors.New("empty document")
	ErrMultilineString   = errors.New("multiline string")
	ErrYAMLDoubleQuote   = errors.New("yaml double quote")
	ErrMLitPlacement     = errors.New("bad placement of |")
	ErrYAMLPlain         = errors.New("yaml plain string")
	ErrUnsupported       = errors.New("unsupported")
	ErrNumber            = errors.New("number")
)
Functions ¶
func ExpectedErr ¶
func ExpectedErr(what string, p *Pos) error
func KPathQuoteField ¶ added in v0.0.10
func KPathQuoteField(v string) bool
KPathQuoteField returns true if a field name needs to be quoted in a kinded path. A field needs quoting if:
- It contains characters that require quoting according to NeedsQuote (spaces, special characters)
- It contains any of the path syntax characters: ".", "[", "{"
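For illustration, a sketch of using the result while assembling a kinded path by hand; the field names are invented for the example, and only the documented KPathQuoteField and Quote signatures are used:

fields := []string{"name", "with space", "a.b", "items[0]"}
for _, f := range fields {
	if KPathQuoteField(f) {
		fmt.Println(Quote(f, false)) // needs quoting, e.g. "with space" or "a.b"
	} else {
		fmt.Println(f) // safe to use unquoted, e.g. "name"
	}
}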
func LeadingZeroErr ¶
func LeadingZeroErr(pos *Pos) error
func NeedsQuote ¶
func NeedsQuote(v string) bool
func PrintTokens ¶
func PrintTokens(toks []Token, msg string)
func Quote ¶
func Quote(v string, autoSingle bool) string
func QuotedToString ¶
func QuotedToString(d []byte) string
func UnexpectedErr ¶
func UnexpectedErr(what string, p *Pos) error
func Unquote ¶
func Unquote(v string) (string, error)
Types ¶
type ErrImbalancedStructure ¶
type ErrImbalancedStructure struct {
Open, Close *Token
}
func (*ErrImbalancedStructure) Error ¶
func (i *ErrImbalancedStructure) Error() string
func (*ErrImbalancedStructure) Unwrap ¶
func (i *ErrImbalancedStructure) Unwrap() error
type NodeOffsetCallback ¶ added in v0.0.10
NodeOffsetCallback is called when a node starts in the output stream. The offset is the absolute byte position where the node begins. The path is the kinded path from document root (e.g., "", "key", "key[0]", "a.b.c", "a{0}"). The token is the token that triggered the node start detection.
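A sketch of a callback that records the first start offset seen for each path; the parameter shapes are an assumption based on this description, not a quotation of the package's declaration:

// Assumed shape: absolute byte offset, kinded path, triggering token.
offsets := map[string]int64{}
onNodeStart := func(offset int64, path string, tok *Token) {
	_ = tok // the token that triggered the node start detection
	if _, seen := offsets[path]; !seen {
		offsets[path] = offset
	}
}
_ = onNodeStart // suitable for passing to NewTokenSink (see below)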
type Pos ¶
type PosDoc ¶
type PosDoc struct {
// contains filtered or unexported fields
}
func (*PosDoc) PosWithContext ¶ added in v0.0.10
PosWithContext creates a Pos with embedded context snippet. This allows Pos.String() to work without the full document. Parameters:
- absoluteOffset: absolute byte offset in the stream
- context: buffer slice containing bytes around the position
- bufferStartOffset: absolute offset where the context buffer starts
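The three arguments relate as in this sketch; every value here is invented purely to illustrate the arithmetic:

absoluteOffset := int64(1025)     // position of interest in the overall stream
context := []byte("key: value\n") // bytes captured around that position
bufferStartOffset := int64(1020)  // stream offset at which context begins
// The byte at the position of interest, recoverable from the snippet alone:
b := context[absoluteOffset-bufferStartOffset]
fmt.Printf("%c\n", b) // prints "v" in this invented example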
type TokenSink ¶ added in v0.0.10
type TokenSink struct {
// contains filtered or unexported fields
}
TokenSink provides streaming token encoding to an io.Writer. It tracks absolute byte offsets and calls a callback when nodes start.
func NewTokenSink ¶ added in v0.0.10
func NewTokenSink(w io.Writer, onNodeStart NodeOffsetCallback) *TokenSink
NewTokenSink creates a new TokenSink writing to w. If onNodeStart is provided, it will be called whenever a node starts.
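A minimal construction sketch, assuming bytes and fmt are imported; the callback's parameter shape is the same assumption as above, and feeding tokens into the sink is omitted because those methods are not listed here:

var out bytes.Buffer
sink := NewTokenSink(&out, func(offset int64, path string, tok *Token) {
	fmt.Printf("node %q starts at byte %d\n", path, offset)
})
_ = sink // tokens would then be written through the sink's encoding methods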
type TokenSource ¶ added in v0.0.10
type TokenSource struct {
// contains filtered or unexported fields
}
TokenSource provides streaming tokenization from an io.Reader. It maintains internal state and buffers data as needed.
func NewTokenSource ¶ added in v0.0.10
func NewTokenSource(r io.Reader, opts ...TokenOpt) *TokenSource
NewTokenSource creates a new TokenSource reading from r.
func (*TokenSource) CurrentPath ¶ added in v0.0.10
func (ts *TokenSource) CurrentPath() string
CurrentPath returns the current kinded path from the root (e.g., "", "key", "key[0]", "a.b"). Only bracketed structures are tracked (objects delimited with {} and arrays delimited with []); block-style arrays and objects are not.
func (*TokenSource) Depth ¶ added in v0.0.10
func (ts *TokenSource) Depth() int
Depth returns the current bracket nesting depth.
func (*TokenSource) Read ¶ added in v0.0.10
func (ts *TokenSource) Read() ([]Token, error)
Read reads tokens from the stream. It reads input until:
- A complete token (or tokens) is found
- EOF is reached
- An error occurs
When EOF is reached, Read will return any remaining tokens and then return (nil, io.EOF) on subsequent calls.
Some constructs, such as multiline strings, are encoded as sequences of tokens.
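A read loop consistent with this contract, written as a sketch (the helper name is illustrative; assumes io and fmt are imported):

func readAll(r io.Reader) error {
	ts := NewTokenSource(r)
	for {
		toks, err := ts.Read()
		for _, tok := range toks {
			fmt.Printf("depth=%d path=%q tok=%v\n", ts.Depth(), ts.CurrentPath(), tok)
		}
		if err == io.EOF {
			return nil // any remaining tokens were returned before EOF
		}
		if err != nil {
			return err
		}
	}
}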
type TokenizeErr ¶
func NewTokenizeErr ¶
func NewTokenizeErr(e error, p *Pos) *TokenizeErr
func (*TokenizeErr) Error ¶
func (e *TokenizeErr) Error() string
func (*TokenizeErr) Unwrap ¶
func (t *TokenizeErr) Unwrap() error
type Tokenizer ¶ added in v0.0.12
type Tokenizer struct {
// contains filtered or unexported fields
}
Tokenizer provides stateful tokenization with proper buffer management and trailing whitespace tracking. It supports both streaming (io.Reader) and non-streaming ([]byte) modes.
func NewTokenizer ¶ added in v0.0.12
NewTokenizer creates a new Tokenizer for streaming mode (from io.Reader).
func NewTokenizerFromBytes ¶ added in v0.0.12
NewTokenizerFromBytes creates a new Tokenizer for non-streaming mode (from []byte).
func (*Tokenizer) Read ¶ added in v0.0.12
Read reads the next chunk of data from the source. In streaming mode it reads from the io.Reader and accumulates trailing whitespace; in non-streaming mode it returns the remaining bytes of the document.
Returns:
- data: bytes read (with trailing whitespace from previous read prepended if any)
- startOffset: absolute offset where this data starts in the stream
- err: io.EOF when no more data, or other error
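A sketch of driving Read in streaming mode. NewTokenizer's exact signature is not shown above, so the construction call is an assumption, as are the return types used here:

func drain(r io.Reader) error {
	t := NewTokenizer(r) // assumed: the constructor takes the reader
	for {
		data, startOffset, err := t.Read()
		if len(data) > 0 {
			fmt.Printf("got %d bytes starting at stream offset %d\n", len(data), startOffset)
		}
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
	}
}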
func (*Tokenizer) TokenizeOne ¶ added in v0.0.12
func (t *Tokenizer) TokenizeOne(data []byte, pos int, bufferStartOffset int64) ([]Token, int, error)
TokenizeOne tokenizes one or more tokens from a buffer slice. This is the core tokenization logic, adapted to use Tokenizer's state and lineStartOffset for comment prefix calculation (no recentBuf/docPrefix fallback).
Parameters:
- data: buffer slice to tokenize from (may be partial document)
- pos: current offset within buffer (relative offset, 0-based)
- bufferStartOffset: absolute offset where buffer starts in stream (for PosDoc and lineStartOffset calculation)
Returns:
- tokens: slice of tokens found (empty slice for whitespace)
- consumed: number of bytes consumed from buffer
- error: any error encountered, or io.EOF if more buffered input is needed
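A sketch of the non-streaming path that tokenizes a whole in-memory document. NewTokenizerFromBytes' exact signature is not shown above and is assumed here; TokenizeOne is called with its documented parameters:

func tokenizeDoc(doc []byte) ([]Token, error) {
	t := NewTokenizerFromBytes(doc) // assumed: the constructor takes the document bytes
	data, startOffset, err := t.Read()
	if err != nil && err != io.EOF {
		return nil, err
	}
	var out []Token
	pos := 0
	for pos < len(data) {
		toks, consumed, err := t.TokenizeOne(data, pos, startOffset)
		if err == io.EOF {
			break // more input needed; with the whole document in hand this is the end
		}
		if err != nil {
			return nil, err
		}
		out = append(out, toks...)
		pos += consumed
		if consumed == 0 && len(toks) == 0 {
			break // defensive: avoid spinning if nothing was consumed
		}
	}
	return out, nil
}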