formats

package
v0.9.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 26, 2025 License: Apache-2.0 Imports: 7 Imported by: 5

Documentation

Index

Constants

This section is empty.

Variables

View Source
// DefaultDelimited is the built-in Delimited format, registered under the
// name "default": comma-separated columns with a header row. All other
// Delimited options are left unset (nil).
var DefaultDelimited = &Delimited{
	Name:        "default",
	Description: "Default Delimited format",
	Delimiter:   utils.ToPointer(","),  // columns are separated by a comma
	Header:      utils.ToPointer(true), // first line holds the column names
}

DefaultDelimited is the default Delimited format - this is exported by the core plugin

View Source
// DefaultJsonLines is the built-in JsonLines format, registered under the
// name "default". It sets neither SampleSize nor DateFormat, so both stay nil.
var DefaultJsonLines = &JsonLines{
	Name:        "default",
	Description: "Default JSONL format",
}

DefaultJsonLines is the default JSONL format - this is exported by the core plugin

Functions

This section is empty.

Types

type Delimited

// Delimited describes a delimited-text (CSV-style) format that is decoded from
// an HCL block. The block label becomes Name; every other attribute is
// optional. GetCsvOpts turns the configured options into DuckDB read_csv
// option strings.
//
// NOTE(review): all option fields are pointers, presumably so that an unset
// option (nil) can be told apart from an explicit zero value and left to the
// DuckDB default - confirm against GetCsvOpts.
type Delimited struct {
	Name        string `hcl:",label"`
	Description string `hcl:"description,optional"`
	// Option to skip type detection for CSV parsing and assume all columns to be of type VARCHAR.
	AllVarchar *bool `hcl:"all_varchar,optional"`

	// Option to allow the conversion of quoted values to NULL values.
	AllowQuotedNulls *bool `hcl:"allow_quoted_nulls,optional"`

	// Character used to initiate comments.
	// Lines starting with a comment character (optionally preceded by space characters)
	// are completely ignored; other lines containing a comment character are parsed
	// only up to that point.
	Comment *string `hcl:"comment,optional"`

	// Specifies the date format to use when parsing dates.
	DateFormat *string `hcl:"date_format,optional"`

	// The decimal separator of numbers.
	DecimalSeparator *string `hcl:"decimal_separator,optional"`

	// Specifies the delimiter character that separates columns within each row (line) of the file.
	Delimiter *string `hcl:"delimiter,optional"`

	// Specifies the string that should appear before a data character sequence that matches the quote value.
	Escape *string `hcl:"escape,optional"`

	// Whether or not an extra filename column should be included in the result.
	Filename *bool `hcl:"filename,optional"`

	// Do not match the specified columns' values against the NULL string.
	// In the default case where the NULL string is empty,
	// this means that empty values will be read as zero-length strings rather than NULLs.
	ForceNotNull *[]string `hcl:"force_not_null,optional"`

	// Specifies that the file contains a header line with the names of each column in the file.
	Header *bool `hcl:"header,optional"`

	// Option to ignore any parsing errors encountered and instead skip the rows that contain errors.
	IgnoreErrors *bool `hcl:"ignore_errors,optional"`

	// The maximum line size in bytes.
	MaxLineSize *int `hcl:"max_line_size,optional"`

	// Set the new line character(s) in the file. Options are '\r', '\n', or '\r\n'.
	// Note that the CSV parser only distinguishes between single-character and double-character line delimiters.
	// Therefore, it does not differentiate between '\r' and '\n'.
	NewLine *string `hcl:"new_line,optional"`

	// Boolean value that specifies whether or not column names should be normalized,
	// removing any non-alphanumeric characters from them.
	NormalizeNames *bool `hcl:"normalize_names,optional"`

	// If this option is enabled, when a row lacks columns, it will pad the remaining columns on the right with NULL values.
	NullPadding *bool `hcl:"null_padding,optional"`

	// Specifies the string that represents a NULL value or (since v0.10.2) a list of strings that represent a NULL value.
	// NOTE(review): this field is a single *string, so only one NULL string can
	// be configured here; the list form mentioned above is not expressible.
	NullStr *string `hcl:"null_str,optional"`

	// Specifies the quoting string to be used when a data value is quoted.
	Quote *string `hcl:"quote,optional"`

	// The number of sample rows for auto detection of parameters.
	SampleSize *int `hcl:"sample_size,optional"`

	// Specifies the date format to use when parsing timestamps.
	TimestampFormat *string `hcl:"timestamp_format,optional"`
}

func (*Delimited) GetCsvOpts

func (d *Delimited) GetCsvOpts() []string

GetCsvOpts converts the Delimited configuration into a slice of CSV option strings in the format expected by the DuckDB read_csv function

func (*Delimited) GetDescription

func (d *Delimited) GetDescription() string

GetDescription returns the description of this format instance

func (*Delimited) GetMapper

func (d *Delimited) GetMapper() (mappers.Mapper[*types.DynamicRow], error)

func (*Delimited) GetName

func (d *Delimited) GetName() string

GetName returns the name of this format instance

func (*Delimited) GetProperties

func (d *Delimited) GetProperties() map[string]string

GetProperties returns the format properties as a string map, which can be included in the introspection response

func (*Delimited) GetRegex

func (d *Delimited) GetRegex() (string, error)

func (*Delimited) Identifier

func (d *Delimited) Identifier() string

Identifier returns the format type identifier

func (*Delimited) SetName

func (d *Delimited) SetName(name string)

SetName sets the name of this format instance

func (*Delimited) Validate

func (d *Delimited) Validate() error

type Format

// Format is the interface implemented by every format type in this package
// (Delimited, Grok, JsonLines and Regex).
type Format interface {
	// parse.Config is embedded; presumably it supplies the Identifier and
	// Validate methods that each format type implements - confirm in package parse.
	parse.Config

	// GetName returns the name of this format instance.
	GetName() string
	// SetName sets the name of this format instance.
	SetName(name string)
	// GetMapper returns a mapper that produces *types.DynamicRow values, or an error.
	// NOTE(review): the input the mapper consumes is not visible here - confirm.
	GetMapper() (mappers.Mapper[*types.DynamicRow], error)
	// GetRegex returns a string and an error.
	// NOTE(review): presumably the regular expression used to match a line of
	// this format - confirm against the implementations.
	GetRegex() (string, error)
	// GetDescription returns the description of this format instance.
	GetDescription() string
	// GetProperties returns the format properties as a string map - used for introspection
	GetProperties() map[string]string
}

func NewDelimited

func NewDelimited() Format

func NewGrok

func NewGrok() Format

func NewJsonLines added in v0.3.0

func NewJsonLines() Format

func NewRegex

func NewRegex() Format

func ParseFormat

func ParseFormat(formatData *types.FormatConfigData, formatConstructorMap map[string]func() Format) (Format, error)

type Grok

// Grok describes a format whose line layout is expressed with grok patterns.
// It is decoded from an HCL block: the block label becomes Name, layout is
// required, and description and patterns are optional.
type Grok struct {
	Name        string `hcl:",label"`
	Description string `hcl:"description,optional"`
	// The layout of the log line.
	// NOTE: because this will contain grok patterns, this property is included in
	// constants.GrokConfigProperties, meaning that '{' will be auto-escaped in the HCL.
	Layout string `hcl:"layout"`

	// Grok patterns to add to the grok parser used to parse the layout.
	Patterns map[string]string `hcl:"patterns,optional"`
}

func (*Grok) GetDescription

func (g *Grok) GetDescription() string

func (*Grok) GetMapper

func (g *Grok) GetMapper() (mappers.Mapper[*types.DynamicRow], error)

func (*Grok) GetName

func (g *Grok) GetName() string

GetName returns the name of this format instance

func (*Grok) GetProperties

func (g *Grok) GetProperties() map[string]string

func (*Grok) GetRegex

func (g *Grok) GetRegex() (string, error)

func (*Grok) Identifier

func (g *Grok) Identifier() string

Identifier returns the format type identifier

func (*Grok) SetName

func (g *Grok) SetName(name string)

SetName sets the name of this format instance

func (*Grok) Validate

func (g *Grok) Validate() error

type JsonLines added in v0.3.0

// JsonLines describes a JSON Lines (JSONL) format that is decoded from an HCL
// block. The block label becomes Name; every other attribute is optional.
// GetReadJsonOpts turns the configured options into a slice of option strings.
type JsonLines struct {
	Name        string `hcl:",label"`
	Description string `hcl:"description,optional"`
	// Option to define number of sample objects for automatic JSON type detection.
	// Set to -1 to scan the entire input file (if not provided, DuckDB defaults to 20480).
	SampleSize *int `hcl:"sample_size,optional"`
	// Specifies the date format to use when parsing timestamps.
	// (If not provided, DuckDB defaults to the ISO 8601 format.)
	// NOTE(review): the field is named DateFormat / date_format but is described
	// as applying to timestamps - confirm which DuckDB option GetReadJsonOpts maps it to.
	DateFormat *string `hcl:"date_format,optional"`
}

func (*JsonLines) GetDescription added in v0.3.0

func (d *JsonLines) GetDescription() string

GetDescription returns the description of this format instance

func (*JsonLines) GetMapper added in v0.3.0

func (d *JsonLines) GetMapper() (mappers.Mapper[*types.DynamicRow], error)

func (*JsonLines) GetName added in v0.3.0

func (d *JsonLines) GetName() string

GetName returns the name of this format instance

func (*JsonLines) GetProperties added in v0.3.0

func (d *JsonLines) GetProperties() map[string]string

func (*JsonLines) GetReadJsonOpts added in v0.3.0

func (d *JsonLines) GetReadJsonOpts() []string

GetReadJsonOpts converts the JsonLines configuration into a slice of JSON option strings in the format expected by the DuckDB read_json function

func (*JsonLines) GetRegex added in v0.3.0

func (d *JsonLines) GetRegex() (string, error)

func (*JsonLines) Identifier added in v0.3.0

func (d *JsonLines) Identifier() string

Identifier returns the format type identifier

func (*JsonLines) SetName added in v0.3.0

func (d *JsonLines) SetName(name string)

SetName sets the name of this format instance

func (*JsonLines) Validate added in v0.3.0

func (d *JsonLines) Validate() error

type Regex

// Regex describes a format whose line layout is given by the Layout string.
// It is decoded from an HCL block: the block label becomes Name, layout is
// required, and description is optional.
type Regex struct {
	Name        string `hcl:",label"`
	Description string `hcl:"description,optional"`
	// The layout of the log line.
	// NOTE: because this will contain grok patterns, this property is included in
	// constants.GrokConfigProperties, meaning that '{' will be auto-escaped in the HCL.
	// NOTE(review): this note is worded identically to the one on Grok.Layout -
	// confirm that it also applies to regex layouts.
	Layout string `hcl:"layout"`
}

func (*Regex) GetDescription

func (r *Regex) GetDescription() string

func (*Regex) GetMapper

func (r *Regex) GetMapper() (mappers.Mapper[*types.DynamicRow], error)

func (*Regex) GetName

func (r *Regex) GetName() string

GetName returns the name of this format instance

func (*Regex) GetProperties

func (r *Regex) GetProperties() map[string]string

func (*Regex) GetRegex

func (r *Regex) GetRegex() (string, error)

func (*Regex) Identifier

func (r *Regex) Identifier() string

Identifier returns the format type identifier

func (*Regex) SetName

func (r *Regex) SetName(name string)

SetName sets the name of this format instance

func (*Regex) Validate

func (r *Regex) Validate() error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL