schema

package
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 30, 2025 License: Apache-2.0 Imports: 10 Imported by: 35

Documentation

Index

Constants

View Source
// DefaultIndex is the literal index name "default".
// NOTE(review): presumably the fallback value for the tp_index common field - confirm against callers.
const DefaultIndex = "default"

Variables

This section is empty.

Functions

func DefaultCommonFieldDescriptions

func DefaultCommonFieldDescriptions() map[string]string

TODO improve these descriptions https://github.com/turbot/tailpipe-plugin-sdk/issues/83

func IsCommonField

func IsCommonField(name string) bool

func IsValidColumnName

func IsValidColumnName(name string) bool

IsValidColumnName checks if a column name is valid in DuckDB.

func IsValidColumnType

func IsValidColumnType(columnType string) bool

IsValidColumnType checks if a column type is valid in DuckDB.

Types

type ColumnSchema

// ColumnSchema describes a single column: its source name, its column name,
// its DuckDB type, any nested struct fields and an optional description.
type ColumnSchema struct {
	// SourceName refers to one of 2 things depending on where the schema is used
	// 1. When the schema is used by a mapper, SourceName refers to the field name in the raw row data
	// 2. When the schema is used by the JSONL conversion, SourceName refers to the column name in the JSONL
	// It is never included in the JSON form of the schema.
	SourceName string `json:"-"`
	// ColumnName is serialised under the JSON key "name" (omitted when empty)
	ColumnName string `json:"name,omitempty"`
	// DuckDB type for the column
	Type string `json:"type"`
	// nested column schemas - for struct and struct[]
	StructFields []*ColumnSchema `json:"struct_fields,omitempty"`
	// the column description (optional)
	Description string `json:"description,omitempty"`
}

func ColumnFromProto

func ColumnFromProto(p *proto.ColumnSchema) *ColumnSchema

ColumnFromProto creates a new ColumnSchema from proto

func (*ColumnSchema) FullType

func (c *ColumnSchema) FullType() string

type ColumnType

// ColumnType pairs a DuckDB type name with the child fields of a nested type.
type ColumnType struct {
	// DuckDB type
	Type string
	// child column schemas - for structs/maps/struct[]
	ChildFields []*ColumnSchema
}

type CommonFields

// CommonFields represents the common fields with JSON tags.
// Every field is serialised under its "tp_"-prefixed JSON key.
type CommonFields struct {
	// Mandatory fields
	TpID              string    `json:"tp_id"`
	TpSourceType      string    `json:"tp_source_type"`
	TpIngestTimestamp time.Time `json:"tp_ingest_timestamp"`
	TpTimestamp       time.Time `json:"tp_timestamp"`

	// Hive fields
	// (TpDate additionally carries a parquet tag declaring it as a DATE)
	TpTable     string    `json:"tp_table"`
	TpPartition string    `json:"tp_partition"`
	TpIndex     string    `json:"tp_index"`
	TpDate      time.Time `json:"tp_date" parquet:"type=DATE"`

	// Optional fields - pointers, so an unset value is nil
	TpSourceIP       *string `json:"tp_source_ip"`
	TpDestinationIP  *string `json:"tp_destination_ip"`
	TpSourceName     *string `json:"tp_source_name"`
	TpSourceLocation *string `json:"tp_source_location"`

	// Searchable - string lists, omitted from the JSON when empty
	TpAkas      []string `json:"tp_akas,omitempty"`
	TpIps       []string `json:"tp_ips,omitempty"`
	TpTags      []string `json:"tp_tags,omitempty"`
	TpDomains   []string `json:"tp_domains,omitempty"`
	TpEmails    []string `json:"tp_emails,omitempty"`
	TpUsernames []string `json:"tp_usernames,omitempty"`
}

CommonFields represents the common fields with JSON tags

func (*CommonFields) AsMap

func (c *CommonFields) AsMap() map[string]string

AsMap converts the CommonFields struct into a map[string]string.

func (*CommonFields) GetCommonFields

func (c *CommonFields) GetCommonFields() CommonFields

GetCommonFields implements RowStruct

func (*CommonFields) InitialiseFromMap

func (c *CommonFields) InitialiseFromMap(source map[string]string)

InitialiseFromMap initializes a CommonFields struct using a source map

func (*CommonFields) Validate

func (c *CommonFields) Validate() error

Validate implements the Validatable interface and is used to validate that the required fields have been set. It can also be overridden by RowStruct implementations to perform additional validation - in this case CommonFields.Validate() should be called first.

type GetColumnDescriptions

// GetColumnDescriptions is an interface that can be implemented by a row struct
// to provide descriptions for each column.
type GetColumnDescriptions interface {
	// GetColumnDescriptions returns the descriptions as a string-to-string map.
	// NOTE(review): presumably keyed by column name - confirm against implementations.
	GetColumnDescriptions() map[string]string
}

GetColumnDescriptions is an interface that can be implemented by a row struct to provide descriptions for each column

type GetDescription

// GetDescription is an interface that can be implemented by any struct that has
// a description. It is used by tables to specify the description of the table.
type GetDescription interface {
	// GetDescription returns the description text.
	GetDescription() string
}

GetDescription is an interface that can be implemented by any struct that has a description. It is used by tables to specify the description of the table.

type Mode

// Mode is a string-valued setting describing how completely a schema is defined.
// Mode values are set on the schema config which is provided in a dynamic table config.
type Mode string

Mode values are set on the schema config which is provided in a dynamic table config

// The supported Mode values.
const (
	// ModeFull means that the schema is fully defined (the default)
	ModeFull Mode = "full"
	// ModePartial means that the schema is dynamic and is partially defined
	ModePartial Mode = "partial"
	// ModeDynamic means that the schema is fully dynamic and will be determined at runtime
	// NOTE: we will never explicitly specify this mode - as it means there is no defined schema
	ModeDynamic Mode = "dynamic"
)

type ParquetTag

// ParquetTag represents the components of a parquet tag.
type ParquetTag struct {
	// NOTE(review): presumably the name component of the tag - confirm against ParseParquetTag
	Name string
	// NOTE(review): presumably the type component of the tag (e.g. the DATE in `type=DATE`) - confirm
	Type string
	// NOTE(review): presumably true when the tag marks the field to be skipped - confirm
	Skip bool
}

ParquetTag represents the components of a parquet tag

func ParseParquetTag

func ParseParquetTag(tag string) (*ParquetTag, error)

ParseParquetTag parses and validates a parquet tag string

type RowSchema

// RowSchema describes the shape of a row: its columns, how undeclared source
// fields are handled, and an optional table description.
type RowSchema struct {
	// the explicitly defined columns
	Columns []*ColumnSchema `json:"columns"`
	// should we include ALL source fields in addition to any defined columns, or ONLY include the columns defined
	AutoMapSourceFields bool `json:"automap_source_fields"`
	// should we exclude any source fields from the output (only applicable if automap_source_fields is true)
	ExcludeSourceFields []string `json:"exclude_source_fields"`
	// the table description (optional)
	Description string `json:"description,omitempty"`
}

func RowSchemaFromProto

func RowSchemaFromProto(p *proto.Schema) *RowSchema

func SchemaFromStruct

func SchemaFromStruct(s any) (*RowSchema, error)

func (*RowSchema) AsMap

func (r *RowSchema) AsMap() map[string]*ColumnSchema

func (*RowSchema) Complete

func (r *RowSchema) Complete() bool

func (*RowSchema) InitialiseFromInferredSchema

func (r *RowSchema) InitialiseFromInferredSchema(inferredSchema *RowSchema)

InitialiseFromInferredSchema populates this schema using an inferred row schema. This is called from the CLI when we are trying to determine the full schema after receiving the first JSONL file. It either adds all fields in the inferred schema (if AutoMapSourceFields is true) or just populates missing types (if AutoMapSourceFields is false).

func (*RowSchema) MapRow

func (r *RowSchema) MapRow(rowMap map[string]string) (map[string]string, error)

MapRow maps a row from a map of source fields to a map of target fields, applying the schema and respecting the automap and exclude fields

func (*RowSchema) ToProto

func (r *RowSchema) ToProto() *proto.Schema

type SchemaBuilder

// SchemaBuilder builds a RowSchema from a struct value via its SchemaFromStruct
// method. Create one with NewSchemaBuilder.
type SchemaBuilder struct {
	// contains filtered or unexported fields
}

func NewSchemaBuilder

func NewSchemaBuilder() *SchemaBuilder

func (*SchemaBuilder) SchemaFromStruct

func (b *SchemaBuilder) SchemaFromStruct(s any) (*RowSchema, error)

type SchemaMap

// SchemaMap is a map of table names to RowSchema.
type SchemaMap map[string]*RowSchema

SchemaMap is a map of table names to RowSchema

func SchemaMapFromProto

func SchemaMapFromProto(p map[string]*proto.Schema) SchemaMap

func (SchemaMap) ToProto

func (s SchemaMap) ToProto() map[string]*proto.Schema

type SourceEnrichment

// SourceEnrichment is a set of metadata about a row. It is built by the row
// source and passed to the enrichment.
type SourceEnrichment struct {
	// a map of metadata values the source has extracted - perhaps by parsing the artifact path with a grok pattern
	Metadata map[string]string
	// CommonFields - a set of common fields that are added to every row
	CommonFields CommonFields
}

SourceEnrichment is a set of metadata about a row. It is built by the row source and passed to the enrichment.

func NewSourceEnrichment

func NewSourceEnrichment(metadata map[string]string) *SourceEnrichment

func SourceEnrichmentFromProto

func SourceEnrichmentFromProto(p *proto.SourceEnrichment) *SourceEnrichment

func (*SourceEnrichment) ToProto

func (s *SourceEnrichment) ToProto() *proto.SourceEnrichment

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL