schema

package
v0.2.1-rc.6 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 11, 2025 License: Apache-2.0 Imports: 14 Imported by: 35

Documentation

Index

Constants

View Source
// DefaultIndex is the literal index name "default".
// NOTE(review): presumably the value used for the index when none is
// configured - confirm against callers.
const DefaultIndex = "default"

Variables

View Source
// DefaultCommonFieldDescriptions maps each common field name constant
// (constants.Tp*) to its default human-readable description string.
var DefaultCommonFieldDescriptions = map[string]string{
	constants.TpID:              "A unique identifier for the row.",
	constants.TpSourceType:      "The name of the source that collected the row.",
	constants.TpIngestTimestamp: "The timestamp in UTC when the row was ingested into the system.",
	constants.TpTimestamp:       "The original timestamp in UTC when the event or log entry was generated.",
	constants.TpTable:           "The name of the table.",
	constants.TpPartition:       "The name of the partition as defined in the Tailpipe configuration file.",
	constants.TpIndex:           "The name of the optional index used to partition the data.",
	constants.TpDate:            "The original date when the event or log entry was generated in YYYY-MM-DD format.",
	constants.TpSourceIP:        "The IP address of the source.",
	constants.TpDestinationIP:   "The IP address of the destination.",
	constants.TpSourceName:      "The name or identifier of the source generating the row, such as a service name.",
	constants.TpSourceLocation:  "The geographic or network location of the source, such as a region.",
	constants.TpAkas:            "A list of associated globally unique identifier strings (also known as).",
	constants.TpIps:             "A list of associated IP addresses.",
	constants.TpTags:            "A list of associated tags or labels.",
	constants.TpDomains:         "A list of associated domain names.",
	constants.TpEmails:          "A list of associated email addresses.",
	constants.TpUsernames:       "A list of associated usernames or identities.",
}

TODO improve these descriptions https://github.com/turbot/tailpipe-plugin-sdk/issues/83

Functions

func IsCommonField

func IsCommonField(name string) bool

func IsValidColumnName

func IsValidColumnName(name string) bool

IsValidColumnName checks if a column name is valid in DuckDB.

func IsValidColumnType

func IsValidColumnType(columnType string) bool

IsValidColumnType checks if a column type is valid in DuckDB.

Types

type ColumnDescriptionProvider added in v0.2.0

// ColumnDescriptionProvider is an interface that can be implemented by a row
// struct to provide descriptions for each column.
type ColumnDescriptionProvider interface {
	// GetColumnDescriptions returns the column descriptions as a map.
	// NOTE(review): presumably keyed by column name - confirm against implementers.
	GetColumnDescriptions() map[string]string
}

ColumnDescriptionProvider is an interface that can be implemented by a row struct to provide descriptions for each column

type ColumnSchema

// ColumnSchema describes a single column: its source and target names, its
// DuckDB type, any nested struct fields, and optional description, required,
// null-value and transform settings.
type ColumnSchema struct {
	// SourceName refers to the column name in the JSONL
	SourceName string
	// ColumnName refers to the column name in the parquet
	ColumnName string
	// DuckDB type for the column
	Type string
	// struct schema for struct and struct[]
	StructFields []*ColumnSchema
	// the column description (optional)
	Description string
	// is the column required
	Required bool
	// The null value for the column
	NullIf string
	// a custom select clause for the column
	Transform string
}

func ColumnFromProto

func ColumnFromProto(p *proto.ColumnSchema) *ColumnSchema

ColumnFromProto creates a new ColumnSchema from proto

func (*ColumnSchema) Clone added in v0.3.0

func (c *ColumnSchema) Clone() *ColumnSchema

func (*ColumnSchema) FullType

func (c *ColumnSchema) FullType() string

func (*ColumnSchema) NormaliseColumnTypes added in v0.2.0

func (c *ColumnSchema) NormaliseColumnTypes()

NormaliseColumnTypes normalises the column types to lower case, including all child fields

type ColumnType

// ColumnType holds a DuckDB type name together with the child field schemas
// used by nested types.
type ColumnType struct {
	// DuckDB type
	Type string
	// for structs/maps/struct[]
	ChildFields []*ColumnSchema
}

type CommonFields

// CommonFields represents the common fields with JSON tags.
// Fields are grouped as mandatory, hive, optional and searchable.
type CommonFields struct {
	// Mandatory fields
	TpID              string    `json:"tp_id"`
	TpSourceType      string    `json:"tp_source_type"`
	TpIngestTimestamp time.Time `json:"tp_ingest_timestamp"`
	TpTimestamp       time.Time `json:"tp_timestamp"`

	// Hive fields
	TpTable     string    `json:"tp_table"`
	TpPartition string    `json:"tp_partition"`
	TpIndex     string    `json:"tp_index"`
	// TpDate carries a parquet tag declaring its type as DATE
	TpDate      time.Time `json:"tp_date" parquet:"type=DATE"`

	// Optional fields (pointer types, so each may be nil)
	TpSourceIP       *string `json:"tp_source_ip"`
	TpDestinationIP  *string `json:"tp_destination_ip"`
	TpSourceName     *string `json:"tp_source_name"`
	TpSourceLocation *string `json:"tp_source_location"`

	// Searchable (omitted from the JSON output when empty, per the omitempty tag)
	TpAkas      []string `json:"tp_akas,omitempty"`
	TpIps       []string `json:"tp_ips,omitempty"`
	TpTags      []string `json:"tp_tags,omitempty"`
	TpDomains   []string `json:"tp_domains,omitempty"`
	TpEmails    []string `json:"tp_emails,omitempty"`
	TpUsernames []string `json:"tp_usernames,omitempty"`
}

CommonFields represents the common fields with JSON tags

func (*CommonFields) AsMap

func (c *CommonFields) AsMap() map[string]string

AsMap converts the CommonFields struct into a map[string]string.

func (*CommonFields) InitialiseFromMap

func (c *CommonFields) InitialiseFromMap(source map[string]string)

InitialiseFromMap initializes a CommonFields struct using a source map

func (*CommonFields) Validate

func (c *CommonFields) Validate() error

Validate implements the Validatable interface and is used to validate that the required fields have been set. It can also be overridden by RowStruct implementations to perform additional validation - in this case CommonFields.Validate() should be called first.

type ConversionSchema added in v0.3.0

// ConversionSchema is a specialised TableSchema (embedded here) which also
// carries the list of all source columns.
type ConversionSchema struct {
	TableSchema
	// the source columns - these are the columns in the source data
	// this is to ensure we have the inputs required for any transforms
	SourceColumns []SourceColumnDef
}

ConversionSchema is a specialised TableSchema which also contains a list of all source columns

func NewConversionSchema added in v0.3.0

func NewConversionSchema(tableSchema *TableSchema) *ConversionSchema

func NewConversionSchemaWithInferredSchema added in v0.3.0

func NewConversionSchemaWithInferredSchema(tableSchema, inferredSchema *TableSchema) *ConversionSchema

NewConversionSchemaWithInferredSchema populates a ConversionSchema using a table schema and an inferred row schema. This is called from the CLI after receiving the first JSONL file. If a 'Select' pattern is provided, it will be used to select source fields to include in the schema.

type DescriptionProvider added in v0.2.0

// DescriptionProvider is an interface that can be implemented by any struct
// that has a description.
type DescriptionProvider interface {
	// GetDescription returns the description string.
	GetDescription() string
}

DescriptionProvider is an interface that can be implemented by any struct that has a description. It is used by tables to specify the description of the table.

type Mode

// Mode is the string-backed type of a schema mode value
// (see ModeFull, ModePartial and ModeDynamic).
type Mode string

Mode values are set on the schema config which is provided in a dynamic table config

// The set of schema Mode values.
const (
	// ModeFull means that the schema is fully defined (the default)
	ModeFull Mode = "full"
	// ModePartial means that the schema is dynamic and is partially defined
	ModePartial Mode = "partial"
	// ModeDynamic means that the schema is fully dynamic and will be determined at runtime
	// NOTE: we will never explicitly specify this mode - as it means there is no defined schema
	ModeDynamic Mode = "dynamic"
)

type ParquetTag

// ParquetTag represents the components of a parquet tag.
type ParquetTag struct {
	// NOTE(review): presumably the name component of the tag - confirm against ParseParquetTag
	Name string
	// NOTE(review): presumably the type component of the tag (e.g. the DATE in "type=DATE") - confirm
	Type string
	// NOTE(review): presumably set when the tag marks the field to be skipped - confirm
	Skip bool
}

ParquetTag represents the components of a parquet tag

func ParseParquetTag

func ParseParquetTag(tag string) (*ParquetTag, error)

ParseParquetTag parses and validates a parquet tag string

type SchemaBuilder

// SchemaBuilder exposes SchemaFromStruct for deriving a TableSchema from a
// struct value; all of its state is unexported.
type SchemaBuilder struct {
	// contains filtered or unexported fields
}

func NewSchemaBuilder

func NewSchemaBuilder() *SchemaBuilder

func (*SchemaBuilder) SchemaFromStruct

func (b *SchemaBuilder) SchemaFromStruct(s any) (*TableSchema, error)

type SchemaMap

// SchemaMap is a map of table names to TableSchema.
type SchemaMap map[string]*TableSchema

SchemaMap is a map of table names to TableSchema

func SchemaMapFromProto

func SchemaMapFromProto(p map[string]*proto.Schema) SchemaMap

func (SchemaMap) ToProto

func (s SchemaMap) ToProto() map[string]*proto.Schema

type SourceColumnDef added in v0.3.0

// SourceColumnDef is a simple struct to hold the column name and type for a
// source column.
type SourceColumnDef struct {
	// the source column name
	Name string

	// the source column type
	Type string
}

SourceColumnDef is a simple struct to hold the column name and type for a source column

func NewSourceColumnDef added in v0.3.0

func NewSourceColumnDef(columnSchema *ColumnSchema) SourceColumnDef

type SourceEnrichment

// SourceEnrichment is a set of metadata about a row: the metadata values
// extracted by the source plus the common fields added to every row.
type SourceEnrichment struct {
	// a map of metadata values the source has extracted - perhaps by parsing the artifact path with a grok pattern
	Metadata map[string]string
	// CommonFields - a set of common fields that are added to every row
	CommonFields CommonFields
}

SourceEnrichment - is a set of metadata about a row - this is built by the row source and passed to the enrichment

func NewSourceEnrichment

func NewSourceEnrichment(metadata map[string]string) *SourceEnrichment

func SourceEnrichmentFromProto

func SourceEnrichmentFromProto(p *proto.SourceEnrichment) *SourceEnrichment

func (*SourceEnrichment) ToProto

func (s *SourceEnrichment) ToProto() *proto.SourceEnrichment

type TableSchema added in v0.2.0

// TableSchema describes a table: its Name and Columns, an optional list of
// source columns to include, an optional description and a default null value.
type TableSchema struct {
	Name    string
	Columns []*ColumnSchema
	// optional list of source columns to include
	MapFields []string
	// the table description (optional)
	Description string
	// the default null value for the table (may be overridden for specific columns)
	NullIf string
}

func CommonFieldsSchema added in v0.2.0

func CommonFieldsSchema() *TableSchema

CommonFieldsSchema is the TableSchema for the common fields. It is used for custom tables.

func SchemaFromStruct

func SchemaFromStruct(s any) (*TableSchema, error)

func TableSchemaFromProto added in v0.2.0

func TableSchemaFromProto(p *proto.Schema) *TableSchema

func (*TableSchema) AsMap added in v0.2.0

func (r *TableSchema) AsMap() map[string]*ColumnSchema

func (*TableSchema) Clone added in v0.3.0

func (r *TableSchema) Clone() *TableSchema

func (*TableSchema) Complete added in v0.2.0

func (r *TableSchema) Complete() bool

func (*TableSchema) EnsureComplete added in v0.2.0

func (r *TableSchema) EnsureComplete() error

EnsureComplete checks that all columns have a type and returns an error if not

func (*TableSchema) MapRow added in v0.2.0

func (r *TableSchema) MapRow(sourceMap map[string]string) (map[string]interface{}, error)

MapRow maps a row from a map of source fields to a map of target fields, applying the schema and respecting the automap and exclude fields

func (*TableSchema) MergeWithCommonSchema added in v0.2.0

func (r *TableSchema) MergeWithCommonSchema() *TableSchema

MergeWithCommonSchema merges the table schema with the common fields schema. The resulting schema will contain:

- All fields from this schema
- For common fields, Type and Required are taken from the common schema, and Description if not already set
- Any common fields not in this schema are added

The original schema is not modified.

func (*TableSchema) NormaliseColumnTypes added in v0.2.0

func (r *TableSchema) NormaliseColumnTypes()

NormaliseColumnTypes normalises the column types to lower case

func (*TableSchema) ShouldMapSourceColumn added in v0.3.0

func (r *TableSchema) ShouldMapSourceColumn(columnName string) bool

func (*TableSchema) ToProto added in v0.2.0

func (r *TableSchema) ToProto() *proto.Schema

func (*TableSchema) Validate added in v0.2.0

func (r *TableSchema) Validate() error

Validate checks that all optional columns have a type and returns an error if not. The purpose of this function is to validate the TableDefinition provided by a 'predefined custom table'. This validation ensures that any optional columns have a type specified, so we can correctly create the parquet schema even if the column is not present in the source data. NOTE: this is the same validation as we perform in tailpipe Table.Validate - that validates the TableDef in config, whereas this validates the hardcoded TableDef provided by the plugin.

func (*TableSchema) WithSourceFieldsAndTransformsCleared

func (r *TableSchema) WithSourceFieldsAndTransformsCleared() *TableSchema

WithSourceFieldsAndTransformsCleared returns a copy with the source fields set to the column names and the transforms cleared. This is called from ArtifactConversionCollector as it will already have applied field mappings and transforms.

func (*TableSchema) WithSourceFieldsCleared added in v0.2.0

func (r *TableSchema) WithSourceFieldsCleared() *TableSchema

WithSourceFieldsCleared returns a copy with the source fields set to the column names. This is called from RowEnrichmentCollector as it will already have applied field mappings.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL