Documentation
¶
Index ¶
- Constants
- Variables
- func IsCommonField(name string) bool
- func IsValidColumnName(name string) bool
- func IsValidColumnType(ty string) bool
- type ColumnDescriptionProvider
- type ColumnSchema
- type ColumnType
- type CommonFields
- type ConversionSchema
- type DescriptionProvider
- type Mode
- type ParquetTag
- type SchemaBuilder
- type SchemaMap
- type SourceColumnDef
- type SourceEnrichment
- type TableSchema
- func (r *TableSchema) AsMap() map[string]*ColumnSchema
- func (r *TableSchema) Clone() *TableSchema
- func (r *TableSchema) Complete() bool
- func (r *TableSchema) EnsureComplete() error
- func (r *TableSchema) MapRow(sourceMap map[string]string) (map[string]interface{}, error)
- func (r *TableSchema) MergeWithCommonSchema() *TableSchema
- func (r *TableSchema) NormaliseColumnTypes()
- func (r *TableSchema) ShouldMapSourceColumn(columnName string) bool
- func (r *TableSchema) ToProto() *proto.Schema
- func (r *TableSchema) Validate() error
- func (r *TableSchema) WithSourceFieldsCleared() *TableSchema
Constants ¶
const DefaultIndex = "default"
Variables ¶
var DefaultCommonFieldDescriptions = map[string]string{ constants.TpID: "A unique identifier for the row.", constants.TpSourceType: "The name of the source that collected the row.", constants.TpIngestTimestamp: "The timestamp in UTC when the row was ingested into the system.", constants.TpTimestamp: "The original timestamp in UTC when the event or log entry was generated.", constants.TpTable: "The name of the table.", constants.TpPartition: "The name of the partition as defined in the Tailpipe configuration file.", constants.TpIndex: "The name of the optional index used to partition the data.", constants.TpDate: "The original date when the event or log entry was generated in YYYY-MM-DD format.", constants.TpSourceIP: "The IP address of the source.", constants.TpDestinationIP: "The IP address of the destination.", constants.TpSourceName: "The name or identifier of the source generating the row, such as a service name.", constants.TpSourceLocation: "The geographic or network location of the source, such as a region.", constants.TpAkas: "A list of associated globally unique identifier strings (also known as).", constants.TpIps: "A list of associated IP addresses.", constants.TpTags: "A list of associated tags or labels.", constants.TpDomains: "A list of associated domain names.", constants.TpEmails: "A list of associated email addresses.", constants.TpUsernames: "A list of associated usernames or identities.", }
Functions ¶
func IsCommonField ¶
func IsValidColumnName ¶
IsValidColumnName checks if a column name is valid in DuckDB.
func IsValidColumnType ¶
Types ¶
type ColumnDescriptionProvider ¶ added in v0.2.0
ColumnDescriptionProvider is an interface that can be implemented by a row struct to provide descriptions for each column
type ColumnSchema ¶
type ColumnSchema struct {
// SourceName refers to the column name in the JSONL
SourceName string
// ColumnName refers to the column name in the parquet
ColumnName string
// DuckDB type for the column
Type string
// struct schema for struct
StructFields []*ColumnSchema
// the column description (optional)
Description string
// is the column required
Required bool
// The null value for the column
NullIf string
// a custom select clause for the column
Transform string
}
func ColumnFromProto ¶
func ColumnFromProto(p *proto.ColumnSchema) *ColumnSchema
ColumnFromProto creates a new ColumnSchema from proto
func (*ColumnSchema) Clone ¶ added in v0.3.0
func (c *ColumnSchema) Clone() *ColumnSchema
func (*ColumnSchema) FullType ¶
func (c *ColumnSchema) FullType() string
func (*ColumnSchema) NormaliseColumnTypes ¶ added in v0.2.0
func (c *ColumnSchema) NormaliseColumnTypes()
NormaliseColumnTypes normalizes the type of this column and all its child fields. It recursively processes nested structures to ensure all types are in their canonical form.
type ColumnType ¶
type ColumnType struct {
// DuckDB type
Type string
// for structs/maps/struct[]
ChildFields []*ColumnSchema
}
type CommonFields ¶
type CommonFields struct {
// Mandatory fields
TpID string `json:"tp_id"`
TpSourceType string `json:"tp_source_type"`
TpIngestTimestamp time.Time `json:"tp_ingest_timestamp"`
TpTimestamp time.Time `json:"tp_timestamp"`
// Hive fields
TpTable string `json:"tp_table"`
TpPartition string `json:"tp_partition"`
TpDate time.Time `json:"tp_date" parquet:"type=DATE"`
// Optional fields
TpSourceIP *string `json:"tp_source_ip"`
TpDestinationIP *string `json:"tp_destination_ip"`
TpSourceName *string `json:"tp_source_name"`
TpSourceLocation *string `json:"tp_source_location"`
// Searchable
TpAkas []string `json:"tp_akas,omitempty"`
TpIps []string `json:"tp_ips,omitempty"`
TpTags []string `json:"tp_tags,omitempty"`
TpDomains []string `json:"tp_domains,omitempty"`
TpEmails []string `json:"tp_emails,omitempty"`
TpUsernames []string `json:"tp_usernames,omitempty"`
}
CommonFields represents the common fields with JSON tags
func (*CommonFields) AsMap ¶
func (c *CommonFields) AsMap() map[string]string
AsMap converts the CommonFields struct into a map[string]string.
func (*CommonFields) InitialiseFromMap ¶
func (c *CommonFields) InitialiseFromMap(source map[string]string)
InitialiseFromMap initializes a CommonFields struct using a source map
type ConversionSchema ¶ added in v0.3.0
type ConversionSchema struct {
TableSchema
// the source columns - these are the columns in the source data
// this is to ensure we have the inputs required for any transforms
SourceColumns []SourceColumnDef
ColumnString string
}
ConversionSchema is a specialised TableSchema which also contains a list of ALL source columns. The embedded schema defines the columns which appear in the output parquet file, and the source columns include all available fields - these are necessary as there may be a transform which uses any of them.
func NewConversionSchema ¶ added in v0.3.0
func NewConversionSchema(tableSchema *TableSchema) *ConversionSchema
func NewConversionSchemaWithInferredSchema ¶ added in v0.3.0
func NewConversionSchemaWithInferredSchema(tableSchema, inferredSchema *TableSchema) *ConversionSchema
NewConversionSchemaWithInferredSchema populates a ConversionSchema using a table schema and an inferred row schema. This is called from the CLI after receiving the first JSONL file. If a 'Select' pattern is provided, it will be used to select source fields to include in the schema.
type DescriptionProvider ¶ added in v0.2.0
type DescriptionProvider interface {
GetDescription() string
}
DescriptionProvider is an interface that can be implemented by any struct that has a description. It is used by tables to specify the description of the table.
type Mode ¶
type Mode string
Mode values are set on the schema config which is provided in a dynamic table config
const ( // ModeFull means that the schema is fully defined (the default) ModeFull Mode = "full" // ModePartial means that the schema is dynamic and is partially defined ModePartial Mode = "partial" // ModeDynamic means that the schema is fully dynamic and will be determined at runtime // NOTE: we will never explicitly specify this mode - as it means there is no defined schema ModeDynamic Mode = "dynamic" )
type ParquetTag ¶
ParquetTag represents the components of a parquet tag
func ParseParquetTag ¶
func ParseParquetTag(tag string) (*ParquetTag, error)
ParseParquetTag parses and validates a parquet tag string
type SchemaBuilder ¶
type SchemaBuilder struct {
// contains filtered or unexported fields
}
func NewSchemaBuilder ¶
func NewSchemaBuilder() *SchemaBuilder
func (*SchemaBuilder) SchemaFromStruct ¶
func (b *SchemaBuilder) SchemaFromStruct(s any) (*TableSchema, error)
type SchemaMap ¶
type SchemaMap map[string]*TableSchema
SchemaMap is a map of table names to TableSchema
type SourceColumnDef ¶ added in v0.3.0
SourceColumnDef is a simple struct to hold the column name and type for a source column
func NewSourceColumnDef ¶ added in v0.3.0
func NewSourceColumnDef(columnSchema *ColumnSchema) SourceColumnDef
type SourceEnrichment ¶
type SourceEnrichment struct {
// a map of metadata values the source has extracted - perhaps by parsing the artifact path with a grok pattern
Metadata map[string]string
// CommonFields - a set of common fields that are added to every row
CommonFields CommonFields
}
SourceEnrichment - is a set of metadata about a row - this is built by the row source and passed to the enrichment
func NewSourceEnrichment ¶
func NewSourceEnrichment(metadata map[string]string) *SourceEnrichment
func SourceEnrichmentFromProto ¶
func SourceEnrichmentFromProto(p *proto.SourceEnrichment) *SourceEnrichment
func (*SourceEnrichment) ResolveSourceLocation ¶ added in v0.4.0
func (s *SourceEnrichment) ResolveSourceLocation() string
ResolveSourceLocation - returns the source location for this row - TpSourceLocation falls back to TpSourceName
func (*SourceEnrichment) ToProto ¶
func (s *SourceEnrichment) ToProto() *proto.SourceEnrichment
type TableSchema ¶ added in v0.2.0
type TableSchema struct {
Name string
Columns []*ColumnSchema
// optional list of source columns match patterns to include in the table
MapFields []string
// the table description (optional)
Description string
// the default null value for the table (may be overridden for specific columns)
NullIf string
}
func CommonFieldsSchema ¶ added in v0.2.0
func CommonFieldsSchema() *TableSchema
CommonFieldsSchema is the TableSchema for the common fields. It is used for custom tables.
func SchemaFromStruct ¶
func SchemaFromStruct(s any) (*TableSchema, error)
func TableSchemaFromProto ¶ added in v0.2.0
func TableSchemaFromProto(p *proto.Schema) *TableSchema
func (*TableSchema) AsMap ¶ added in v0.2.0
func (r *TableSchema) AsMap() map[string]*ColumnSchema
func (*TableSchema) Clone ¶ added in v0.3.0
func (r *TableSchema) Clone() *TableSchema
func (*TableSchema) Complete ¶ added in v0.2.0
func (r *TableSchema) Complete() bool
Complete checks if the types for all columns are known and that no source fields must be mapped (if any types are unknown or any source fields are being mapped, we need to infer the full schema once we have some source data)
func (*TableSchema) EnsureComplete ¶ added in v0.2.0
func (r *TableSchema) EnsureComplete() error
EnsureComplete checks that all columns have a type and returns an error if not
func (*TableSchema) MapRow ¶ added in v0.2.0
func (r *TableSchema) MapRow(sourceMap map[string]string) (map[string]interface{}, error)
MapRow maps a row from a map of source fields to a map of target fields, applying the schema and respecting the automap and exclude fields
func (*TableSchema) MergeWithCommonSchema ¶ added in v0.2.0
func (r *TableSchema) MergeWithCommonSchema() *TableSchema
MergeWithCommonSchema merges the table schema with the common fields schema. The resulting schema will contain: - All fields from this schema - For common fields, Type and Required are taken from the common schema, and Description if not already set - Any common fields not in this schema are added The original schema is not modified.
func (*TableSchema) NormaliseColumnTypes ¶ added in v0.2.0
func (r *TableSchema) NormaliseColumnTypes()
NormaliseColumnTypes normalises the column types to lower case
func (*TableSchema) ShouldMapSourceColumn ¶ added in v0.3.0
func (r *TableSchema) ShouldMapSourceColumn(columnName string) bool
func (*TableSchema) ToProto ¶ added in v0.2.0
func (r *TableSchema) ToProto() *proto.Schema
func (*TableSchema) Validate ¶ added in v0.2.0
func (r *TableSchema) Validate() error
Validate checks that all optional columns have a type and returns an error if not. The purpose of this function is to validate the TableDefinition provided by a 'predefined custom table'. This validation ensures that any optional columns have a type specified, so we can correctly create the parquet schema even if the column is not present in the source data. NOTE: this is the same validation as we perform in tailpipe Table.Validate - that validates the TableDef in config, whereas this validates the hardcoded TableDef provided by the plugin.
func (*TableSchema) WithSourceFieldsCleared ¶ added in v0.2.0
func (r *TableSchema) WithSourceFieldsCleared() *TableSchema
WithSourceFieldsCleared returns a copy with the source fields set to the column names. This is called from RowEnrichmentCollector as it will already have applied field mappings.